diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..90ec22b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.svn diff --git a/CMakeLists.txt b/CMakeLists.txt index 8944201..decdfef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,8 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) CHECK_FUNCTION_EXISTS(sync_file_range HAVE_SYNC_FILE_RANGE) CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE) +CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN) +CHECK_FUNCTION_EXISTS(posix_fadvise HAVE_POSIX_FADVISE) CHECK_FUNCTION_EXISTS(fdatasync HAVE_FDATASYNC) CHECK_FUNCTION_EXISTS(tdestroy HAVE_TDESTROY) @@ -83,15 +85,33 @@ endif(NOT DBUG_TEST) SET(CMAKE_REQUIRED_FLAGS "-lm") CHECK_FUNCTION_EXISTS(powl HAVE_POWL) + +MACRO(CHECK_CONSTANT_EXISTS FLAG) CHECK_C_SOURCE_COMPILES("#define _GNU_SOURCE #include #include #include +#include +#include int main(int argc, char * argv[]) { - argc = O_DIRECT; + argc = ${FLAG}; } -" HAVE_O_DIRECT) +" HAVE_${FLAG}) +ENDMACRO(CHECK_CONSTANT_EXISTS) + +CHECK_CONSTANT_EXISTS(O_DIRECT) +CHECK_CONSTANT_EXISTS(O_DSYNC) +CHECK_CONSTANT_EXISTS(O_NOATIME) +CHECK_CONSTANT_EXISTS(PTHREAD_STACK_MIN) + +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") +SET(ON_LINUX "LINUX") +ENDIF(CMAKE_SYSTEM_NAME) +IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin") +SET(ON_DARWIN "DARWIN") +ENDIF(CMAKE_SYSTEM_NAME) +#other options are "Windows" and "Solaris" CHECK_C_SOURCE_COMPILES("#include int main(int argc, char * argv[]) { alloca(1); }" HAVE_ALLOCA_H) @@ -111,16 +131,6 @@ int main(int argc, char* argv[]) { } " HAVE_GCC_ATOMICS) -CHECK_C_SOURCE_COMPILES(" -#include -#include -int main(int argc, char* argv[]) { - pthread_attr_t a; - pthread_attr_setstacksize(&a, PTHREAD_STACK_MIN); - return 0; -} -" HAVE_PTHREAD_STACK_MIN) - MACRO(CREATE_CHECK NAME) ADD_EXECUTABLE(${NAME} ${NAME}.c) TARGET_LINK_LIBRARIES(${NAME} ${COMMON_LIBRARIES}) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..527ace5 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +LIBSRC = $(wildcard src/stasis/*.c) $(wildcard src/stasis/*/*.c) $(wildcard src/stasis/*/*/*.c) \ + $(wildcard src/stasis/*.cpp) $(wildcard src/stasis/*/*.cpp) $(wildcard src/stasis/*/*/*.cpp) +LIBNAME = stasis + +include config/Makefile.stasis \ No newline at end of file diff --git a/README b/README index 52211f3..f7b7d29 100644 --- a/README +++ b/README @@ -1,9 +1,5 @@ Stasis is a transactional storage library. -An increasing range of applications requires robust support for atomic, durable and concurrent transactions. Databases provide the default solution, but force applications to interact via SQL and to forfeit control over data layout and access mechanisms. In principle, a specialized database stack could be built for each application, but such approaches have proven to be impractical. We argue there is a gap between DBMSs and file systems that limits designers of data-oriented applications. - -Stasis is a storage framework that incorporates ideas from traditional write-ahead logging algorithms and file systems. It provides applications with flexible control over data structures, data layout, robustness and performance. Stasis enables the development of unforeseen variants on transactional storage by generalizing write-ahead logging algorithms. Instead of implementing support for each new storage system from scratch, I have extended Stasis to provide specialized storage mechanisms to a wide variety of applications. It now provides cleaner semantics than similar application-specific approaches would, with significantly less source code than would be required by multiple separate storage implementations. In addition to the conventional write-ahead logging algorithms that Stasis was designed for, it now provides support for large objects, and for log-structured indexes. A number of other extensions, such as distributed recovery algorithms and snapshot-based recovery are under development. - Please see the COPYING file for licensing information, and INSTALL for compilation instructions. @@ -11,22 +7,3 @@ More information, including papers, a tutorial and API documentation are available at: http://www.cs.berkeley.edu/~sears/stasis/ -http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.html -http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.pdf - -@phdthesis{Sears:EECS-2010-2, - Author = {Sears, Russell C}, - Title = {Stasis: Flexible Transactional Storage}, - School = {EECS Department, University of California, Berkeley}, - Year = {2010}, - Month = {Jan}, - URL = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.html}, - Number = {UCB/EECS-2010-2}, - Abstract = {An increasing range of applications requires robust support for atomic, durable and concurrent transactions. Databases provide the default solution, but force applications to interact via SQL and to forfeit control over data layout and access mechanisms. In principle, a specialized database stack could be built for each application, but such approaches have proven to be impractical. We argue there is a gap between DBMSs and file systems that limits designers of data-oriented applications. - -Stasis is a storage framework that incorporates ideas from traditional write-ahead logging algorithms and file systems. It provides applications with flexible control over data structures, data layout, robustness and performance. Stasis enables the development of unforeseen variants on transactional storage by generalizing write-ahead logging algorithms. Instead of implementing support for each new storage system from scratch, I have extended Stasis to provide specialized storage mechanisms to a wide variety of applications. It now provides cleaner semantics than similar application-specific approaches would, with significantly less source code than would be required by multiple separate storage implementations. In addition to the conventional write-ahead logging algorithms that Stasis was designed for, it now provides support for large objects, and for log-structured indexes. A number of other extensions, such as distributed recovery algorithms and snapshot-based recovery are under development. - -This dissertation describes the range of data models and program architectures that have been commonly used in the past, and argues that Stasis is sufficiently general to support most storage applications. It then turns to a description of Stasis' high-level application interfaces and APIs that are designed to allow applications to add their own transactional data structures to Stasis. The performance of a number of such extensions is evaluated, showing that Stasis performs favorably relative to existing systems. - -The dissertation then turns to a careful definition of Stasis' recovery algorithms, and provides a novel generalization of ARIES, the de facto standard approach to transactional storage. The generalization is particularly promising in the context of distributed systems. Finally, it presents Stasis' lower-level interfaces, providing systems developers and application designers with the ability to tailor high-level transactional primitives to new types of storage hardware and operating system primitives. To the greatest extent possible, the ideas presented within are composable, allowing Stasis' simple implementation to support an unusually wide range of storage architectures.} -} diff --git a/benchmarks/bufferManager.c b/benchmarks/bufferManager.c index 37bdd7c..eb5ce50 100644 --- a/benchmarks/bufferManager.c +++ b/benchmarks/bufferManager.c @@ -153,6 +153,8 @@ int main(int argc, char * argv[]) { target_ops = atoi(argv[i]); } else if(!strcmp(argv[i], "--raid1")) { stasis_handle_factory = stasis_handle_raid1_factory; + } else if(!strcmp(argv[i], "--raid0")) { + stasis_handle_factory = stasis_handle_raid0_factory; } else { fprintf(stderr, "unknown argument: %s\n", argv[i]); abort(); diff --git a/benchmarks/butterfly.c b/benchmarks/butterfly.c index 0331f51..64def6c 100644 --- a/benchmarks/butterfly.c +++ b/benchmarks/butterfly.c @@ -20,11 +20,19 @@ int main(int argc, char * argv[]) { int sector_size = atoi(argv[3]); int fd = -1; +#ifdef HAVE_O_DSYNC fd = open(filename, O_WRONLY|O_DSYNC); //|O_DIRECT); - +#else + fd = open(filename, O_WRONLY|O_SYNC); +#endif struct timeval start, stop; void * buf; +#ifdef HAVE_POSIX_MEMALIGN posix_memalign(&buf, sector_size, sector_size); +#else + buf = malloc(2 * sector_size); + ((intptr_t)buf) &= ~(sector_size-1); +#endif memset(buf, 0, sector_size); diff --git a/benchmarks/rawIOPS.c b/benchmarks/rawIOPS.c index 2767fd2..0a1936c 100644 --- a/benchmarks/rawIOPS.c +++ b/benchmarks/rawIOPS.c @@ -8,12 +8,15 @@ #include #include #include +#include #include #include #include #include +DECLARE_HISTOGRAM_64(iop_hist) + static const long MB = (1024 * 1024); typedef struct { @@ -25,19 +28,43 @@ typedef struct { double elapsed; } thread_arg; +uint64_t completed_ops; +uint64_t op_count; + +void * status_worker(void * ignored) { + uint64_t last_ops = 0; + int iter = 0; + while(1) { + struct timespec ts = stasis_double_to_timespec(1.0); + nanosleep(&ts,0); + printf("current ops/sec %lld\n", completed_ops - last_ops); + last_ops = completed_ops; + iter ++; + if((! (iter % 10)) && (op_count == 0)) { + stasis_histograms_auto_dump(); + stasis_histogram_64_clear(&iop_hist); + } + } +} + void * worker(void * argp) { thread_arg * arg = argp; void * buf = 0; - int err = posix_memalign(&buf, 512, arg->page_size); + int err; +#ifdef HAVE_POSIX_MEMALIGN + err = posix_memalign(&buf, 512, arg->page_size); if(err) { printf("Couldn't allocate memory with posix_memalign: %s\n", strerror(err)); fflush(stdout); abort(); } - +#else + buf = malloc(arg->page_size * 2); + buf = (void*)((intptr_t)buf & ~(arg->page_size-1)); +#endif struct timeval start, stop; - for(uint64_t i = 0; i < arg->opcount; i++) { + for(uint64_t i = 0; (!arg->opcount) || i < arg->opcount; i++) { gettimeofday(&start, 0); uint64_t offset = arg->start_off + stasis_util_random64(arg->end_off @@ -45,14 +72,19 @@ void * worker(void * argp) { offset &= ~(arg->page_size-1); DEBUG("pread(%d %x %d %lld)\n", arg->fd, (unsigned int)buf, (int)arg->page_size, (long long)offset); + stasis_histogram_tick(&iop_hist); err = pread(arg->fd, buf, arg->page_size, offset); + stasis_histogram_tock(&iop_hist); + __sync_fetch_and_add(&completed_ops, 1); if(err == -1) { perror("Could not read from file"); fflush(stderr); fflush(stdout); abort(); } gettimeofday(&stop, 0); arg->elapsed += stasis_timeval_to_double(stasis_subtract_timeval(stop, start)); } +#ifdef HAVE_POSIX_MEMALIGN free(buf); +#endif return 0; } @@ -66,22 +98,29 @@ int main(int argc, char * argv[]) { char * filename = argv[1]; int page_size = atoi(argv[2]); int num_threads = atoi(argv[3]); - uint64_t op_count = atoll(argv[4]); + op_count = atoll(argv[4]); uint64_t start_off = atoll(argv[5]); uint64_t end_off = atoll(argv[6]) * MB; - int fd = open(filename, O_RDONLY|O_DIRECT); + completed_ops = 0; +#ifdef HAVE_O_DIRECT + int fd = open(filename, O_RDONLY|O_DIRECT); +#else + printf("Warning: not using O_DIRECT; file system cache will be used.\n"); + int fd = open(filename, O_RDONLY); +#endif if(fd == -1) { perror("Couldn't open file"); abort(); } struct timeval start, stop; - + pthread_t status; pthread_t * threads = malloc(sizeof(threads[0]) * num_threads); thread_arg * arg = malloc(sizeof(arg[0]) * num_threads); gettimeofday(&start,0); + pthread_create(&status, 0, status_worker, 0); for(int i = 0; i < num_threads; i++) { arg[i].fd = fd; arg[i].page_size = page_size; @@ -109,6 +148,6 @@ int main(int argc, char * argv[]) { printf("%d threads %lld mb %lld ops / %f seconds = %f IOPS.\n", num_threads, (long long)(end_off / MB), (long long)op_count, wallclock_elapsed, ((double)op_count) / wallclock_elapsed); close(fd); - + stasis_histograms_auto_dump(); return 0; } diff --git a/benchmarks/seekMap.c b/benchmarks/seekMap.c index e07874e..ce9399a 100644 --- a/benchmarks/seekMap.c +++ b/benchmarks/seekMap.c @@ -39,8 +39,12 @@ int main(int argc, char * argv[]) { printf("Usage: %s filename steps iterations start_off length random_mode\n", argv[0]); abort(); } + #ifdef HAVE_POSIX_MEMALIGN posix_memalign(&buf, 512, 512); - + #else + buf = malloc(2 * 512); + buf = (void*)(((intptr_t)buf) & ~(512-1)); + #endif const char * filename = argv[1]; int fd = open(filename, O_RDONLY);//|O_DIRECT); if(fd == -1) { diff --git a/benchmarks/sequentialThroughput.c b/benchmarks/sequentialThroughput.c index f4ea01d..ba32f98 100644 --- a/benchmarks/sequentialThroughput.c +++ b/benchmarks/sequentialThroughput.c @@ -60,7 +60,12 @@ int main(int argc, char ** argv) { for(int i = 1; i < argc; i++) { if(!strcmp(argv[i], "--direct")) { direct = 1; +#ifdef HAVE_O_DIRECT stasis_buffer_manager_io_handle_flags |= O_DIRECT; +#else + printf("O_DIRECT not supported by this build."); + return -1; +#endif } else if(!strcmp(argv[i], "--log_safe_writes")) { stasis_log_type = LOG_TO_FILE; log_mode = 1; diff --git a/benchmarks/stride.c b/benchmarks/stride.c index 3c1f8a7..5dbd668 100644 --- a/benchmarks/stride.c +++ b/benchmarks/stride.c @@ -20,11 +20,19 @@ int main(int argc, char * argv[]) { int write_size = atoi(argv[4]); uint64_t stride = atoll(argv[5]); int fd = -1; +#ifdef HAVE_O_DSYNC fd = open(argv[1], O_WRONLY|O_DSYNC); //|O_DIRECT); - +#else + fd = open(argv[1], O_WRONLY|O_SYNC); //|O_DIRECT); +#endif struct timeval start, stop; void * buf; +#ifdef HAVE_POSIX_MEMALIGN posix_memalign(&buf, 512, write_size); +#else + buf = malloc(2 * write_size); + buf = (void*)(((intptr_t)buf) & ~(write_size-1)); +#endif memset(buf, 0, write_size); gettimeofday(&start, 0); diff --git a/benchmarks/turbine.c b/benchmarks/turbine.c index e72eee5..c4d3828 100644 --- a/benchmarks/turbine.c +++ b/benchmarks/turbine.c @@ -31,13 +31,22 @@ typedef struct worker { void * func(void * argp) { worker * arg = argp; void * buf; +#ifdef HAVE_POSIX_MEMALIGN posix_memalign(&buf, 512, arg->write_size); +#else + buf = malloc(2*arg->write_size); + buf = (void*)(((intptr_t)buf) & ~((intptr_t)arg->write_size-1)); +#endif memset(buf, 0, arg->write_size); // void * buf = calloc(arg->write_size, 1); pthread_mutex_lock(&arg->mutex); uint64_t offset = 0; if(many_handles) { - arg->fd = open(arg->filename, O_WRONLY|O_DSYNC); //|O_DIRECT); +#ifdef HAVE_O_DSYNC + arg->fd = open(arg->filename, O_WRONLY|O_DSYNC); +#else + arg->fd = open(arg->filename, O_WRONLY|O_SYNC); +#endif if(arg->fd == -1) { perror("Couldn't open file"); abort(); @@ -81,7 +90,11 @@ int main(int argc, char * argv[]) { uint64_t stride = atoll(argv[5]); int fd = -1; if(!many_handles) { +#ifdef HAVE_O_DSYNC fd = open(argv[1], O_WRONLY|O_DSYNC); //|O_DIRECT); +#else + fd = open(argv[1], O_WRONLY|O_SYNC); //|O_DIRECT); +#endif } for(int i = 0; i < NUM_WORKERS; i++) { diff --git a/config.h.cmake b/config.h.cmake index 2360349..9346af3 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -1,15 +1,18 @@ #define __USE_GNU #define _GNU_SOURCE #cmakedefine HAVE_POSIX_FALLOCATE +#cmakedefine HAVE_POSIX_MEMALIGN +#cmakedefine HAVE_POSIX_FADVISE #cmakedefine HAVE_FDATASYNC #cmakedefine HAVE_SYNC_FILE_RANGE +#cmakedefine HAVE_O_NOATIME #cmakedefine HAVE_O_DIRECT +#cmakedefine HAVE_O_DSYNC #cmakedefine HAVE_GCC_ATOMICS #cmakedefine HAVE_PTHREAD_STACK_MIN #cmakedefine HAVE_ALLOCA_H #cmakedefine HAVE_TDESTROY #cmakedefine HAVE_POWL #cmakedefine DBUG -//#ifndef HAVE_PTHREAD_STACK_MIN -//#define PTHREAD_STACK_MIN 32768 // wild guess. -//#endif +#cmakedefine ON_LINUX +#cmakedefine ON_MACOS \ No newline at end of file diff --git a/config/Makefile.stasis b/config/Makefile.stasis new file mode 100644 index 0000000..f14722a --- /dev/null +++ b/config/Makefile.stasis @@ -0,0 +1,78 @@ +ifeq ($(shell uname), Linux) +INC += -I . -I config/linux +LIB += -lrt +else +INC += -I . -I config/macos +CC = gcc-4.2 +CXX = g++-4.2 +endif + +#LIB += -lm -lpthread + +CPPFLAGS += $(INC) +CXXFLAGS += -O3 -g -Wall -pedantic -Wno-long-long -Wno-variadic-macros -fPIC $(CPPFLAGS) +CFLAGS += -std=gnu99 $(CXXFLAGS) $(CPPFLAGS) + + +ifeq ($(shell uname), Linux) + SO=bin/lib$(LIBNAME).so +endif +all: ${SO} \ + bin/lib$(LIBNAME).a \ + $(addprefix bin/, $(addsuffix .static, $(basename $(MAINSRC)))) \ + print_cxx_params print_cc_params + +bin/%.o : %.c + @echo CC $< + @mkdir -p $(dir $@) + @ $(CC) -c $(CFLAGS) -o $@ $< +print_cc_params: + @echo CC: $(CC) -c $(CFLAGS) -o %.o %.c + +bin/%.o : %.cpp + @echo C++ $< + @mkdir -p $(dir $@) + @ $(CXX) -c $(CXXFLAGS) -o $@ $< +print_cxx_params: + @echo C++: $(CXX) -c $(CXXFLAGS) -o %.o %.cpp + +bin/%.d: %.c + @mkdir -p $(dir $@) + @$(CC) -M $(CFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@ + +bin/%.d: %.cpp + @mkdir -p $(dir $@) + @$(CC) -M $(CXXFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@ + +bin/%.d: %.cc + @mkdir -p $(dir $@) + @$(CC) -M $(CXXFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@ + +bin/lib$(LIBNAME).so: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC))) + @echo LINK $@ + @g++ -shared -Wl,-soname,$@ $(LIB) $(CXXFLAGS) -o $@ $^ + +bin/lib$(LIBNAME).dylib: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC))) + @echo LINK $@ + @g++ -dynamiclib $(LIB) $(CXXFLAGS) -o $@ $^ + +bin/lib$(LIBNAME).a: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC))) + @echo AR $@ + @ar rcs $@ $^ + +bin/%.static: %.cpp bin/lib$(LIBNAME).a + @echo STATIC_EXE $@ + @mkdir -p $(dir $@) + @g++ $(LIB) $(CXXFLAGS) -o $@ $^ -static -pthread $(STATIC_LIBS) + +bin/%.static: %.cc bin/lib$(LIBNAME).a + @echo STATIC_EXE $@ + @mkdir -p $(dir $@) + @g++ $(LIB) $(CXXFLAGS) -o $@ $^ -static -pthread $(STATIC_LIBS) + +.PHONY: all clean print_cc_params print_cxx_params + +clean: + rm -rf bin/ + +-include $(patsubst %.cc,bin/%.d,$(patsubst %.cpp,bin/%.d,$(patsubst %.c,bin/%.d,$(LIBSRC)))) diff --git a/config/linux/config.h b/config/linux/config.h new file mode 100644 index 0000000..0e86d34 --- /dev/null +++ b/config/linux/config.h @@ -0,0 +1,19 @@ +#define __USE_GNU +#define _GNU_SOURCE +#define ON_LINUX +#define HAVE_POSIX_FALLOCATE +#define HAVE_POSIX_MEMALIGN +#define HAVE_POSIX_FADVISE +#define HAVE_FDATASYNC +#define HAVE_SYNC_FILE_RANGE +#define HAVE_O_NOATIME +#define HAVE_O_DIRECT +#define HAVE_O_DSYNC +#define HAVE_GCC_ATOMICS +#define HAVE_PTHREAD_STACK_MIN +#define HAVE_ALLOCA_H +#define HAVE_TDESTROY +#define HAVE_POWL +#define PBL_COMPAT 1 +#define STLSEARCH 1 +//#define DBUG diff --git a/config/macos/config.h b/config/macos/config.h new file mode 100644 index 0000000..ffd467f --- /dev/null +++ b/config/macos/config.h @@ -0,0 +1,17 @@ +#define __USE_GNU +#define _GNU_SOURCE +#define ON_MACOS +//#define HAVE_POSIX_FALLOCATE +//#define HAVE_FDATASYNC +//#define HAVE_SYNC_FILE_RANGE +//#define HAVE_O_NOATIME +//#define HAVE_POSIX_FADVISE +#define HAVE_O_DIRECT +#define HAVE_GCC_ATOMICS +#define HAVE_PTHREAD_STACK_MIN +#define HAVE_ALLOCA_H +#define HAVE_TDESTROY +#define HAVE_POWL +#define PBL_COMPAT 1 +#define STLSEARCH 1 +//#define DBUG diff --git a/doc/index.html b/doc/index.html index da7be20..e63aebf 100644 --- a/doc/index.html +++ b/doc/index.html @@ -104,7 +104,7 @@

- $Id$ + $Id: index.html 96 2004-10-27 03:49:02Z sears $
diff --git a/doc/paper/usenix.sty b/doc/paper/usenix.sty index 08b06a8..58083b0 100644 --- a/doc/paper/usenix.sty +++ b/doc/paper/usenix.sty @@ -5,7 +5,7 @@ % \usepackage{usenix-2e} % and put {\rm ....} around the author names. % -% $Id$ +% $Id: usenix.sty 83 2004-10-23 02:19:01Z sears $ % % The following definitions are modifications of standard article.sty % definitions, arranged to do a better job of matching the USENIX diff --git a/doc/paper3/usenix.sty b/doc/paper3/usenix.sty index fb5dc98..8a13d49 100644 --- a/doc/paper3/usenix.sty +++ b/doc/paper3/usenix.sty @@ -1,7 +1,7 @@ % usenix.sty - to be used with latex2e for USENIX. % To use this style file, look at the template usenix_template.tex % -% $Id$ +% $Id: usenix.sty 679 2006-09-04 22:51:40Z sears $ % % The following definitions are modifications of standard article.sty % definitions, arranged to do a better job of matching the USENIX diff --git a/lang/java/CMakeLists.txt b/lang/java/CMakeLists.txt index 0b18ebb..4c7159c 100644 --- a/lang/java/CMakeLists.txt +++ b/lang/java/CMakeLists.txt @@ -1,3 +1,4 @@ +IF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") IF(JNI_FOUND) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/org/stasis/Stasis.class @@ -14,4 +15,5 @@ IF(JNI_FOUND) #INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} /usr/lib/jvm/java-6-openjdk/include/ ${INCLUDE_DIRECTORIES}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${JAVA_INCLUDE_PATH} ${INCLUDE_DIRECTORIES}) ADD_LIBRARY(stasisjni ${CMAKE_CURRENT_SOURCE_DIR}/org_stasis_Stasis.c) -ENDIF(JNI_FOUND) \ No newline at end of file +ENDIF(JNI_FOUND) +ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") diff --git a/src/stasis/CMakeLists.txt b/src/stasis/CMakeLists.txt index 1c6010b..ed05623 100644 --- a/src/stasis/CMakeLists.txt +++ b/src/stasis/CMakeLists.txt @@ -1,4 +1,5 @@ ADD_LIBRARY(stasis util/crc32.c + util/bloomFilter.c util/redblack.c util/lhtable.c util/concurrentHash.c @@ -53,8 +54,15 @@ ADD_LIBRARY(stasis util/crc32.c operations/regions.c operations/bTree.c operations/blobs.c - io/rangeTracker.c io/memory.c io/file.c io/pfile.c io/raid1.c - io/non_blocking.c io/debug.c io/handle.c + io/rangeTracker.c + io/memory.c + io/file.c + io/pfile.c + io/raid1.c + io/raid0.c + io/non_blocking.c + io/debug.c + io/handle.c bufferManager/pageArray.c bufferManager/bufferHash.c replacementPolicy/lru.c @@ -63,6 +71,7 @@ ADD_LIBRARY(stasis util/crc32.c replacementPolicy/concurrentWrapper.c replacementPolicy/clock.c ) +IF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") ADD_LIBRARY(stasis_experimental experimental/consumer.c experimental/fifo.c @@ -75,5 +84,6 @@ ADD_LIBRARY(stasis_experimental experimental/group.c experimental/lsmTree.c ) +ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") INSTALL(TARGETS stasis LIBRARY DESTINATION lib) diff --git a/src/stasis/bufferManager.c b/src/stasis/bufferManager.c index f6a4a20..f985cc6 100644 --- a/src/stasis/bufferManager.c +++ b/src/stasis/bufferManager.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /******************************* - * $Id$ + * $Id: bufferManager.c 1543 2011-08-25 21:29:51Z sears.russell@gmail.com $ * * implementation of the page buffer * *************************************************/ diff --git a/src/stasis/bufferPool.c b/src/stasis/bufferPool.c index b82d727..79ab716 100644 --- a/src/stasis/bufferPool.c +++ b/src/stasis/bufferPool.c @@ -45,7 +45,7 @@ terms specified in this license. * * Implementation of in memory buffer pool * - * $Id$ + * $Id: bufferPool.c 1542 2011-08-23 18:25:26Z sears.russell@gmail.com $ * */ #include diff --git a/src/stasis/common.c b/src/stasis/common.c index 2b9a78b..7a6bfbd 100644 --- a/src/stasis/common.c +++ b/src/stasis/common.c @@ -1,5 +1,4 @@ -#define _GNU_SOURCE - +#include #include #include diff --git a/src/stasis/experimental/lockManagerImpl.c b/src/stasis/experimental/lockManagerImpl.c index 321973e..c31460f 100644 --- a/src/stasis/experimental/lockManagerImpl.c +++ b/src/stasis/experimental/lockManagerImpl.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/stasis/experimental/multiplexer.c b/src/stasis/experimental/multiplexer.c index e6740d1..efd75f3 100644 --- a/src/stasis/experimental/multiplexer.c +++ b/src/stasis/experimental/multiplexer.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/stasis/flags.c b/src/stasis/flags.c index 7294e2e..d1012bd 100644 --- a/src/stasis/flags.c +++ b/src/stasis/flags.c @@ -30,7 +30,11 @@ int stasis_buffer_manager_io_handle_flags = #ifdef STASIS_BUFFER_MANAGER_IO_HANDLE_FLAGS STASIS_BUFFER_MANAGER_IO_HANDLE_FLAGS; #else +#ifdef HAVE_O_NOATIME O_NOATIME; +#else + 0; +#endif #endif int stasis_buffer_manager_preallocate_mode = @@ -94,18 +98,27 @@ stasis_handle_t* (*stasis_handle_factory)() = #endif stasis_handle_t* (*stasis_handle_file_factory)(const char* filename, int open_mode, int creat_perms) = #ifdef STASIS_FILE_HANDLE_FACTORY - STASIS_FILE_HANDLE_FACTORY + STASIS_FILE_HANDLE_FACTORY; #else stasis_handle_open_pfile; #endif stasis_handle_t* (*stasis_non_blocking_handle_file_factory)(const char* filename, int open_mode, int creat_perms) = #ifdef STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY - STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY + STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY; #else stasis_handle_open_pfile; #endif +uint32_t stasis_handle_raid0_stripe_size = +#ifdef STASIS_HANDLE_RAID0_STRIPE_SIZE + STASIS_HANDLE_RAID0_STRIPE_SIZE; +#else + (256 * 1024); +#endif + +char ** stasis_handle_raid0_filenames = 0; + #ifdef STASIS_BUFFER_MANAGER_HINT_WRITES_ARE_SEQUENTIAL int stasis_buffer_manager_hint_writes_are_sequential = STASIS_BUFFER_MANAGER_HINT_WRITES_ARE_SEQUENTIAL; #else diff --git a/src/stasis/io/pfile.c b/src/stasis/io/pfile.c index 852281d..7d93439 100644 --- a/src/stasis/io/pfile.c +++ b/src/stasis/io/pfile.c @@ -86,8 +86,10 @@ static stasis_handle_t * pfile_dup(stasis_handle_t *h) { static void pfile_enable_sequential_optimizations(stasis_handle_t *h) { pfile_impl *impl = h->impl; impl->sequential = 1; +#ifdef HAVE_POSIX_FADVISE int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_SEQUENTIAL); if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL to kernel failed"); +#endif } static lsn_t pfile_end_position(stasis_handle_t *h) { pfile_impl *impl = (pfile_impl*)h->impl; @@ -300,8 +302,10 @@ static int pfile_force(stasis_handle_t *h) { DEBUG("File was opened with O_SYNC. pfile_force() is a no-op\n"); } if(impl->sequential) { +#ifdef HAVE_POSIX_FADVISE int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED); if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL to kernel failed"); +#endif } TOCK(force_hist); return 0; @@ -334,10 +338,12 @@ static int pfile_async_force(stasis_handle_t *h) { #endif int ret = 0; #endif +#ifdef HAVE_POSIX_FADVISE if(impl->sequential) { int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED); if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL (for a range of a file) to kernel failed"); } +#endif TOCK(force_range_hist); return ret; } @@ -370,10 +376,12 @@ static int pfile_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) { #endif int ret = 0; #endif +#ifdef HAVE_POSIX_FADVISE if(impl->sequential) { int err = posix_fadvise(impl->fd, start, stop-start, POSIX_FADV_DONTNEED); if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL (for a range of a file) to kernel failed"); } +#endif TOCK(force_range_hist); return ret; } diff --git a/src/stasis/io/raid0.c b/src/stasis/io/raid0.c new file mode 100644 index 0000000..bd98d3f --- /dev/null +++ b/src/stasis/io/raid0.c @@ -0,0 +1,222 @@ +/* + * raid0.c + * + * Created on: Feb 24, 2012 + * Author: sears + */ +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#ifdef RAID0_LATENCY_PROF +DECLARE_HISTOGRAM(read_hist) +DECLARE_HISTOGRAM(write_hist) +DECLARE_HISTOGRAM(force_hist) +DECLARE_HISTOGRAM(force_range_hist) +#define TICK(hist) stasis_histogram_tick(&hist); +#define TOCK(hist) stasis_histogram_tock(&hist); +#else +#define TICK(hist) +#define TOCK(hist) +#endif + +typedef struct raid0_impl { + stasis_handle_t ** h; + int handle_count; + int stripe_size; +} raid0_impl; + +static int raid0_num_copies(stasis_handle_t *h) { + raid0_impl * i = h->impl; + return i->h[0]->num_copies(i->h[0]); +} +static int raid0_num_copies_buffer(stasis_handle_t *h) { + raid0_impl * i = h->impl; + return i->h[0]->num_copies_buffer(i->h[0]); +} +static int raid0_close(stasis_handle_t *h) { + raid0_impl * r = h->impl; + int ret = 0; + for(int i = 0; i < r->handle_count; i++) { + int this_ret = r->h[i]->close(r->h[i]); + if(this_ret && !ret) ret = this_ret; + } + free(r->h); + free(r); + free(h); + return ret; +} +static stasis_handle_t* raid0_dup(stasis_handle_t *h) { + raid0_impl * r = h->impl; + stasis_handle_t ** h_dup = malloc(sizeof(h_dup[0]) * r->handle_count); + for(int i = 0; i < r->handle_count; i++) { + h_dup[i] = r->h[i]->dup(r->h[i]); + } + stasis_handle_t * ret = stasis_handle_open_raid0(r->handle_count, h_dup, r->stripe_size); + free(h_dup); + return ret; +} +static void raid0_enable_sequential_optimizations(stasis_handle_t *h) { + raid0_impl * r = h->impl; + for(int i = 0; i < r->handle_count; i++) { + r->h[i]->enable_sequential_optimizations(r->h[i]); + } +} +static lsn_t raid0_end_position(stasis_handle_t *h) { + raid0_impl *r = h->impl; + lsn_t max_end = 0; + for(int i = 0; i < r->handle_count; i++) { + lsn_t this_end = r->h[i]->end_position(r->h[i]) + (i * r->stripe_size); + if(this_end > max_end) max_end = this_end; + } + return max_end; +} +/** + * Figure out which stripe this operation belongs on. We don't support + * inter-stripe operations, so returning a single stripe suffices. + * + * @param off The first byte to be accessed. + * @param len If the access will span stripes, this method will call abort(). + */ +static int raid0_calc_stripe(raid0_impl * r, lsn_t off, lsn_t len) { + assert(len < r->stripe_size); + int start_stripe = (off % (r->handle_count * r->stripe_size)) / r->stripe_size; + int end_stripe = ((off+len-1) % (r->handle_count * r->stripe_size)) / r->stripe_size; + assert(start_stripe == end_stripe); + return start_stripe; +} +static lsn_t raid0_calc_block(raid0_impl * r, lsn_t off, lsn_t len) { + return off / (r->stripe_size * r->handle_count); +} +static lsn_t raid0_calc_off(raid0_impl * r, lsn_t off, lsn_t len) { + lsn_t block = raid0_calc_block(r, off, len); + return block * r->stripe_size + (off % r->stripe_size); +} +static int raid0_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) { + raid0_impl *r = h->impl; + int stripe = raid0_calc_stripe(r, off, len); + lsn_t stripe_off = raid0_calc_off(r, off, len); + return r->h[stripe]->read(r->h[stripe], stripe_off, buf, len); +} +static int raid0_write(stasis_handle_t *h, lsn_t off, const byte *dat, lsn_t len) { + raid0_impl *r = h->impl; + int stripe = raid0_calc_stripe(r, off, len); + lsn_t stripe_off = raid0_calc_off(r, off, len); + return r->h[stripe]->write(r->h[stripe], stripe_off, dat, len); +} +static stasis_write_buffer_t * raid0_write_buffer(stasis_handle_t *h, lsn_t off, lsn_t len) { + raid0_impl *r = h->impl; + int stripe = raid0_calc_stripe(r, off, len); + lsn_t stripe_off = raid0_calc_off(r, off, len); + return r->h[stripe]->write_buffer(r->h[stripe], stripe_off, len); +} +static int raid0_release_write_buffer(stasis_write_buffer_t *w) { + return w->h->release_write_buffer(w); +} +static stasis_read_buffer_t *raid0_read_buffer(stasis_handle_t *h, + lsn_t off, lsn_t len) { + raid0_impl *r = h->impl; + int stripe = raid0_calc_stripe(r, off, len); + lsn_t stripe_off = raid0_calc_off(r, off, len); + return r->h[stripe]->read_buffer(r->h[stripe], stripe_off, len); +} +static int raid0_release_read_buffer(stasis_read_buffer_t *r) { + return r->h->release_read_buffer(r); +} +static int raid0_force(stasis_handle_t *h) { + raid0_impl * r = h->impl; + int ret = 0; + for(int i = 0; i < r->handle_count; i++) { + int this_ret = r->h[i]->force(r->h[i]); + if(this_ret && !ret) ret = this_ret; + } + return ret; +} +/** + * TODO Implement raid0_force_range properly instead of forcing whole file. + */ +static int raid0_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) { + return raid0_force(h); +} +static int raid0_async_force(stasis_handle_t *h) { + raid0_impl * r = h->impl; + int ret = 0; + for(int i = 0; i < r->handle_count; i++) { + int this_ret = r->h[i]->async_force(r->h[i]); + if(this_ret && !ret) ret = this_ret; + } + return ret; +} +static int raid0_fallocate(stasis_handle_t *h, lsn_t off, lsn_t len) { + raid0_impl * r = h->impl; + int ret = 0; + lsn_t start_block = raid0_calc_block(r, off, 0); + lsn_t start_off = (start_block) * r->stripe_size; + lsn_t end_block = raid0_calc_block(r, off+len-1, 0); + lsn_t end_off = (end_block+1) * r->stripe_size; + + for(int i = 0; i < r->handle_count; i++) { + int this_ret = r->h[i]->fallocate(r->h[i], start_off, end_off-start_off); + if(this_ret && !ret) ret = this_ret; + } + return ret; +} +struct stasis_handle_t raid0_func = { + .num_copies = raid0_num_copies, + .num_copies_buffer = raid0_num_copies_buffer, + .close = raid0_close, + .dup = raid0_dup, + .enable_sequential_optimizations = raid0_enable_sequential_optimizations, + .end_position = raid0_end_position, + .write = raid0_write, + .write_buffer = raid0_write_buffer, + .release_write_buffer = raid0_release_write_buffer, + .read = raid0_read, + .read_buffer = raid0_read_buffer, + .release_read_buffer = raid0_release_read_buffer, + .force = raid0_force, + .async_force = raid0_async_force, + .force_range = raid0_force_range, + .fallocate = raid0_fallocate, + .error = 0 +}; + +stasis_handle_t * stasis_handle_open_raid0(int handle_count, stasis_handle_t** h, uint32_t stripe_size) { + stasis_handle_t * ret = malloc(sizeof(*ret)); + *ret = raid0_func; + raid0_impl * r = malloc(sizeof(*r)); + r->stripe_size = stripe_size; + r->handle_count = handle_count; + r->h = malloc(sizeof(r->h[0]) * handle_count); + for(int i = 0; i < handle_count; i++) { + r->h[i] = h[i]; + } + ret->impl = r; + return ret; +} + +stasis_handle_t * stasis_handle_raid0_factory() { + if(stasis_handle_raid0_filenames == NULL) { + stasis_handle_t * h[2]; + h[0] = stasis_handle_file_factory(stasis_store_file_1_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM); + h[1] = stasis_handle_file_factory(stasis_store_file_2_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM); + return stasis_handle_open_raid0(2, h, stasis_handle_raid0_stripe_size); + } else { + int count = 0; + while(stasis_handle_raid0_filenames[count]) count++; + stasis_handle_t * h[count]; + for(int i = 0; i < count; i++) { + h[i] = stasis_handle_file_factory(stasis_handle_raid0_filenames[i], O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM); + } + return stasis_handle_open_raid0(count, h, stasis_handle_raid0_stripe_size); + } +} diff --git a/src/stasis/io/raid1.c b/src/stasis/io/raid1.c index 1cb9f43..fbaaca9 100644 --- a/src/stasis/io/raid1.c +++ b/src/stasis/io/raid1.c @@ -11,7 +11,7 @@ #include -#ifdef RAID_LATENCY_PROF +#ifdef RAID1_LATENCY_PROF DECLARE_HISTOGRAM(read_hist) DECLARE_HISTOGRAM(write_hist) DECLARE_HISTOGRAM(force_hist) @@ -21,6 +21,7 @@ DECLARE_HISTOGRAM(force_range_hist) #else #define TICK(hist) #define TOCK(hist) +#endif typedef struct raid1_impl { stasis_handle_t * a; @@ -109,12 +110,24 @@ static int raid1_force(stasis_handle_t *h) { int retB = i->b->force(i->b); return retA ? retA : retB; } +static int raid1_async_force(stasis_handle_t *h) { + raid1_impl *i = h->impl; + int retA = i->a->async_force(i->a); + int retB = i->b->async_force(i->b); + return retA ? retA : retB; +} static int raid1_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) { raid1_impl *i = h->impl; int retA = i->a->force_range(i->a, start, stop); int retB = i->b->force_range(i->b, start, stop); return retA ? retA : retB; } +static int raid1_fallocate(stasis_handle_t *h, lsn_t off, lsn_t len) { + raid1_impl *i = h->impl; + int retA = i->a->fallocate(i->a, off, len); + int retB = i->b->fallocate(i->b, off, len); + return retA ? retA : retB; +} struct stasis_handle_t raid1_func = { .num_copies = raid1_num_copies, .num_copies_buffer = raid1_num_copies_buffer, @@ -129,7 +142,9 @@ struct stasis_handle_t raid1_func = { .read_buffer = raid1_read_buffer, .release_read_buffer = raid1_release_read_buffer, .force = raid1_force, + .async_force = raid1_async_force, .force_range = raid1_force_range, + .fallocate = raid1_fallocate, .error = 0 }; @@ -147,4 +162,3 @@ stasis_handle_t * stasis_handle_raid1_factory() { stasis_handle_t * b = stasis_handle_file_factory(stasis_store_file_2_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM); return stasis_handle_open_raid1(a, b); } -#endif diff --git a/src/stasis/logger/filePool.c b/src/stasis/logger/filePool.c index c737b6d..a215156 100644 --- a/src/stasis/logger/filePool.c +++ b/src/stasis/logger/filePool.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,6 @@ #include #include -#include #include #include @@ -60,6 +60,7 @@ typedef struct { pthread_t write_thread; pthread_t write_thread2; + pthread_t prealloc_thread; stasis_ringbuffer_t * ring; /** Need this because the min aggregate in the ringbuffer doesn't * want to malloc keys, but needs to maintain some sort of state @@ -68,6 +69,19 @@ typedef struct { pthread_key_t handle_key; pthread_mutex_t mut; + + /** + * If there are fewer than this many dead files in the pool, a background + * thread allocates some more. + */ + int dead_threshold; + /** + * Signal this mutex to wake up the prealloc thread. + */ + pthread_cond_t prealloc_log_cond; + + int shutdown; + } stasis_log_file_pool_state; enum file_type { @@ -111,7 +125,15 @@ enum file_type stasis_log_file_pool_file_type(const struct dirent* file, lsn_t * /** * No latching required. Does not touch shared state. */ +#ifdef ON_LINUX int stasis_log_file_pool_file_filter(const struct dirent* file) { +#else +#ifdef ON_MACOS +int stasis_log_file_pool_file_filter(struct dirent* file) { +#else +#error Not on linux or macos? +#endif +#endif lsn_t junk; if(UNKNOWN != stasis_log_file_pool_file_type(file, &junk)) { return 1; @@ -184,17 +206,20 @@ char * build_path(const char * dir, const char * file) { * not concurrently change. */ void stasis_log_file_pool_chunk_open(stasis_log_file_pool_state * fp, int chunk) { - char* full_name = malloc(strlen(fp->dirname) + 1 + strlen(fp->live_filenames[chunk]) + 1); - full_name[0] = 0; - strcat(full_name, fp->dirname); - strcat(full_name, "/"); - strcat(full_name, fp->live_filenames[chunk]); + char* full_name = build_path(fp->dirname, fp->live_filenames[chunk]); +// char* full_name = malloc(strlen(fp->dirname) + 1 + strlen(fp->live_filenames[chunk]) + 1); +// full_name[0] = 0; +// strcat(full_name, fp->dirname); +// strcat(full_name, "/"); +// strcat(full_name, fp->live_filenames[chunk]); fp->ro_fd[chunk] = open(full_name, fp->filemode, fp->fileperm); free(full_name); } /** * Does no latching. Relies on stability of fp->live_offsets and fp->live_count. + * + * @return chunk id or -1 if the offset is past the end of the live chunks. */ static int get_chunk_from_offset(stasis_log_t * log, lsn_t lsn) { stasis_log_file_pool_state * fp = log->impl; @@ -207,6 +232,82 @@ static int get_chunk_from_offset(stasis_log_t * log, lsn_t lsn) { } return chunk; } +static void stasis_log_file_pool_prealloc_file(stasis_log_file_pool_state * fp) { + if(fp->dead_count < fp->dead_threshold) { + char * tmpfile = "preallocating~"; + char * tmpfilepath = build_path(fp->dirname, tmpfile); + size_t bufsz = PAGE_SIZE; + pthread_mutex_unlock(&fp->mut); +#ifdef HAVE_O_DSYNC + int sync = O_DSYNC; // XXX cut and pasted from above... +#else + int sync = O_SYNC; +#endif + + + int fd = open(tmpfilepath, fp->filemode & (~sync), fp->fileperm); + +#ifdef HAVE_POSIX_FALLOCATE + printf("posix_fallocate()'ing empty log file...\n"); + posix_fallocate(fd, 0, fp->target_chunk_size + bufsz); +#endif + printf("Writing zeros to empty log file...\n"); + byte * buffer = calloc(bufsz, sizeof(byte)); + for(off_t i = 0; i <= fp->target_chunk_size; i += bufsz) { + int ret = pwrite(fd, buffer, bufsz, i); + if(ret != bufsz) { + perror("Couldn't write to empty log"); + abort(); + } + } + free(buffer); + fsync(fd); + close(fd); + printf("Log preallocation done.\n"); + + pthread_mutex_lock(&fp->mut); + char * filenametmp = stasis_log_file_pool_build_filename(fp, fp->dead_count); + char * filename = malloc(strlen(filenametmp) + 2); + strcpy(filename, filenametmp); + strcat(filename, "~"); + char * newfilepath = build_path(fp->dirname, filename); + fp->dead_filenames = realloc(fp->dead_filenames, sizeof(char**) * fp->dead_count + 1); + fp->dead_filenames[fp->dead_count] = filename; + fp->dead_count ++; + int err = rename(tmpfilepath, newfilepath); + printf("Created new chunk: %s.", newfilepath); + // TODO Another rename holding mutex sneaks in... + if(err) { + perror("could not rename file"); + assert(err == -1); + abort(); + } + free(tmpfilepath); + free(filenametmp); + // don't free filename; we installed it into the dead_filenames array! + free(newfilepath); + } +} +static void * stasis_log_file_pool_prealloc_worker(void * fpp) { + stasis_log_file_pool_state * fp = fpp; + + pthread_mutex_lock(&fp->mut); + + while(1) { + while(fp->dead_count >= fp->dead_threshold) { +// printf("Dead count = %d, threshold = %d, log preallocation sleeps.\n", fp->dead_count, fp->dead_threshold); + if(fp->shutdown) { break; } + pthread_cond_wait(&fp->prealloc_log_cond, &fp->mut); + } + if(fp->shutdown) { break; } + printf("Available log chunk count = %d (want %d), preallocating new chunk.\n", fp->dead_count, fp->dead_threshold); + stasis_log_file_pool_prealloc_file(fp); + } + + pthread_mutex_unlock(&fp->mut); + return 0; +} + /** * Does no latching. Modifies all mutable fields of fp. */ @@ -223,6 +324,11 @@ int stasis_log_file_pool_append_chunk(stasis_log_t * log, off_t new_offset) { if(fp->dead_count) { old_file = fp->dead_filenames[fp->dead_count-1]; fp->dead_count--; + + if(fp->dead_count < fp->dead_threshold) { + pthread_cond_signal(&fp->prealloc_log_cond); + } + char * old_path = build_path(fp->dirname, old_file); int err = rename(old_path, new_path); if(err) { @@ -464,8 +570,8 @@ int stasis_log_file_pool_truncate(struct stasis_log_t* log, lsn_t lsn) { // Rename should be fast, but we're placing a lot of faith in the filesystem. int err = rename(old, new); if(err) { - assert(err == -1); perror("could not rename file"); + assert(err == -1); abort(); } close(fp->ro_fd[i]); @@ -513,11 +619,14 @@ int stasis_log_file_pool_close(stasis_log_t * log) { stasis_log_file_pool_state * fp = log->impl; log->force_tail(log, 0); /// xxx use real constant for wal mode.. - stasis_ringbuffer_shutdown(fp->ring); + fp->shutdown = 1; + pthread_cond_signal(&fp->prealloc_log_cond); + pthread_join(fp->write_thread, 0); // pthread_join(fp->write_thread2, 0); + pthread_join(fp->prealloc_thread, 0); stasis_ringbuffer_free(fp->ring); @@ -600,7 +709,18 @@ void key_destr(void * key) { free(key); } /** * Does no latching. No shared state. */ +#ifdef ON_LINUX int filesort(const struct dirent ** a, const struct dirent ** b) { +#else +#ifdef ON_MACOS +int filesort(const void * ap, const void * bp) { + const struct dirent *const *const a = ap; + const struct dirent *const *const b = bp; +#else +#error Not on linux or macos? +#endif +#endif + int ret = strcmp((*a)->d_name, (*b)->d_name); DEBUG("%d = %s <=> %s\n", ret, (*a)->d_name, (*b)->d_name); return ret; @@ -676,11 +796,16 @@ stasis_log_t* stasis_log_file_pool_open(const char* dirname, int filemode, int f fp->live_count = 0; fp->dead_count = 0; - fp->target_chunk_size = 512 * 1024 * 1024; + fp->target_chunk_size = 128* 1024 * 1024; - fp->filemode = filemode | O_DSYNC; /// XXX should not hard-code O_SYNC. +#ifdef HAVE_O_DSYNC + int SYNC = O_DSYNC; +#else + int SYNC = O_SYNC; +#endif + fp->filemode = filemode | SYNC; /// XXX should not hard-code O_SYNC. fp->fileperm = fileperm; - fp->softcommit = !(filemode & O_DSYNC); + fp->softcommit = !(filemode & SYNC); off_t current_target = 0; for(int i = 0; i < n; i++) { @@ -751,6 +876,10 @@ stasis_log_t* stasis_log_file_pool_open(const char* dirname, int filemode, int f fp->ring = stasis_ringbuffer_init(26, next_lsn); // 64mb buffer pthread_key_create(&fp->handle_key, key_destr); + fp->dead_threshold = 1; + pthread_cond_init(&fp->prealloc_log_cond, 0); + fp->shutdown = 0; + pthread_create(&fp->prealloc_thread, 0, stasis_log_file_pool_prealloc_worker, fp); pthread_create(&fp->write_thread, 0, stasis_log_file_pool_writeback_worker, ret); // pthread_create(&fp->write_thread2, 0, stasis_log_file_pool_writeback_worker, ret); diff --git a/src/stasis/operations/alloc.c b/src/stasis/operations/alloc.c index 9b90a4b..094919a 100644 --- a/src/stasis/operations/alloc.c +++ b/src/stasis/operations/alloc.c @@ -69,7 +69,7 @@ @ingroup OPERATIONS - $Id$ + $Id: alloc.c 1569 2011-11-07 22:53:55Z sears.russell@gmail.com $ */ //}end diff --git a/src/stasis/operations/decrement.c b/src/stasis/operations/decrement.c index 79db7e2..4c245fd 100644 --- a/src/stasis/operations/decrement.c +++ b/src/stasis/operations/decrement.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: decrement.c 1310 2009-12-31 20:20:47Z sears.russell $ * * Decrements the given reference by one *********************************************/ diff --git a/src/stasis/operations/increment.c b/src/stasis/operations/increment.c index 90dae85..522b6eb 100644 --- a/src/stasis/operations/increment.c +++ b/src/stasis/operations/increment.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: increment.c 1310 2009-12-31 20:20:47Z sears.russell $ * * Increments the given reference by one **********************************************/ diff --git a/src/stasis/operations/naiveLinearHash.c b/src/stasis/operations/naiveLinearHash.c index 86663b7..7021166 100644 --- a/src/stasis/operations/naiveLinearHash.c +++ b/src/stasis/operations/naiveLinearHash.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/stasis/operations/noop.c b/src/stasis/operations/noop.c index d42e14b..6b87f68 100644 --- a/src/stasis/operations/noop.c +++ b/src/stasis/operations/noop.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: noop.c 1210 2009-07-16 03:05:32Z sears.russell $ * * sets the given reference to dat **********************************************/ diff --git a/src/stasis/operations/prepare.c b/src/stasis/operations/prepare.c index d016140..b2fac8f 100644 --- a/src/stasis/operations/prepare.c +++ b/src/stasis/operations/prepare.c @@ -41,7 +41,7 @@ terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: prepare.c 1215 2009-07-26 18:51:45Z sears.russell $ * * sets the given reference to dat **********************************************/ diff --git a/src/stasis/operations/set.c b/src/stasis/operations/set.c index 1dbc45d..9ad5aa0 100644 --- a/src/stasis/operations/set.c +++ b/src/stasis/operations/set.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: set.c 1526 2011-06-13 11:26:25Z sears.russell@gmail.com $ * * sets the given reference to dat **********************************************/ diff --git a/src/stasis/util/bloomFilter.c b/src/stasis/util/bloomFilter.c new file mode 100644 index 0000000..3b61dbb --- /dev/null +++ b/src/stasis/util/bloomFilter.c @@ -0,0 +1,159 @@ +/* + * bloomFilter.c + * + * Copyright 2010-2012 Yahoo! Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: sears + */ +#include +#include +#include +#include +/** + Variable names: + m: number of bloom filter bits + n: number of bloom filter entries + k: number of hash functions = ln(2) * (m/n) + c: m/n + f: false positive rate = (1/2)^k ~= 0.6185)^(m/n) ; + taking log_0.6185 of both sides: k log_0.6185(1/2) = m/n ; + applying change of base: k log(1/2) / log(6.128) = m / n + (but that's not useful; this is:) + + f ~= 0.6185 ^ (m/n) + log_0.6185(f) = m/n + log(f) / log(0.6185) = m / n + m = n log f / log 0.6185 + p: probability a given bit is 1 ~= e^(-kn/m) + */ + +static uint64_t stasis_bloom_filter_calc_num_buckets(uint64_t num_expected_items, + double false_positive_rate) { + // m = n log f / log 0.6185 + return ((uint64_t) ceil(((double)num_expected_items) * + log(false_positive_rate) / log(0.6185))); + // m = - n ln f / ln 2 ^ 2 = - n ln f / 0.4804 = - n log f / (0.4343 * 0.4804) = -n log f / 0.2086 +} +static int stasis_bloom_filter_calc_num_functions(uint64_t num_expected_items, + uint64_t num_buckets) { + // k = ln(2) * (m/n) + int ret = floor((log(2) / log(exp(1.0))) + * ((double) num_buckets) / (double) num_expected_items); + if(ret == 0) { + return 1; + } else { + return ret; + } +} +static double stasis_bloom_filter_current_false_positive_rate(uint64_t actual_number_of_items, + uint64_t num_buckets) { + // 0.6185^(m/n) + return pow(0.6185, ((double)num_buckets)/(double)actual_number_of_items); +} + +struct stasis_bloom_filter_t { + uint64_t (*func_a)(const char *, int); + uint64_t (*func_b)(const char *, int); + uint64_t num_expected_items; + double desired_false_positive_rate; + uint64_t num_buckets; + uint8_t * buckets; + uint64_t num_functions; + uint64_t*result_scratch_space; + uint64_t actual_number_of_items; +}; +stasis_bloom_filter_t * stasis_bloom_filter_create(uint64_t(*func_a)(const char*,int), + uint64_t(*func_b)(const char*,int), + uint64_t num_expected_items, + double false_positive_rate) { + stasis_bloom_filter_t * ret = malloc(sizeof(*ret)); + ret->func_a = func_a; + ret->func_b = func_b; + ret->num_expected_items = num_expected_items; + ret->desired_false_positive_rate = false_positive_rate; + ret->num_buckets = stasis_bloom_filter_calc_num_buckets(ret->num_expected_items, ret->desired_false_positive_rate); + ret->buckets = calloc((ret->num_buckets / 8) + ((ret->num_buckets % 8 == 0) ? 0 : 1), 1); + ret->num_functions = stasis_bloom_filter_calc_num_functions(ret->num_expected_items, ret->num_buckets); + ret->result_scratch_space = malloc(sizeof(*ret->result_scratch_space) * ret->num_functions); + ret->actual_number_of_items = 0; + return ret; +} +void stasis_bloom_filter_destroy(stasis_bloom_filter_t* bf) { + free(bf->buckets); + free(bf->result_scratch_space); + free(bf); +} +// TODO this uses %. It would be better if it used &, but that would potentially double the memory we use. #define a flag. +static void stasis_bloom_filter_calc_functions(stasis_bloom_filter_t * bf, uint64_t* results, const char * key, int keylen) { + uint64_t fa = bf->func_a(key, keylen); + uint64_t fb = bf->func_b(key, keylen); + + results[0] = (fa + fb) % bf->num_buckets; + for(int i = 1; i < bf->num_functions; i++) { + results[i] = (results[i-1] + fb ) % bf->num_buckets; + } +} + +static const uint8_t stasis_bloom_filter_bit_masks[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; +static void stasis_bloom_filter_set_bit(stasis_bloom_filter_t *bf, uint64_t bit) { + uint64_t array_offset = bit >> 3; + uint8_t bit_number = bit & 7; + + assert(bit < bf->num_buckets); + + bf->buckets[array_offset] |= stasis_bloom_filter_bit_masks[bit_number]; + +} +/** + @return 0 if the bit is not set, true otherwise. + */ +static uint8_t stasis_bloom_filter_get_bit(stasis_bloom_filter_t *bf, uint64_t bit) { + uint64_t array_offset = bit >> 3; + uint8_t bit_number = bit & 7; + + assert(bit < bf->num_buckets); + + return bf->buckets[array_offset] & stasis_bloom_filter_bit_masks[bit_number]; +} +void stasis_bloom_filter_insert(stasis_bloom_filter_t * bf, const char *key, int len) { + stasis_bloom_filter_calc_functions(bf, bf->result_scratch_space, key, len); + for(int i = 0; i < bf->num_functions; i++) { + stasis_bloom_filter_set_bit(bf, bf->result_scratch_space[i]); + } + bf->actual_number_of_items++; +} +int stasis_bloom_filter_lookup(stasis_bloom_filter_t * bf, const char * key, int len) { + int ret = 1; + uint64_t * scratch = malloc(sizeof(*scratch) * bf->num_functions); + stasis_bloom_filter_calc_functions(bf, scratch, key, len); + for(int i = 0; i < bf->num_functions; i++) { + ret = ret && stasis_bloom_filter_get_bit(bf, scratch[i]); + } + free(scratch); + return ret; +} + +void stasis_bloom_filter_print_stats(stasis_bloom_filter_t * bf) { + printf("Design capacity %lld design false positive %f\n" + "Current item count %lld current false positive %f\n" + "Number of buckets %lld (%f MB), number of hash functions %lld\n", + (long long)bf->num_expected_items, bf->desired_false_positive_rate, + (long long)bf->actual_number_of_items, + stasis_bloom_filter_current_false_positive_rate(bf->actual_number_of_items, + bf->num_buckets), + (long long)bf->num_buckets, + ((double)bf->num_buckets) / (8.0 * 1024.0 * 1024.0), + (long long)bf->num_functions); +} diff --git a/src/stasis/util/linkedlist.c b/src/stasis/util/linkedlist.c index 3f6e55d..e9c8d17 100644 --- a/src/stasis/util/linkedlist.c +++ b/src/stasis/util/linkedlist.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /****************************** - * $Id$ + * $Id: linkedlist.c 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $ * * simple linked list *****************************/ diff --git a/src/stasis/util/redblack.c b/src/stasis/util/redblack.c index e16c87c..1708dc0 100644 --- a/src/stasis/util/redblack.c +++ b/src/stasis/util/redblack.c @@ -1,4 +1,4 @@ -static char rcsid[]="$Id$"; +static char rcsid[]="$Id: redblack.c 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $"; /* Redblack balanced tree algorithm diff --git a/src/stasis/util/ringbuffer.c b/src/stasis/util/ringbuffer.c index 1e25b7e..11b883d 100644 --- a/src/stasis/util/ringbuffer.c +++ b/src/stasis/util/ringbuffer.c @@ -294,7 +294,8 @@ stasis_ringbuffer_t * stasis_ringbuffer_init(intptr_t base, lsn_t initial_offset if(err == -1) { perror("Couldn't ftruncate file"); } - ring->mem = mmap(0, size*2, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + // Note: MAP_ANON is deprecated, but MAP_ANONYMOUS is not supported on MacOS. + ring->mem = mmap(0, size*2, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0); if(ring->mem == MAP_FAILED) { perror("Couldn't mmap anonymous region"); abort(); } diff --git a/stasis/bufferManager.h b/stasis/bufferManager.h index c5d37d4..397552f 100644 --- a/stasis/bufferManager.h +++ b/stasis/bufferManager.h @@ -74,7 +74,7 @@ terms specified in this license. error is passed up to the application. @ingroup BUFFER_MANAGER - * $Id$ + * $Id: bufferManager.h 1560 2011-10-08 22:01:04Z sears.russell@gmail.com $ */ #ifndef __BUFFERMANAGER_H__ #define __BUFFERMANAGER_H__ diff --git a/stasis/bufferPool.h b/stasis/bufferPool.h index 8e5b129..5821313 100644 --- a/stasis/bufferPool.h +++ b/stasis/bufferPool.h @@ -47,7 +47,7 @@ terms specified in this license. * @file * * Functions to manage the in-memory pool of free page buffers. - * $Id$ + * $Id: bufferPool.h 1542 2011-08-23 18:25:26Z sears.russell@gmail.com $ */ #include diff --git a/stasis/common.h b/stasis/common.h index 7a550a4..e561f3c 100644 --- a/stasis/common.h +++ b/stasis/common.h @@ -53,13 +53,16 @@ terms specified in this license. * the right thing' and build, even if they do not \#include the * config.h file that all of the Stasis stuff uses. * - * $Id$ + * $Id: common.h 1576 2012-01-09 23:00:08Z sears.russell $ */ //#define NDEBUG 1 #ifndef __stasis_common_h #define __stasis_common_h +#ifndef _DARWIN_C_SOURCE +#define _DARWIN_C_SOURCE // For dirent on mac os +#endif #ifndef _XOPEN_SOURCE #define _XOPEN_SOURCE 600 #endif @@ -67,9 +70,11 @@ terms specified in this license. #define _BSD_SOURCE #endif +#include #include // uint32, et. al. (has to be before sys/types.h for mcpp atop some broken gcc headers) -#include #include // for size_t +#include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { diff --git a/stasis/constants.h b/stasis/constants.h index 3ca957f..ee83e26 100644 --- a/stasis/constants.h +++ b/stasis/constants.h @@ -50,7 +50,7 @@ terms specified in this license. * * @ingroup LLADD_CORE * - * $Id$ + * $Id: constants.h 1543 2011-08-25 21:29:51Z sears.russell@gmail.com $ */ #ifndef __CONSTANTS_H__ diff --git a/stasis/experimental/logMemory.h b/stasis/experimental/logMemory.h index 5ff35e1..9170fe3 100644 --- a/stasis/experimental/logMemory.h +++ b/stasis/experimental/logMemory.h @@ -14,7 +14,7 @@ @todo Move this all to some reasonably named interface. :) - $Id$ + $Id: logMemory.h 1523 2011-06-13 06:58:34Z sears.russell@gmail.com $ */ lladdFifo_t * logMemoryFifo(size_t size, lsn_t initialOffset); diff --git a/stasis/flags.h b/stasis/flags.h index 9d6d2ae..e9cb900 100644 --- a/stasis/flags.h +++ b/stasis/flags.h @@ -95,6 +95,13 @@ extern stasis_handle_t* (*stasis_handle_factory)(); Valid options: stasis_handle_open_file(), stasis_handle_open_pfile(), and stasis_handle_non_blocking_factory. */ extern stasis_handle_t* (*stasis_handle_file_factory)(const char* filename, int open_mode, int creat_perms); +/** + * The default stripe size for Stasis' user space raid0 implementation. + * + * This must be a multiple of PAGE_SIZE. + */ +extern uint32_t stasis_handle_raid0_stripe_size; +extern char ** stasis_handle_raid0_filenames; /** The factory that non_blocking handles will use for slow handles. (Only used if stasis_buffer_manager_io_handle_default_factory is set to diff --git a/stasis/io/handle.h b/stasis/io/handle.h index 1f8f85d..f048fe3 100644 --- a/stasis/io/handle.h +++ b/stasis/io/handle.h @@ -2,6 +2,8 @@ #define IO_HANDLE_H #include +BEGIN_C_DECLS + /** stasis_handle() is a macro that prepends a unique prefix to the its argument's function name. It's used to cope with namespace @@ -333,11 +335,23 @@ stasis_handle_t * stasis_handle(open_verifying)(stasis_handle_t * h); */ stasis_handle_t * stasis_handle(open_debug)(stasis_handle_t * h); stasis_handle_t * stasis_handle(open_raid1)(stasis_handle_t *a, stasis_handle_t *b); +/** + * Open a raid0 handle + * + * @param handle_count The number of underlying file handles. + * @param h An array of pointers to the handles. The caller manages the memory that backs the array. + * @param stripe_size The raid 0 stripe size. Must be a multiple of PAGE_SIZE. + */ +stasis_handle_t * stasis_handle(open_raid0)(int handle_count, stasis_handle_t **h, uint32_t stripe_size); stasis_handle_t * stasis_handle_raid1_factory(); +stasis_handle_t * stasis_handle_raid0_factory(); /** * Open a Stasis file handle using default arguments. */ stasis_handle_t * stasis_handle_default_factory(); + +END_C_DECLS + #endif diff --git a/stasis/logger/logEntry.h b/stasis/logger/logEntry.h index 3be9d3d..34d4f34 100644 --- a/stasis/logger/logEntry.h +++ b/stasis/logger/logEntry.h @@ -52,7 +52,7 @@ terms specified in this license. @ingroup LLADD_CORE - $Id$ + $Id: logEntry.h 1332 2010-01-19 02:14:09Z sears.russell $ */ #include diff --git a/stasis/operations.h b/stasis/operations.h index 769f10d..cdcd7d2 100644 --- a/stasis/operations.h +++ b/stasis/operations.h @@ -47,7 +47,7 @@ terms specified in this license. * * @ingroup OPERATIONS * @todo The functions in operations.h don't belong in the API, but it defines some constants and typedefs that should be there. - * $Id$ + * $Id: operations.h 1526 2011-06-13 11:26:25Z sears.russell@gmail.com $ */ /** @defgroup COLLECTIONS Collections diff --git a/stasis/operations/alloc.h b/stasis/operations/alloc.h index 50768f2..a3ff139 100644 --- a/stasis/operations/alloc.h +++ b/stasis/operations/alloc.h @@ -5,7 +5,7 @@ @ingroup OPERATIONS - $Id$ + $Id: alloc.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $ */ diff --git a/stasis/operations/arrayList.h b/stasis/operations/arrayList.h index 715c1a6..e992dec 100644 --- a/stasis/operations/arrayList.h +++ b/stasis/operations/arrayList.h @@ -67,7 +67,7 @@ terms specified in this license. @ingroup COLLECTIONS - $Id$ + $Id: arrayList.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $ */ diff --git a/stasis/operations/decrement.h b/stasis/operations/decrement.h index 2f720cd..e7001c1 100644 --- a/stasis/operations/decrement.h +++ b/stasis/operations/decrement.h @@ -49,7 +49,7 @@ terms specified in this license. * * @see increment.h * - * $Id$ + * $Id: decrement.h 1240 2009-08-22 00:01:02Z sears.russell $ */ #ifndef __DECREMENT_H__ diff --git a/stasis/operations/increment.h b/stasis/operations/increment.h index 6307106..04446d4 100644 --- a/stasis/operations/increment.h +++ b/stasis/operations/increment.h @@ -54,7 +54,7 @@ terms specified in this license. * * @ingroup OPERATIONS * - * $Id$ + * $Id: increment.h 1240 2009-08-22 00:01:02Z sears.russell $ */ diff --git a/stasis/operations/lladdhash.h b/stasis/operations/lladdhash.h index 401b91f..4ff77f9 100644 --- a/stasis/operations/lladdhash.h +++ b/stasis/operations/lladdhash.h @@ -4,7 +4,7 @@ * (actually based on jbhash.c, which was based on pblhash) * * - * $Id$ + * $Id: lladdhash.h 147 2005-01-20 21:19:47Z sears $ */ /** @@ -27,7 +27,7 @@ @ingroup OPERATIONS - $Id$ + $Id: lladdhash.h 147 2005-01-20 21:19:47Z sears $ */ diff --git a/stasis/operations/naiveLinearHash.h b/stasis/operations/naiveLinearHash.h index 61b4aaa..0cd27c7 100644 --- a/stasis/operations/naiveLinearHash.h +++ b/stasis/operations/naiveLinearHash.h @@ -7,7 +7,7 @@ @ingroup OPERATIONS - $Id$ + $Id: naiveLinearHash.h 1090 2008-11-03 21:42:42Z sears.russell $ */ #ifndef __NAIVE_LINEAR_HASH_H diff --git a/stasis/operations/noop.h b/stasis/operations/noop.h index 07ad897..bfda90d 100644 --- a/stasis/operations/noop.h +++ b/stasis/operations/noop.h @@ -47,7 +47,7 @@ terms specified in this license. * * @ingroup OPERATIONS * - * $Id$ + * $Id: noop.h 1157 2009-03-31 05:02:54Z sears.russell $ * **********************************************/ diff --git a/stasis/operations/pageOperations.h b/stasis/operations/pageOperations.h index 4ccf283..b9b985d 100644 --- a/stasis/operations/pageOperations.h +++ b/stasis/operations/pageOperations.h @@ -53,7 +53,7 @@ terms specified in this license. * * @see page.h * - * $Id$ + * $Id: pageOperations.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $ */ #ifndef __PAGE_OPERATIONS_H__ #define __PAGE_OPERATIONS_H__ diff --git a/stasis/operations/prepare.h b/stasis/operations/prepare.h index 2d6ac6d..bb8f987 100644 --- a/stasis/operations/prepare.h +++ b/stasis/operations/prepare.h @@ -67,7 +67,7 @@ terms specified in this license. * * @ingroup OPERATIONS * - * $Id$ + * $Id: prepare.h 1157 2009-03-31 05:02:54Z sears.russell $ * * */ diff --git a/stasis/operations/set.h b/stasis/operations/set.h index e87595c..5eb4395 100644 --- a/stasis/operations/set.h +++ b/stasis/operations/set.h @@ -47,7 +47,7 @@ terms specified in this license. * * @ingroup OPERATIONS * - * $Id$ + * $Id: set.h 1519 2011-06-12 09:20:58Z sears.russell@gmail.com $ * **********************************************/ diff --git a/stasis/page/slotted.h b/stasis/page/slotted.h index d55fdce..ca497d2 100644 --- a/stasis/page/slotted.h +++ b/stasis/page/slotted.h @@ -52,7 +52,7 @@ Slotted page layout: START - $Id$ + $Id: slotted.h 1571 2011-11-09 21:37:38Z sears.russell@gmail.com $ @todo slotted.c Should know that specific record types (like blobs) exist, (but should not hardcode information about these types) This diff --git a/stasis/transactional.h b/stasis/transactional.h index 2020d53..396b7b4 100644 --- a/stasis/transactional.h +++ b/stasis/transactional.h @@ -566,7 +566,7 @@ terms specified in this license. * @todo error handling * * @ingroup LLADD_CORE - * $Id$ + * $Id: transactional.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $ */ #ifndef __TRANSACTIONAL_H__ diff --git a/stasis/truncation.h b/stasis/truncation.h index 02cfa54..40e7ac9 100644 --- a/stasis/truncation.h +++ b/stasis/truncation.h @@ -49,7 +49,7 @@ terms specified in this license. * (instead of by absolute logfile size) * @todo avoid copying the non-truncated tail of the log each time truncation occurs. * - * $Id$ + * $Id: truncation.h 1263 2009-10-14 21:22:50Z sears.russell $ * */ #ifndef STASIS_TRUNCATION_H diff --git a/stasis/util/bloomFilter.h b/stasis/util/bloomFilter.h new file mode 100644 index 0000000..46ade64 --- /dev/null +++ b/stasis/util/bloomFilter.h @@ -0,0 +1,50 @@ +/* + * bloomFilter.h + * + * Copyright 2010-2012 Yahoo! Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: sears + */ +#ifndef STASIS_BLOOM_FILTER_H +#define STASIS_BLOOM_FILTER_H + +#include + +BEGIN_C_DECLS + +typedef struct stasis_bloom_filter_t stasis_bloom_filter_t; + +/** + @return 0 if there is not enough memory, or some other error occurred; a + pointer to the new bloom filter otherwise. + */ +stasis_bloom_filter_t * stasis_bloom_filter_create(uint64_t(*hash_func_a)(const char*,int), + uint64_t(*hash_func_b)(const char*,int), + uint64_t num_expected_items, + double false_positive_rate); + +void stasis_bloom_filter_destroy(stasis_bloom_filter_t*); + +void stasis_bloom_filter_insert(stasis_bloom_filter_t * bf, const char* key, int len); +/** + @return 1 if the value might be in the bloom filter, 0 otherwise + */ +int stasis_bloom_filter_lookup(stasis_bloom_filter_t * bf, const char* key, int len); + +void stasis_bloom_filter_print_stats(stasis_bloom_filter_t * bf); + +END_C_DECLS + +#endif diff --git a/stasis/util/concurrentBTree.h b/stasis/util/concurrentBTree.h new file mode 100644 index 0000000..d023edc --- /dev/null +++ b/stasis/util/concurrentBTree.h @@ -0,0 +1,60 @@ +#include + +#define PAGE void* + +typedef uint64_t metadata_t; + +// TODO: These need to be packed! +typedef struct stasis_btree_index_page_header { + metadata_t metadata; + PAGE parent; + pthread_rwlock_t sx_latch; +} stasis_btree_index_page; + +typedef struct stasis_btree_data_page_header { + metadata_t metadata; + PAGE parent; + PAGE rightSibling; + PAGE leftSibling; + pthread_rwlock_t latch; +}; + +static inline byte metadata_is_leaf(metadata_t m) { + return m & 0x1; +} +static inline metadata_t metadata_set_leaf(metadata_t m) { + return m | 0x1; +} +static inline metadata_t metadata_clear_leaf(metadata_t m) { + return m & ~0x1; +} + +static inline byte metadata_is_balanced(metadata_t m) { + return m & 0x2; +} +static inline byte metadata_set_balanced(metadata_t m) { + return m | 0x2; +} +static inline byte metadata_clear_balanced(metadata_t m) { + return m & ~0x2; +} +typedef enum { + TEMP = 0, + RED = 1, + GREEN = 2, + BLUE = 3 +} color_t; + +static inline color_t leaf_metadata_get_color(metadata_t m) { + return (m & (0x4 | 0x8)) >> 2; +} +static inline metadata_t leaf_metadata_set_color(metadata_t m, color_t c) { + return (m & ~(0x4 | 0x8)) | (c << 2); +} + +static inline int index_metadata_get_level(metadata_t m) { + return (m & (0x4 | 0x8 | 0x10)) >> 2; +} +static inline metadata_t index_metadata_set_level(metadata_t m, int c) { + return (m & ~(0x4 | 0x8 | 0x10)) | (c << 2); +} diff --git a/stasis/util/concurrentSkipList.h b/stasis/util/concurrentSkipList.h new file mode 100644 index 0000000..21a2cec --- /dev/null +++ b/stasis/util/concurrentSkipList.h @@ -0,0 +1,393 @@ +/* + * concurrentSkipList.h + * + * Created on: Feb 8, 2012 + * Author: sears + */ +#ifndef CONCURRENTSKIPLIST_H_ +#define CONCURRENTSKIPLIST_H_ + +#include +#include +#include + +BEGIN_C_DECLS + +//#define stasis_util_skiplist_assert(x) assert(x) +#define stasis_util_skiplist_assert(x) + +#define STASIS_SKIPLIST_HP_COUNT 3 + +#include + +typedef struct stasis_skiplist_node_t { + hazard_ptr key; + pthread_mutex_t level_mut; + char level; + int16_t refcount; +} stasis_skiplist_node_t; +typedef struct { + hazard_ptr header; + int levelCap; + int levelHint; + pthread_mutex_t levelHint_mut; + pthread_key_t k; + hazard_t * h; + hazard_t * ret_hazard; + int (*cmp)(const void *a, const void *b); + int (*finalize)(void *node, void *nul); +} stasis_skiplist_t; +static inline stasis_skiplist_node_t** stasis_util_skiplist_get_forward_raw( + stasis_skiplist_node_t * x, int n) { + return (stasis_skiplist_node_t**)(((intptr_t)(x + 1)) + +(n-1)*(sizeof(stasis_skiplist_node_t*)+sizeof(pthread_mutex_t))); +} +static inline hazard_ptr* stasis_util_skiplist_get_forward( + stasis_skiplist_node_t * x, int n){ + return (hazard_ptr*)stasis_util_skiplist_get_forward_raw(x,n); +} +static inline pthread_mutex_t * stasis_util_skiplist_get_forward_mutex( + stasis_skiplist_node_t * x, int n) { + return (pthread_mutex_t*)(stasis_util_skiplist_get_forward(x,n)+1); +} +int stasis_util_skiplist_node_finalize(void * pp, void * conf) { + stasis_skiplist_node_t * p = pp; + stasis_skiplist_t * list = conf; + if(p->refcount == 0) { + void * oldKey = (void*)p->key; // do this early to find races. + for(int i = 1; i <= p->level; i++) { + stasis_skiplist_node_t * n = *stasis_util_skiplist_get_forward_raw(p, i); + int oldval = __sync_sub_and_fetch(&n->refcount, 1); + (void)oldval; + stasis_util_skiplist_assert(oldval >= 0); + } + hazard_free(list->ret_hazard, oldKey); + pthread_mutex_destroy(&p->level_mut); + stasis_util_skiplist_assert(oldKey == (void*)p->key); + stasis_util_skiplist_assert(p->refcount == 0); + free(p); + return 1; + } else { + return 0; + } +} +int stasis_util_skiplist_default_key_finalize(void * p, void * ignored) { + free(p); + return 1; +} + + +static inline int stasis_util_skiplist_random_level(pthread_key_t k) { + kiss_table_t * kiss = pthread_getspecific(k); + if(kiss == 0) { + kiss = malloc(sizeof(*kiss)); + stasis_util_random_kiss_settable(kiss, + random(), random(), random(), random(), random(), random()); + pthread_setspecific(k, kiss); + } + // MWC is weaker but faster than KISS. The main drawback is that it has a + // period of 2^60. I can't imagine that mattering for our purposes. + + // __builtin_ctz counts trailing zeros, so, this function hardcodes p = 0.5. + // MWC returns a 32-bit int; above 2^32 elements we start to violate the + // O(log n) bounds. + return 1+__builtin_ctz(stasis_util_random_kiss_MWC(kiss)); +} + + +static inline hazard_ptr stasis_util_skiplist_make_node(int level, void * key) { + stasis_skiplist_node_t * x + = malloc(sizeof(*x) + + (level) * (sizeof(hazard_ptr) + sizeof(pthread_mutex_t))); + x->key = (hazard_ptr)key; + x->level = level; + x->refcount = 0; + pthread_mutex_init(&x->level_mut,0); + for(int i = 1; i <= level; i++) { + pthread_mutex_init(stasis_util_skiplist_get_forward_mutex(x, i), 0); + *stasis_util_skiplist_get_forward(x, i) = 0; + } + return (hazard_ptr)x; +} +static inline void stasis_util_skiplist_cleanup_tls(void * p) { + free(p); +} +static inline int stasis_util_skiplist_cmp_helper( + stasis_skiplist_t* list, stasis_skiplist_node_t *a, void * bkey) { + if(a == NULL) { return 1; } + void *akey = hazard_ref(list->ret_hazard, 1, &(a->key)); + int ret; + if(akey == NULL) { ret = (bkey == NULL ? 0 : -1); } + else if(bkey == NULL) { ret = 1; } + else {ret = list->cmp(akey, bkey); } + hazard_release(list->ret_hazard, 1); + return ret; +} +static inline int stasis_util_skiplist_cmp_helper2( + stasis_skiplist_t* list, stasis_skiplist_node_t *a, stasis_skiplist_node_t * b) { + if(b == NULL) { return a == NULL ? 0 : -1; } + void *bkey = hazard_ref(list->ret_hazard, 2, &(b->key)); + int ret = stasis_util_skiplist_cmp_helper(list, a, bkey); + hazard_release(list->ret_hazard, 2); + return ret; +} +static inline stasis_skiplist_t * stasis_util_skiplist_init( + int (*cmp)(const void*, const void*), + int (*finalize)(void *, void * nul)) { + stasis_skiplist_t * list = malloc(sizeof(*list)); + list->levelCap = 32; + list->h = hazard_init(STASIS_SKIPLIST_HP_COUNT+list->levelCap, + STASIS_SKIPLIST_HP_COUNT, 250, stasis_util_skiplist_node_finalize, list); + list->finalize + = finalize ? finalize : stasis_util_skiplist_default_key_finalize; + list->ret_hazard = hazard_init(3, 3, 250, list->finalize, NULL); + list->levelHint = 1; + pthread_mutex_init(&list->levelHint_mut, 0); + list->header = stasis_util_skiplist_make_node(list->levelCap, NULL); + pthread_key_create(&(list->k), stasis_util_skiplist_cleanup_tls); + list->cmp = cmp; + return list; +} +static inline void stasis_util_skiplist_deinit(stasis_skiplist_t * list) { + hazard_deinit(list->h); + hazard_deinit(list->ret_hazard); + pthread_mutex_destroy(&list->levelHint_mut); + free((void*)list->header); + kiss_table_t * kiss = pthread_getspecific(list->k); + if(kiss) { + stasis_util_skiplist_cleanup_tls(kiss); + pthread_setspecific(list->k, 0); + } + pthread_key_delete(list->k); + free(list); +} + +static inline void * stasis_util_skiplist_search(stasis_skiplist_t * list, void * searchKey) { + // the = 0 here are to silence GCC -O3 warnings. + stasis_skiplist_node_t *x, *y = 0; + int cmp = 0; + x = hazard_set(list->h,0,(void*)list->header); + for(int i = list->levelHint; i > 0; i--) { + y = hazard_ref(list->h,1,stasis_util_skiplist_get_forward(x, i)); + while((cmp = stasis_util_skiplist_cmp_helper(list, y, searchKey)) < 0) { + x = hazard_set(list->h,0,(void*)y); + y = hazard_ref(list->h,1,stasis_util_skiplist_get_forward(x, i)); + } + } + void * ret; + if(cmp == 0) { + ret = hazard_ref(list->ret_hazard, 0, &(y->key)); + } else { + ret = 0; + hazard_release(list->ret_hazard, 0); + } + hazard_release(list->h,0); + hazard_release(list->h,1); + return ret; +} +static inline stasis_skiplist_node_t * stasis_util_skiplist_get_lock( + stasis_skiplist_t * list, stasis_skiplist_node_t * x, void * searchKey, int i) { + stasis_skiplist_node_t * z + = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i)); + while(stasis_util_skiplist_cmp_helper(list, z, searchKey) < 0) { + x = hazard_set(list->h, 0, (void*)z); + z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i)); + } + pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(x, i)); + z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i)); + while(stasis_util_skiplist_cmp_helper(list, z, searchKey) < 0) { + // Should lock of z be here? + pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i)); + x = hazard_set(list->h, 0, (void*)z); + // Note: lock of z was here (and it was called x) + pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(x, i)); + z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i)); + } + stasis_util_skiplist_assert(stasis_util_skiplist_cmp_helper2(list, x, (stasis_skiplist_node_t*)*stasis_util_skiplist_get_forward(x, i)) < 0); + hazard_release(list->h, 2); + return x; +} +/** + * Insert a value into the skiplist. Any existing value will be replaced. + * @return the old value or null if there was no such value. + */ +static inline void * stasis_util_skiplist_insert(stasis_skiplist_t * list, void * searchKey) { + stasis_skiplist_node_t * update[list->levelCap+1]; + stasis_skiplist_node_t *x, *y; +IN: + x = hazard_set(list->h, 0, (void*)list->header); + int L = list->levelHint; + // for i = L downto 1 + int i; + for(i = L+1; i > 1;) { + i--; + y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i)); + while(stasis_util_skiplist_cmp_helper(list, y, searchKey) < 0) { + x = hazard_set(list->h, 0, (void*)y); + y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i)); + } + update[i] = hazard_set(list->h, STASIS_SKIPLIST_HP_COUNT+(L-i), x); + } + // update[L..1] is set. + // h [HP_COUNT+[0..L-1] is set. + // Note get_lock grabs the hazard pointer for x. + x = stasis_util_skiplist_get_lock(list, x, searchKey, 1); + y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, 1)); + if(stasis_util_skiplist_cmp_helper(list, y, searchKey) == 0) { + pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, 1)); + pthread_mutex_lock(&y->level_mut); + + x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, 1)); + int isGarbage = stasis_util_skiplist_cmp_helper(list, x, searchKey) < 0; + if(!isGarbage) { + void * oldKey; + do { + oldKey = hazard_ref(list->ret_hazard, 0, &(y->key)); + } while(!__sync_bool_compare_and_swap(&(y->key), oldKey, searchKey)); + pthread_mutex_unlock(&y->level_mut); + + + hazard_release(list->h, 0); + hazard_release(list->h, 1); + for(int i = L; i > 0; i--) { + hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT); + // h [HP_COUNT+[L-1..0] is cleared + } + hazard_free(list->ret_hazard, oldKey); + return oldKey; + } else { + pthread_mutex_unlock(&y->level_mut); +// printf("insert landed on garbage node. retrying.\n"); + goto IN; + } + } + hazard_ptr newnode = stasis_util_skiplist_make_node(stasis_util_skiplist_random_level(list->k), searchKey); + y = hazard_set(list->h, 1, (void*)newnode); + pthread_mutex_lock(&y->level_mut); + for(int i = L+1; i <= y->level; i++) { + update[i] = (void*)list->header; + } + // update[L+1..y->level] is set + for(int i = 1; i <= y->level; i++) { + if(i != 1) { + x = stasis_util_skiplist_get_lock(list, update[i], searchKey, i); + } + *stasis_util_skiplist_get_forward(y, i) = *stasis_util_skiplist_get_forward(x, i); + *stasis_util_skiplist_get_forward(x, i) = (hazard_ptr)y; + pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i)); + } + pthread_mutex_unlock(&y->level_mut); + + int L2 = list->levelHint; + if(L2 < list->levelCap && *stasis_util_skiplist_get_forward((void*)list->header, L2+1) != 0) { + if(pthread_mutex_trylock(&list->levelHint_mut) == 0) { + while(list->levelHint < list->levelCap && + *stasis_util_skiplist_get_forward((void*)list->header, list->levelHint+1) != 0) { + list->levelHint = list->levelHint+1; // XXX atomics? + } + pthread_mutex_unlock(&list->levelHint_mut); + } + } + hazard_release(list->h, 0); + hazard_release(list->h, 1); + for(int i = L; i > 0; i--) { + // h [HP_COUNT+[L-1..0] is cleared + hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT); + } + return NULL; +} +/** + * Delete a value from the list, returning it if it existed. + * @return The old value, or null. + */ +static inline void * stasis_util_skiplist_delete(stasis_skiplist_t * list, void * searchKey) { + stasis_skiplist_node_t * update[list->levelCap+1]; + stasis_skiplist_node_t *x, *y; + x = hazard_set(list->h, 0, (void*)list->header); + int L = list->levelHint; + // for i = L downto 1 + int i; + for(i = L+1; i > 1;) { + i--; // decrement after check, so that i is 1 at the end of the loop. + y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i)); + while(stasis_util_skiplist_cmp_helper(list, y, searchKey) < 0) { + x = hazard_set(list->h, 0, (void*)y); + y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i)); + } + update[i] = hazard_set(list->h, STASIS_SKIPLIST_HP_COUNT+(L-i), x); + } + // h[HP_COUNT+[0..L-1] is set + y = hazard_set(list->h, 1, (void*)x); + int isGarbage = 0; + int first = 1; + // do ... until equal and not garbage + do { + // Note: it is unsafe to copy y->i directly into y, since doing so releases + // the hazard pointer in race. Fortunately, we don't need x for anything + // until we overwrite it immediately below. + x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, i)); + if(first) { + first = 0; + } else { + // This unlock was not in the pseudocode, but seems to be necessary... + pthread_mutex_unlock(&y->level_mut); + } + y = hazard_set(list->h, 1, x); + if(stasis_util_skiplist_cmp_helper(list, y, searchKey) > 0) { + hazard_release(list->ret_hazard, 0); + hazard_release(list->h, 0); + hazard_release(list->h, 1); + for(i = L+1; i > 1;) { + i--; + hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT); + // h[HP_COUNT+[L-1..0] is cleared + } + return NULL; + } + pthread_mutex_lock(&y->level_mut); + x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, i)); + // Note: this is a > in pseudocode, which lets equal nodes link back into themselves. + isGarbage = stasis_util_skiplist_cmp_helper2(list, y, x) > 0; + // pseudocode would unlock if garbage here. Moved unlock to top of loop. + } while(!(!isGarbage && stasis_util_skiplist_cmp_helper(list, y, searchKey) == 0)); + for(int i = L+1; i <= y->level; i++) { update[i] = (void*)list->header; } + for(int i = y->level; i > 0; i--) { + x = stasis_util_skiplist_get_lock(list, update[i], searchKey, i); + pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(y, i)); + stasis_util_skiplist_assert(*stasis_util_skiplist_get_forward(x, i) == (intptr_t)y); + __sync_fetch_and_add(&x->refcount, 1); + *stasis_util_skiplist_get_forward(x, i) = *stasis_util_skiplist_get_forward(y, i); + *stasis_util_skiplist_get_forward(y, i) = (hazard_ptr)x; + stasis_util_skiplist_assert(stasis_util_skiplist_cmp_helper2(list, y, x) > 0); // assert is garbage + pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i)); + pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(y, i)); + } + + void * oldKey = hazard_ref(list->ret_hazard, 0, &(y->key)); + pthread_mutex_unlock(&y->level_mut); + int L2 = list->levelHint; + if(L2 > 1 && *stasis_util_skiplist_get_forward((void*)list->header, L2) == 0) { + if(pthread_mutex_trylock(&list->levelHint_mut) == 0) { + while(list->levelHint > 1 && (stasis_skiplist_node_t*)*stasis_util_skiplist_get_forward((void*)list->header, list->levelHint) == 0) { + list->levelHint = list->levelHint - 1; + } + pthread_mutex_unlock(&list->levelHint_mut); + } + } + hazard_release(list->h, 0); + hazard_release(list->h, 1); + for(i = L+1; i > 1;) { + i--; + hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT); + // h[HP_COUNT+[L-1..0] is cleared + } + // oldKey will be freed by y's finalizer + hazard_free(list->h, y); + return oldKey; +} +void stasis_skiplist_release(stasis_skiplist_t * list) { + hazard_release(list->ret_hazard, 0); +} + +END_C_DECLS +#endif /* CONCURRENTSKIPLIST_H_ */ diff --git a/stasis/util/hazard.h b/stasis/util/hazard.h new file mode 100644 index 0000000..49b5370 --- /dev/null +++ b/stasis/util/hazard.h @@ -0,0 +1,205 @@ +/* + * hazard.h + * + * Created on: Feb 7, 2012 + * Author: sears + */ +#include +#include +#include +#ifndef HAZARD_H_ +#define HAZARD_H_ + +typedef intptr_t hazard_ptr; +typedef struct hazard_t hazard_t; + +typedef struct hazard_ptr_rec_t { + hazard_t * h; + hazard_ptr * hp; + void ** rlist; + int rlist_len; + struct hazard_ptr_rec_t * next; +} hazard_ptr_rec_t; + +struct hazard_t { + pthread_key_t hp; + int num_slots; + int stack_start; + int num_r_slots; + int (*finalizer)(void *, void* conf); + void * conf; + hazard_ptr_rec_t * tls_list; + pthread_mutex_t tls_list_mut; + pthread_cond_t thread_shutdown; +}; +static int intptr_cmp(const void * ap, const void *bp) { + intptr_t a = *(intptr_t*)ap; + intptr_t b = *(intptr_t*)bp; + return (a < b) ? -1 : ( (a > b) ? 1 : 0 ); +} +static inline void hazard_scan(hazard_t * h, hazard_ptr_rec_t * rec) { + if(rec == NULL) { + rec = pthread_getspecific(h->hp); + } + if(rec == NULL) { return; } + qsort(rec->rlist, rec->rlist_len, sizeof(void*), intptr_cmp); + hazard_ptr * ptrs = 0; + int ptrs_len = 0; + pthread_mutex_lock(&h->tls_list_mut); + hazard_ptr_rec_t * list = h->tls_list; + while(list != NULL) { + ptrs = realloc(ptrs, sizeof(hazard_ptr) * (ptrs_len+h->num_slots)); +// int would_stop = 0; + for(int i = 0; i < h->num_slots; i++) { + ptrs[ptrs_len] = list->hp[i]; + if(!ptrs[ptrs_len]) { +// if(i >= h->stack_start) { would_stop = 1; } + if(i >= h->stack_start) { break; } + } else { +// assert(! would_stop); + ptrs_len++; + } + } + list = list->next; + } + pthread_mutex_unlock(&h->tls_list_mut); + qsort(ptrs, ptrs_len, sizeof(void*), intptr_cmp); + int i = 0, j = 0; + while(j < rec->rlist_len) { + while(i < ptrs_len && (hazard_ptr)rec->rlist[j] > ptrs[i]) { i++; } + if(i == ptrs_len || (hazard_ptr)rec->rlist[j] != ptrs[i]) { + if(h->finalizer((void*)rec->rlist[j], h->conf)) { + rec->rlist[j] = 0; + } + } + j++; + } + j = 0; + for(i = 0; i < rec->rlist_len; i++) { + if(rec->rlist[i] != 0) { + rec->rlist[j] = rec->rlist[i]; + j++; + } + } + rec->rlist_len = j; + free(ptrs); +} +static void hazard_deinit_thread(void * p) { + hazard_ptr_rec_t * rec = p; + if(rec != NULL) { + while(rec->rlist_len != 0) { + hazard_scan(rec->h, rec); + if(rec->rlist_len != 0) { + pthread_mutex_lock(&rec->h->tls_list_mut); + struct timeval tv; + gettimeofday(&tv, 0); + struct timespec ts = stasis_double_to_timespec(stasis_timeval_to_double(tv) + 0.01); + pthread_cond_timedwait(&rec->h->thread_shutdown, + &rec->h->tls_list_mut, &ts); + pthread_mutex_unlock(&rec->h->tls_list_mut); + } + } + pthread_cond_broadcast(&rec->h->thread_shutdown); + pthread_mutex_lock(&rec->h->tls_list_mut); + hazard_ptr_rec_t ** last = &rec->h->tls_list; + hazard_ptr_rec_t * list = *last; + while(list != rec) { + last = &list->next; + list = *last; + } + *last = rec->next; + pthread_mutex_unlock(&rec->h->tls_list_mut); + free(rec->hp); + free(rec->rlist); + free(rec); + } +} +/** + * Init the state necessary for a set of hazard pointers. This module + * implements an optimization where the higher numbered pointers can be treated + * as a fixed length stack. Entries after the first NULL in that region will + * be ignored. This allows applications that need varying numbers of hazard + * pointers to be collected efficiently. + * + * @param hp_slots the total number of slots. + * @param stack_start the first hazard pointer in the "stack" region. + * @param r_slots the max number of uncollected values per thread. + */ +static inline hazard_t* hazard_init(int hp_slots, int stack_start, int r_slots, + int (*finalizer)(void*, void*), void * conf) { + hazard_t * ret = malloc(sizeof(hazard_t)); + pthread_key_create(&ret->hp, hazard_deinit_thread); + ret->num_slots = hp_slots; + ret->stack_start = stack_start; + ret->num_r_slots = r_slots; + ret->tls_list = NULL; + ret->finalizer = finalizer; + ret->conf = conf; + pthread_mutex_init(&ret->tls_list_mut,0); + pthread_cond_init(&ret->thread_shutdown, 0); + return ret; +} +static inline hazard_ptr_rec_t * hazard_ensure_tls(hazard_t * h) { + hazard_ptr_rec_t * rec = pthread_getspecific(h->hp); + if(rec == NULL) { + rec = malloc(sizeof(hazard_ptr_rec_t)); + rec->hp = calloc(h->num_slots, sizeof(hazard_ptr)); + rec->rlist = calloc(h->num_r_slots, sizeof(hazard_ptr)); + rec->rlist_len = 0; + rec->h = h; + pthread_setspecific(h->hp, rec); + pthread_mutex_lock(&h->tls_list_mut); + rec->next = h->tls_list; + h->tls_list = rec; + pthread_mutex_unlock(&h->tls_list_mut); + } + return rec; +} +static inline void hazard_deinit(hazard_t * h) { + hazard_ptr_rec_t * rec = pthread_getspecific(h->hp); + hazard_deinit_thread(rec); + pthread_key_delete(h->hp); + assert(h->tls_list == NULL); + pthread_mutex_destroy(&h->tls_list_mut); + pthread_cond_destroy(&h->thread_shutdown); + free(h); +} +static inline void * hazard_ref(hazard_t* h, int slot, hazard_ptr* ptr) { + hazard_ptr_rec_t * rec = hazard_ensure_tls(h); + do { + rec->hp[slot] = *ptr; // Read ptr from ram + __sync_synchronize(); // Push HP to ram + } while(rec->hp[slot] != *ptr); // Re-read ptr from ram + return (void*) rec->hp[slot]; +} +/** + * Set a hazard pointer using a known-stable address. This is mostly useful + * when the value pointed to by one hazard pointer should be pointed to by + * another hazard pointer. + */ +static inline void* hazard_set(hazard_t* h, int slot, void* val) { + hazard_ptr_rec_t * rec = hazard_ensure_tls(h); + rec->hp[slot] = (hazard_ptr)val; + // val is stable (and on our stack!) so there's no reason to re-check it. + __sync_synchronize(); + return val; +} +static inline void hazard_release(hazard_t* h, int slot) { + hazard_ptr_rec_t * rec = hazard_ensure_tls(h); + __sync_synchronize(); // prevent the = 0 from being moved before this line. + rec->hp[slot] = 0; +} +// Must be called *after* all references to ptr are removed. +static inline void hazard_free(hazard_t* h, void* ptr) { + hazard_ptr_rec_t * rec = hazard_ensure_tls(h); + rec->rlist[rec->rlist_len] = ptr; + (rec->rlist_len)++; + while(rec->rlist_len == h->num_r_slots) { + hazard_scan(h, rec); + if(rec->rlist_len == h->num_r_slots) { + struct timespec slp = stasis_double_to_timespec(0.001); + nanosleep(&slp,0); + } + } +} +#endif /* HAZARD_H_ */ diff --git a/stasis/util/latches.h b/stasis/util/latches.h index d2724c9..43f7853 100644 --- a/stasis/util/latches.h +++ b/stasis/util/latches.h @@ -115,7 +115,7 @@ void __profile_deletelock (rwl *lock); #define FETCH_AND_ADD(_o,_i) __sync_fetch_and_add(_o,_i) #if ULONG_MAX <= 4294967295 // are we on a 32 bit machine? #define ATOMIC_READ_64(mutex,_o) FETCH_AND_ADD(_o,0) -#define ATOMIC_WRITE_64(mutex,_o,_n) _sync_lock_test_and_set(_o,_n) +#define ATOMIC_WRITE_64(mutex,_o,_n) __sync_lock_test_and_set(_o,_n) #else // this is a 33 or greater bit machine. Assume it's 64 bit, and that 64 bit writes are atomic. #define ATOMIC_READ_64(mutex,_a) *_a #define ATOMIC_WRITE_64(mutex,_a,_n) do {*_a=_n; } while (0) diff --git a/stasis/util/random.h b/stasis/util/random.h index ad0d1be..c6251b8 100644 --- a/stasis/util/random.h +++ b/stasis/util/random.h @@ -14,5 +14,76 @@ BEGIN_C_DECLS uint64_t stasis_util_random64(uint64_t x); +/** + * The remainder of this file is a cleaned up and reentrant version of George + * Marsaglia's generators from stat.sci.math in 1999. + * + * I was concerned that replacing the macros with static inlines would hurt + * the optimizer, but for some reason, on my machine, and with -O3, + * check_kiss.c is about 2x faster than the version from the newsgroup posting. + * + * @see check_kiss.c for the original, unmodified code. + */ +typedef struct { + uint32_t x; + uint32_t y; + uint32_t bro; + uint8_t c; + uint32_t z; + uint32_t w; + uint32_t jsr; + uint32_t jcong; + uint32_t a; + uint32_t b; + uint32_t t[256]; + +} kiss_table_t; +static inline uint32_t stasis_util_random_kiss_znew(kiss_table_t* k) { + return k->z=36969*(k->z&65535)+(k->z>>16); +} +static inline uint32_t stasis_util_random_kiss_wnew(kiss_table_t* k) { + return k->w=18000*(k->w&65535)+(k->w>>16); +} +static inline uint32_t stasis_util_random_kiss_MWC(kiss_table_t* k) { + return (stasis_util_random_kiss_znew(k)<<16)+stasis_util_random_kiss_wnew(k); +} +static inline uint32_t stasis_util_random_kiss_SHR3(kiss_table_t* k) { + return (k->jsr^=(k->jsr<<17), k->jsr^=(k->jsr>>13), k->jsr^=(k->jsr<<5)); +} +static inline uint32_t stasis_util_random_kiss_CONG(kiss_table_t* k) { + return k->jcong=69069*k->jcong+1234567; +} +static inline uint32_t stasis_util_random_kiss_FIB(kiss_table_t* k) { + return ((k->b=k->a+k->b),(k->a=k->b-k->a)); +} +static inline uint32_t stasis_util_random_kiss_KISS(kiss_table_t* k) { + return (stasis_util_random_kiss_MWC(k)^stasis_util_random_kiss_CONG(k)) + + stasis_util_random_kiss_SHR3(k); +} +static inline uint32_t stasis_util_random_kiss_LFIB4(kiss_table_t* k) { + (k->c)++; + return + k->t[k->c]=k->t[k->c] + +k->t[(uint8_t)(k->c+58)] + +k->t[(uint8_t)(k->c+119)] + +k->t[(uint8_t)(k->c+178)]; +} +static inline uint32_t stasis_util_random_kiss_SWB(kiss_table_t* k) { + return (k->c++, + k->bro=(k->xy), + k->t[k->c]=(k->x=k->t[(uint8_t)(k->c+34)])-(k->y=k->t[(uint8_t)(k->c+19)]+k->bro)); +} +static inline float stasis_util_random_kiss_UNI(kiss_table_t *k) { + return stasis_util_random_kiss_KISS(k) * 2.328306e-10; +} +static inline float stasis_util_random_kiss_VNI(kiss_table_t *k) { + return ((int32_t) stasis_util_random_kiss_KISS(k))*4.656613e-10; +} +static inline void stasis_util_random_kiss_settable(kiss_table_t *k, + uint32_t i1, uint32_t i2, uint32_t i3, + uint32_t i4, uint32_t i5, uint32_t i6) { + k->x=0;k->y=0;k->c=0;k->z=i1;k->w=i2,k->jsr=i3; k->jcong=i4; k->a=i5; k->b=i6; + for(int i = 0; i < 256; i++) { k->t[i] = stasis_util_random_kiss_KISS(k); } +} END_C_DECLS #endif /* RANDOM_H_ */ diff --git a/stasis/util/redblack.h b/stasis/util/redblack.h index 84bec2c..6ca03b4 100644 --- a/stasis/util/redblack.h +++ b/stasis/util/redblack.h @@ -1,5 +1,5 @@ /* - * RCS $Id$ + * RCS $Id: redblack.h 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $ */ /* diff --git a/stasis/util/sux.h b/stasis/util/sux.h new file mode 100644 index 0000000..1929abb --- /dev/null +++ b/stasis/util/sux.h @@ -0,0 +1,78 @@ +/* + * sux.h + * + * Created on: Jul 19, 2011 + * Author: sears + */ +/** + * @file SUX: Shared, Update, Exclusive latch implementation. + * + * Latch protocol used by some concurrent data structures. + * + * Lock compatibility table: + * + * S U X + * +------ + * S | Y Y N + * U | Y N N + * X | N N N + * + * This file implements SUX locks using a pthread mutex and a pthread rwlock. + * + * Lock mode | Holds rwlock? | Holds mutex? + * Shared | Read | No + * Update | No | Yes + * eXclusive | Write | Yes + * + * The mutex is always acquired before the rwlock, which allows us to safely + * upgrade and downgrade the SUX latch between U and X. + */ + +#ifndef SUX_H_ +#define SUX_H_ +#include +BEGIN_C_DECLS + +typedef struct { + pthread_rwlock_t sx_rwl; + pthread_mutex_t ux_mut; +} sux_latch; + +void sux_latch_init(sux_latch *sux) { + pthread_rwlock_init(&sux->sx_rwl); + pthread_mutex_init(&sux->ux_mut); +} +void sux_latch_destroy(sux_latch *sux) { + pthread_rwlock_destroy(&sux->sx_rwl); + pthread_mutex_destroy(&sux->ux_mut); +} +void sux_latch_slock(sux_latch *sux) { + pthread_rwlock_rdlock(&sux->sx_rwl); +} +void sux_latch_sunlock(sux_latch *sux) { + pthread_rwlock_unlock(&sux->sx_rwl); +} +void sux_latch_ulock(sux_latch *sux) { + pthread_mutex_lock(&sux->ux_mut); +} +void sux_latch_uunlock(sux_latch *sux) { + pthread_mutex_unlock(&sux->ux_mut); +} +void sux_latch_upgrade(sux_latch *sux) { + pthread_rwlock_wrlock(&sux->sx_rwl); +} +void sux_latch_downgrade(sux_latch *sux) { + pthread_rwlock_unlock(&sux->sx_rwl); +} +void sux_latch_xlock(sux_latch *sux) { + sux_latch_ulock(sux); + sux_latch_upgrade(sux); +} +void sux_latch_xunlock(sux_latch *sux) { + sux_latch_downgrade(sux); + sux_latch_uunlock(sux); +} + +END_C_DECLS + +#endif /* SUX_H_ */ diff --git a/test/lladd-old/TAbort.c b/test/lladd-old/TAbort.c index b9d281a..567b78d 100644 --- a/test/lladd-old/TAbort.c +++ b/test/lladd-old/TAbort.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: TAbort.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/TGet.c b/test/lladd-old/TGet.c index 8a000d0..59b1bf9 100644 --- a/test/lladd-old/TGet.c +++ b/test/lladd-old/TGet.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: TGet.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/TInsert.c b/test/lladd-old/TInsert.c index f8f0998..f48cd37 100644 --- a/test/lladd-old/TInsert.c +++ b/test/lladd-old/TInsert.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: TInsert.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/TInsertSequential.c b/test/lladd-old/TInsertSequential.c index 9dd792d..c306002 100644 --- a/test/lladd-old/TInsertSequential.c +++ b/test/lladd-old/TInsertSequential.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: TInsertSequential.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/TUpdate.c b/test/lladd-old/TUpdate.c index 7339e78..5fb0bd6 100644 --- a/test/lladd-old/TUpdate.c +++ b/test/lladd-old/TUpdate.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: TUpdate.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/abort-speed.c b/test/lladd-old/abort-speed.c index bf021fd..1713818 100644 --- a/test/lladd-old/abort-speed.c +++ b/test/lladd-old/abort-speed.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: abort-speed.c 2 2004-06-24 21:10:31Z sears $ * * how fast can we abort? does it depend on anything? * *******************************/ diff --git a/test/lladd-old/abort.c b/test/lladd-old/abort.c index b039dd9..f3e8a4c 100644 --- a/test/lladd-old/abort.c +++ b/test/lladd-old/abort.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: abort.c 2 2004-06-24 21:10:31Z sears $ * * aborting and committing transactions * *******************************/ diff --git a/test/lladd-old/commit-speed.c b/test/lladd-old/commit-speed.c index 5ea5bb5..4264855 100644 --- a/test/lladd-old/commit-speed.c +++ b/test/lladd-old/commit-speed.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: commit-speed.c 2 2004-06-24 21:10:31Z sears $ * * how fast can we abort? does it depend on anything? * *******************************/ diff --git a/test/lladd-old/inc_dec.c b/test/lladd-old/inc_dec.c index b3be5ec..3a01f97 100644 --- a/test/lladd-old/inc_dec.c +++ b/test/lladd-old/inc_dec.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: inc_dec.c 2 2004-06-24 21:10:31Z sears $ * * testing other user functions * *******************************/ diff --git a/test/lladd-old/interleaved.c b/test/lladd-old/interleaved.c index 3e18c7f..0b01680 100644 --- a/test/lladd-old/interleaved.c +++ b/test/lladd-old/interleaved.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: interleaved.c 2 2004-06-24 21:10:31Z sears $ * * interleaved transactions * *******************************/ diff --git a/test/lladd-old/logvals.c b/test/lladd-old/logvals.c index d5d99c1..5c46a3e 100644 --- a/test/lladd-old/logvals.c +++ b/test/lladd-old/logvals.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: logvals.c 2 2004-06-24 21:10:31Z sears $ * * long values * *******************************/ diff --git a/test/lladd-old/loop.c b/test/lladd-old/loop.c index dcfa4ce..111b27d 100644 --- a/test/lladd-old/loop.c +++ b/test/lladd-old/loop.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: loop.c 2 2004-06-24 21:10:31Z sears $ * * test by looping * *******************************/ diff --git a/test/lladd-old/nontrans.c b/test/lladd-old/nontrans.c index b06d343..d5d8003 100644 --- a/test/lladd-old/nontrans.c +++ b/test/lladd-old/nontrans.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: nontrans.c 2 2004-06-24 21:10:31Z sears $ * * doesn't use transactions * *******************************/ diff --git a/test/lladd-old/prepare_1.c b/test/lladd-old/prepare_1.c index 28ae3f9..2845d73 100644 --- a/test/lladd-old/prepare_1.c +++ b/test/lladd-old/prepare_1.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: prepare_1.c 2 2004-06-24 21:10:31Z sears $ * * testing other user functions * *******************************/ diff --git a/test/lladd-old/prepare_2.c b/test/lladd-old/prepare_2.c index 3f7cea9..8b3920d 100644 --- a/test/lladd-old/prepare_2.c +++ b/test/lladd-old/prepare_2.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: prepare_2.c 2 2004-06-24 21:10:31Z sears $ * * testing other user functions * *******************************/ diff --git a/test/lladd-old/prepare_3.c b/test/lladd-old/prepare_3.c index 8ab876a..ae016a0 100644 --- a/test/lladd-old/prepare_3.c +++ b/test/lladd-old/prepare_3.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: prepare_3.c 2 2004-06-24 21:10:31Z sears $ * * testing other user functions * *******************************/ diff --git a/test/lladd-old/recover.c b/test/lladd-old/recover.c index 28b6f2e..e6dc6b5 100644 --- a/test/lladd-old/recover.c +++ b/test/lladd-old/recover.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: recover.c 2 2004-06-24 21:10:31Z sears $ * * recover from a crash * *******************************/ diff --git a/test/lladd-old/strings.c b/test/lladd-old/strings.c index 25daf7a..ad5454d 100644 --- a/test/lladd-old/strings.c +++ b/test/lladd-old/strings.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: strings.c 2 2004-06-24 21:10:31Z sears $ * * testing with various strings * *******************************/ diff --git a/test/lladd-old/test.c b/test/lladd-old/test.c index ccc2cfa..9fde102 100644 --- a/test/lladd-old/test.c +++ b/test/lladd-old/test.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /******************************************* - * $Id$ + * $Id: test.c 2 2004-06-24 21:10:31Z sears $ * * Testing driver * ****************************************/ diff --git a/test/lladd-old/test.h b/test/lladd-old/test.h index a729ffb..c22d6fe 100644 --- a/test/lladd-old/test.h +++ b/test/lladd-old/test.h @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************************** - * $Id$ + * $Id: test.h 2 2004-06-24 21:10:31Z sears $ * * dummy header file for tests * ********************************************/ diff --git a/test/lladd-old/test0.c b/test/lladd-old/test0.c index bf1c6fb..b38d4e4 100644 --- a/test/lladd-old/test0.c +++ b/test/lladd-old/test0.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /**************************** - * $Id$ + * $Id: test0.c 2 2004-06-24 21:10:31Z sears $ * * glassbox testing of pages * *************************/ diff --git a/test/lladd-old/test1.c b/test/lladd-old/test1.c index dabbc4b..33e48da 100644 --- a/test/lladd-old/test1.c +++ b/test/lladd-old/test1.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /******************************** - * $Id$ + * $Id: test1.c 2 2004-06-24 21:10:31Z sears $ * * simple reading, writing * ******************************/ diff --git a/test/lladd-old/test2.c b/test/lladd-old/test2.c index 7256e92..03def9a 100644 --- a/test/lladd-old/test2.c +++ b/test/lladd-old/test2.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /******************************** - * $Id$ + * $Id: test2.c 2 2004-06-24 21:10:31Z sears $ * * simple reading, writing * ******************************/ diff --git a/test/lladd-old/test3.c b/test/lladd-old/test3.c index 51cdd20..2f30b09 100644 --- a/test/lladd-old/test3.c +++ b/test/lladd-old/test3.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /******************************** - * $Id$ + * $Id: test3.c 2 2004-06-24 21:10:31Z sears $ * * ******************************/ diff --git a/test/lladd-old/twentyfour.c b/test/lladd-old/twentyfour.c index 98d4695..71415e1 100644 --- a/test/lladd-old/twentyfour.c +++ b/test/lladd-old/twentyfour.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: twentyfour.c 2 2004-06-24 21:10:31Z sears $ * * if you run this test 23 times, it seems to work fine. On the 24th time, you * need to allocate a new page, and it crashes; diff --git a/test/lladd-old/twentyfour2.c b/test/lladd-old/twentyfour2.c index c0bc3bb..b028e02 100644 --- a/test/lladd-old/twentyfour2.c +++ b/test/lladd-old/twentyfour2.c @@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the terms specified in this license. ---*/ /********************************* - * $Id$ + * $Id: twentyfour2.c 2 2004-06-24 21:10:31Z sears $ * * if you run this test 23 times, its fine, but on the 24 time, shit happens * *******************************/ diff --git a/test/stasis/CMakeLists.txt b/test/stasis/CMakeLists.txt index cd5399f..63ee4b6 100644 --- a/test/stasis/CMakeLists.txt +++ b/test/stasis/CMakeLists.txt @@ -1,5 +1,10 @@ SUBDIRS(fault_injection) +CREATE_CHECK(check_concurrentSkipList) +CREATE_CHECK(check_bloomFilter) +CREATE_CHECK(check_hazard) +CREATE_CHECK(check_kiss) CREATE_CHECK(check_redblack) +CREATE_CHECK(check_concurrentBTree) CREATE_CHECK(check_concurrentHash) CREATE_CHECK(check_lhtable) CREATE_CHECK(check_concurrentRingbuffer) @@ -12,23 +17,25 @@ CREATE_CHECK(check_operations) CREATE_CHECK(check_transactional2) CREATE_CHECK(check_recovery) CREATE_CHECK(check_blobRecovery) -CREATE_CHECK(check_bufferManager) CREATE_CHECK(check_pageOperations) CREATE_CHECK(check_linearHash) CREATE_CHECK(check_header) CREATE_CHECK(check_linkedListNTA) CREATE_CHECK(check_linearHashNTA) CREATE_CHECK(check_pageOrientedList) -CREATE_EXPERIMENTAL_CHECK(check_ringbuffer) -CREATE_EXPERIMENTAL_CHECK(check_iterator) -CREATE_EXPERIMENTAL_CHECK(check_multiplexer) CREATE_CHECK(check_bTree) CREATE_CHECK(check_regions) -CREATE_CHECK(check_allocationPolicy) CREATE_CHECK(check_dirtyPageTable) CREATE_CHECK(check_io) CREATE_CHECK(check_rangeTracker) CREATE_CHECK(check_replacementPolicy) +if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +CREATE_CHECK(check_bufferManager) +CREATE_CHECK(check_allocationPolicy) +CREATE_EXPERIMENTAL_CHECK(check_ringbuffer) +CREATE_EXPERIMENTAL_CHECK(check_iterator) +CREATE_EXPERIMENTAL_CHECK(check_multiplexer) CREATE_EXPERIMENTAL_CHECK(check_lsmTree) CREATE_EXPERIMENTAL_CHECK(check_groupBy) +ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") CREATE_CHECK(check_boundedLog) diff --git a/test/stasis/check_bloomFilter.c b/test/stasis/check_bloomFilter.c new file mode 100644 index 0000000..c59ca41 --- /dev/null +++ b/test/stasis/check_bloomFilter.c @@ -0,0 +1,116 @@ +/* + * check_bloomFilter.cpp + * + * Copyright 2010-2012 Yahoo! Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Created on: Oct 2, 2010 + * Author: sears + */ +#include +#include +#include +#include +#include +#include + +/* + * This file can test CRC and FNV-1 based hash functions. Based on early experiments: + * + * CRC32 insert/lookup: 11/13 seconds, 1.1% false positive + * FNV-1 insert/lookup: 8/9 seconds, 2.8% false positive + * + * Expected false positive rate is 1%. + */ + +static uint64_t hash_a(const char* a, int len) { + return stasis_crc32(a,len,0xcafebabe); +} + +static uint64_t hash_b(const char* a, int len) { + return stasis_crc32(a,len,0xdeadbeef); +} +static uint64_t hash_a_fnv(const char* a, int len) { + return stasis_util_hash_fnv_1_uint32_t((const byte*)a, len); +} +static uint64_t hash_b_fnv(const char* a, int len) { + return stasis_util_hash_fnv_1_uint64_t((const byte*)a, len); +} + +static char * malloc_random_string(int group) { + char * str = 0; + int strlen = 0; + while(!strlen) strlen = 128 + (rand() & 127); + str = (char*)malloc(strlen + 1); + str[0] = group; + + for(int i = 1; i < strlen; i++) { + str[i] = (rand() & 128) + 1; + } + str[strlen] = 0; + return str; +} + +int main(int argc, char * argv[]) { + (void)hash_a; (void)hash_b; + (void)hash_a_fnv; (void)hash_b_fnv; + + const int num_inserts = 1000000; + char ** strings = (char**)malloc(num_inserts * sizeof(char*)); + uint64_t sum_strlen = 0; + struct timeval start, stop; + gettimeofday(&start, 0); + printf("seed: %lld\n", (long long)start.tv_sec); + srand(start.tv_sec); + for(int i = 0; i < num_inserts; i++) { + strings[i] = malloc_random_string(1); + sum_strlen += strlen(strings[i]); + } + gettimeofday(&stop,0); + printf("Generated strings in %d seconds. Mean string length: %f\n", (int)(stop.tv_sec - start.tv_sec), (double)(sum_strlen)/(double)num_inserts); + + stasis_bloom_filter_t * bf = stasis_bloom_filter_create(hash_a, hash_b, num_inserts, 0.01); + stasis_bloom_filter_print_stats(bf); + gettimeofday(&start, 0); + for(int i = 0; i < num_inserts; i++) { + stasis_bloom_filter_insert(bf,strings[i], strlen(strings[i])); + } + gettimeofday(&stop, 0); + printf("Inserted strings in %d seconds.\n", (int)(stop.tv_sec - start.tv_sec)); + + gettimeofday(&start, 0); + for(int i = 0; i < num_inserts; i++) { + assert(stasis_bloom_filter_lookup(bf, strings[i], strlen(strings[i]))); + } + gettimeofday(&stop, 0); + printf("Looked up strings in %d seconds.\n", (int)(stop.tv_sec - start.tv_sec)); + stasis_bloom_filter_print_stats(bf); + + uint64_t false_positives = 0; + gettimeofday(&start, 0); + for(int i = 0; i < num_inserts; i++) { + char * str = malloc_random_string(2); + if(stasis_bloom_filter_lookup(bf, str, strlen(str))) { + false_positives ++; + } + assert(stasis_bloom_filter_lookup(bf, strings[i], strlen(strings[i]))); + free(str); + } + gettimeofday(&stop, 0); + printf("Generated and looked up non-existant strings in %d seconds\n" + "false positive rate was %f\n", (int)(stop.tv_sec - start.tv_sec), + ((double)false_positives)/(double)num_inserts); + + return 0; +} diff --git a/test/stasis/check_concurrentBTree.c b/test/stasis/check_concurrentBTree.c new file mode 100644 index 0000000..1657cb1 --- /dev/null +++ b/test/stasis/check_concurrentBTree.c @@ -0,0 +1,98 @@ +/* + * check_concurrentBTree.c + * + * Created on: Dec 22, 2011 + * Author: sears + */ +#define _GNU_SOURCE +#include "../check_includes.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define LOG_NAME "check_concurrentBTree.log" + +START_TEST(metadataBitTest) { + int NUM_ITERS = 1000000; + metadata_t m = 0; + byte isLeaf = 0; + byte balanced = 0; + byte color = 0; + byte level = 0; + for(int i = 0; i < NUM_ITERS; i++) { + switch(stasis_util_random64(3)) { + case 0: {// leaf + if(isLeaf) { + assert(metadata_is_leaf(m)); + } else { + assert(!metadata_is_leaf(m)); + } + isLeaf = stasis_util_random64(2); + if(isLeaf) { + m = metadata_set_leaf(m); + m = leaf_metadata_set_color(m, color); + } else { + m = metadata_clear_leaf(m); + m = index_metadata_set_level(m, level); + } + } break; + case 1: {// balanced + if(balanced) { + assert(metadata_is_balanced(m)); + } else { + assert(!metadata_is_balanced(m)); + } + balanced = stasis_util_random64(2); + if(balanced) { + m = metadata_set_balanced(m); + } else { + m = metadata_clear_balanced(m); + } + } break; + case 2: { + if(isLeaf) { // color + assert(color == leaf_metadata_get_color(m)); + color = stasis_util_random64(3); + m = leaf_metadata_set_color(m, color); + } else { // level + assert(level == index_metadata_get_level(m)); + level = stasis_util_random64(8); + m = index_metadata_set_level(m, level); + assert(level == index_metadata_get_level(m)); + } + } break; + default: abort(); + } + } +} END_TEST + +Suite * check_suite(void) { + Suite *s = suite_create("lhtable"); + /* Begin a new test */ + TCase *tc = tcase_create("lhtable"); + + tcase_set_timeout(tc, 0); // disable timeouts + + /* Sub tests are added, one per line, here */ + tcase_add_test(tc, metadataBitTest); + + /* --------------------------------------------- */ + + tcase_add_checked_fixture(tc, setup, teardown); + + + suite_add_tcase(s, tc); + return s; +} + +#include "../check_setup.h" + diff --git a/test/stasis/check_concurrentHash.c b/test/stasis/check_concurrentHash.c index 21a2b0e..0032202 100644 --- a/test/stasis/check_concurrentHash.c +++ b/test/stasis/check_concurrentHash.c @@ -59,7 +59,7 @@ terms specified in this license. #include #include -#define LOG_NAME "check_lhtable.log" +#define LOG_NAME "check_concurrentHash.log" #ifdef DBUG_TEST diff --git a/test/stasis/check_concurrentSkipList.c b/test/stasis/check_concurrentSkipList.c new file mode 100644 index 0000000..92aead9 --- /dev/null +++ b/test/stasis/check_concurrentSkipList.c @@ -0,0 +1,205 @@ +/* + * check_concurrentSkipList.c + * + * Created on: Feb 8, 2012 + * Author: sears + */ +#include +#include +#include + +int num_keys = 1000000; +void * key_dup(intptr_t p) { + intptr_t * ret = malloc(sizeof(intptr_t)); + *ret = p; + return ret; +} +#ifdef STRINGS +static inline int stasis_util_skiplist_cmp(const void *a, const void *b) { + if(a == NULL) return -1; + if(b == NULL) return 1; + assert(!(a == NULL && b == NULL)); + return strcmp(a,b); +} +#else +static inline int stasis_util_skiplist_cmp(const void *a, const void *b) { + // Note: Below, we ensure a and b are both >= 0 and small. + //assert(*(intptr_t*)a < 2*num_keys); + //assert(*(intptr_t*)b < 2*num_keys); + return ((int)*(intptr_t*)a-(long)*(intptr_t*)b); +} +#endif +#include +#include "../check_includes.h" + +#include +#include + +#include +#include + +#define LOG_NAME "check_lhtable.log" + +int num_threads = 4; +int concurrent = 0; +stasis_skiplist_t * list; +void * worker(void* p) { + intptr_t * keys = p; + intptr_t collisions = 0; + for(int i = 0; i < num_keys; i++) { + char * ret = stasis_util_skiplist_insert(list, key_dup(keys[i])); + if(ret != NULL) { + assert(!stasis_util_skiplist_cmp(ret, &keys[i])); + collisions++; + } + } + for(int i = 0; i < num_keys; i++) { + char * ret = stasis_util_skiplist_search(list, &keys[i]); + if(!concurrent) assert(!stasis_util_skiplist_cmp(ret, &keys[i])); + } + for(int i = 0; i < num_keys; i++) { + char * ret = stasis_util_skiplist_delete(list, &keys[i]); + if(ret == NULL) { + collisions--; + } + } + stasis_skiplist_release(list); + return (void*) collisions; +} +/** + @test +*/ +START_TEST(concurrentSkipList_smokeTest) { + list = stasis_util_skiplist_init(stasis_util_skiplist_cmp, 0); + char ** const keys = malloc(sizeof(char*) * num_keys); + for(int i = 0; i < num_keys; i++) { +#ifdef STRINGS + int err = asprintf(&keys[i], "%d", (int)stasis_util_random64(2*num_keys)); + (void) err; +#else + keys[i] = (void*)(1+stasis_util_random64(2*num_keys)); +#endif + } + printf("Initted\n"); + fflush(stdout); + struct timeval tv; + gettimeofday(&tv,0); + double start = stasis_timeval_to_double(tv); + intptr_t collisions = (intptr_t)worker(keys); + assert(collisions == 0); + gettimeofday(&tv,0); + double stop = stasis_timeval_to_double(tv); + double elapsed = stop - start; + double opspersec = 3.0*num_keys / elapsed; + printf("Run took %f seconds. %f ops/sec 1 thread %f ops/thread-second\n", elapsed, opspersec, opspersec); + stasis_util_skiplist_deinit(list); +#ifdef STRINGS + for(int i = 0; i < num_keys; i++) { + free(keys[i]); + } +#endif + free(keys); +} END_TEST + +START_TEST(concurrentSkipList_concurrentTest) { + list = stasis_util_skiplist_init(stasis_util_skiplist_cmp, 0); + concurrent = 1; + char *** const keys = malloc(sizeof(char**) * num_threads); + for(int j = 0; j < num_threads; j++) { + keys[j] = malloc(sizeof(char*) * num_keys); + for(int i = 0; i < num_keys; i++) { + #ifdef STRINGS + int err = asprintf(&keys[i], "%d", (int)stasis_util_random64(2*num_keys)); + (void) err; + #else + keys[j][i] = (void*)(1+stasis_util_random64(2*num_keys)); + #endif + } + } + printf("Initted\n"); + fflush(stdout); + pthread_t * threads = malloc(sizeof(pthread_t) * num_threads); + struct timeval tv; + gettimeofday(&tv,0); + double start = stasis_timeval_to_double(tv); + int collisions = 0; + for(int j = 0; j < num_threads; j++) { + pthread_create(&threads[j], 0, worker, keys[j]); + } + for(int j = 0; j < num_threads; j++) { + intptr_t ret; + pthread_join(threads[j], (void*)&ret); + collisions += ret; +#ifdef STRINGS + for(int i = 0; i < num_keys; i++) { + free(keys[j][i]); + } +#endif + free(keys[j]); + } + assert(collisions == 0); + free(threads); + gettimeofday(&tv,0); + double stop = stasis_timeval_to_double(tv); + double elapsed = stop - start; + double opspersec = 3.0*(double)num_keys*num_threads / elapsed; + double opsperthsec = 3.0*(double)num_keys / elapsed; + printf("Run took %f seconds. %f ops/sec %d threads %f ops/thread-second\n", elapsed, opspersec, num_threads, opsperthsec); + stasis_util_skiplist_deinit(list); + free(keys); +} END_TEST +void * worker2(void * p) { + for(int i = 0; i < num_keys; i++) { + intptr_t key = (intptr_t)stasis_util_random64(1000); + switch(stasis_util_random64(3)) { + case 0: { + stasis_util_skiplist_insert(list, key_dup(key)); + } break; + case 1: { + stasis_util_skiplist_search(list, &key); + } break; + case 2: { + stasis_util_skiplist_delete(list, &key); + } break; + } + } + stasis_skiplist_release(list); + return 0; +} +START_TEST(concurrentSkipList_concurrentRandom) { + list = stasis_util_skiplist_init(stasis_util_skiplist_cmp, 0); + pthread_t thread[num_threads]; + for(int i = 0; i < num_threads; i++) { + pthread_create(&thread[i], 0, worker2, 0); + } + for(int i = 0; i < num_threads; i++) { + pthread_join(thread[i], 0); + } + for(intptr_t i = 0; i < 1000; i++) { + stasis_util_skiplist_delete(list, &i); + } + stasis_util_skiplist_deinit(list); +} END_TEST +Suite * check_suite(void) { + Suite *s = suite_create("concurrentSkipList"); + /* Begin a new test */ + TCase *tc = tcase_create("concurrentSkipList"); + tcase_set_timeout(tc, 0); // disable timeouts + + /* Sub tests are added, one per line, here */ + tcase_add_test(tc, concurrentSkipList_smokeTest); + tcase_add_test(tc, concurrentSkipList_concurrentTest); + tcase_add_test(tc, concurrentSkipList_concurrentRandom); + + /* --------------------------------------------- */ + + tcase_add_checked_fixture(tc, setup, teardown); + + + suite_add_tcase(s, tc); + return s; +} + +#include "../check_setup.h" + + diff --git a/test/stasis/check_hazard.c b/test/stasis/check_hazard.c new file mode 100644 index 0000000..773ae5a --- /dev/null +++ b/test/stasis/check_hazard.c @@ -0,0 +1,166 @@ +/* + * check_hazard.c + * + * Created on: Feb 7, 2012 + * Author: sears + */ +/*--- +This software is copyrighted by the Regents of the University of +California, and other parties. The following terms apply to all files +associated with the software unless explicitly disclaimed in +individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, +provided that existing copyright notices are retained in all copies +and that this notice is included verbatim in any distributions. No +written agreement, license, or royalty fee is required for any of the +authorized uses. Modifications to this software may be copyrighted by +their authors and need not follow the licensing terms described here, +provided that the new terms are clearly indicated on the first page of +each file where they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +NON-INFRINGEMENT. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, AND +THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" in +the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you are +acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. +---*/ + +#include "../check_includes.h" + +int hazard_finalize(void*p, void* ignored) { + free(p); + return 1; +} + +#include +#include +#include + +#include +#include +#include + +#define LOG_NAME "check_lhtable.log" + +/** + @test +*/ +START_TEST(hazard_smokeTest) { + hazard_t * h = hazard_init(2, 2, 10, hazard_finalize, 0); + char * a = malloc(1); + char * b = malloc(1); + *a = 0; + *b = 1; + char * ap = hazard_ref(h, 0, (hazard_ptr*)&a); + char * bp = hazard_ref(h, 1, (hazard_ptr*)&b); + hazard_free(h, ap); + hazard_free(h, bp); + hazard_scan(h,0); + assert(*ap == 0); + assert(*bp == 1); + hazard_release(h, 0); + hazard_release(h, 1); + hazard_scan(h,0); + hazard_deinit(h); +} END_TEST +/** + @test +*/ +#define NUM_OPS 1000 +#define NUM_THREADS 1000 +#define NUM_SLOTS 2000 +hazard_ptr* slots; +pthread_mutex_t* muts; +void * hazard_worker(void * hp) { + hazard_t * h = hp; + for(int i = 0; i < NUM_OPS; i++) { + int ptr_off = (int)stasis_util_random64(NUM_SLOTS); + void * p = hazard_ref(h, 0, &slots[ptr_off]); + if(p != NULL) { + assert(*(int*)p == ptr_off); + } + hazard_release(h, 0); + pthread_mutex_lock(&muts[ptr_off]); + if(slots[ptr_off] != 0) { + void* freeme = (void*)slots[ptr_off]; + slots[ptr_off] = 0; + hazard_free(h, freeme); + } + pthread_mutex_unlock(&muts[ptr_off]); + } + return 0; +} +START_TEST(hazard_loadTest) { + hazard_t * h = hazard_init(1, 1, 2, hazard_finalize, 0); + slots = malloc(sizeof(hazard_ptr) * NUM_SLOTS); + muts = malloc(sizeof(pthread_mutex_t) * NUM_SLOTS); + for(int i = 0; i < NUM_SLOTS; i++) { + slots[i] = (hazard_ptr) malloc(sizeof(int)); + *(int*)slots[i] = i; + pthread_mutex_init(&muts[i],0); + } + pthread_t * threads = malloc(sizeof(pthread_t) * NUM_THREADS); + for(int i = 0; i < NUM_THREADS; i++) { + pthread_create(&threads[i], 0, hazard_worker, h); + } + for(int i = 0; i < NUM_THREADS; i++) { + pthread_join(threads[i], 0); + } + for(int i = 0; i < NUM_SLOTS; i++) { + void * p = hazard_ref(h, 0, &slots[i]); + if(p != NULL) { + printf("found slot\n"); + slots[i] = 0; + hazard_free(h, p); + } + } + hazard_release(h, 0); + free(threads); + free(slots); + free(muts); + hazard_deinit(h); +} END_TEST + +Suite * check_suite(void) { + Suite *s = suite_create("hazard"); + /* Begin a new test */ + TCase *tc = tcase_create("hazard"); + tcase_set_timeout(tc, 0); // disable timeouts + + /* Sub tests are added, one per line, here */ + tcase_add_test(tc, hazard_smokeTest); + tcase_add_test(tc, hazard_loadTest); + + /* --------------------------------------------- */ + + tcase_add_checked_fixture(tc, setup, teardown); + + + suite_add_tcase(s, tc); + return s; +} + +#include "../check_setup.h" + + diff --git a/test/stasis/check_io.c b/test/stasis/check_io.c index bde5a7a..5f3ce0e 100644 --- a/test/stasis/check_io.c +++ b/test/stasis/check_io.c @@ -367,6 +367,53 @@ START_TEST(io_raid1pfileTest) { remove(A); remove(B); +} END_TEST +START_TEST(io_raid0pfileTest) { + printf("io_raid0pfileTest\n"); fflush(stdout); + uint32_t stripe_size = PAGE_SIZE; + const char * A = "vol1.txt"; + const char * B = "vol2.txt"; + + remove(A); + remove(B); + + + stasis_handle_t *h; + stasis_handle_t *hp[2]; + + hp[0] = stasis_handle(open_pfile)(A, O_CREAT | O_RDWR, FILE_PERM); + hp[1] = stasis_handle(open_pfile)(B, O_CREAT | O_RDWR, FILE_PERM); + h = stasis_handle_open_raid0(2, hp, stripe_size); + + // h = stasis_handle(open_debug)(h); + handle_smoketest(h); + h->close(h); + + remove(A); + remove(B); + + hp[0] = stasis_handle(open_pfile)(A, O_CREAT | O_RDWR, FILE_PERM); + hp[1] = stasis_handle(open_pfile)(B, O_CREAT | O_RDWR, FILE_PERM); + h = stasis_handle_open_raid0(2, hp, stripe_size); + + // h = stasis_handle(open_debug)(h); + handle_sequentialtest(h); + h->close(h); + + remove(A); + remove(B); + + hp[0] = stasis_handle(open_pfile)(A, O_CREAT | O_RDWR, FILE_PERM); + hp[1] = stasis_handle(open_pfile)(B, O_CREAT | O_RDWR, FILE_PERM); + h = stasis_handle_open_raid0(2, hp, stripe_size); + + // h = stasis_handle(open_debug)(h); + handle_concurrencytest(h); + h->close(h); + + remove(A); + remove(B); + } END_TEST /* static stasis_handle_t * fast_factory(lsn_t off, lsn_t len, void * ignored) { @@ -493,6 +540,7 @@ Suite * check_suite(void) { tcase_add_test(tc, io_fileTest); tcase_add_test(tc, io_pfileTest); tcase_add_test(tc, io_raid1pfileTest); + tcase_add_test(tc, io_raid0pfileTest); //tcase_add_test(tc, io_nonBlockingTest_file); //tcase_add_test(tc, io_nonBlockingTest_pfile); /* --------------------------------------------- */ diff --git a/test/stasis/check_kiss.c b/test/stasis/check_kiss.c new file mode 100644 index 0000000..670adeb --- /dev/null +++ b/test/stasis/check_kiss.c @@ -0,0 +1,409 @@ +/* + * check_kiss.c + * + * Created on: Feb 8, 2012 + * Author: sears + */ +/*--- +This software is copyrighted by the Regents of the University of +California, and other parties. The following terms apply to all files +associated with the software unless explicitly disclaimed in +individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, +provided that existing copyright notices are retained in all copies +and that this notice is included verbatim in any distributions. No +written agreement, license, or royalty fee is required for any of the +authorized uses. Modifications to this software may be copyrighted by +their authors and need not follow the licensing terms described here, +provided that the new terms are clearly indicated on the first page of +each file where they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +NON-INFRINGEMENT. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, AND +THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" in +the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you are +acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. +---*/ + +#include "../check_includes.h" + +#include +#include + +#include +#include +#include + +#define LOG_NAME "check_lhtable.log" + +/** + @test +*/ +START_TEST(kiss_smokeTest) { + int i; uint32_t k; + kiss_table_t kiss; + stasis_util_random_kiss_settable(&kiss, 12345,65435,34221,12345,9983651,95746118); + + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_LFIB4(&kiss);} printf("%u\n", k-1064612766U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_SWB (&kiss);} printf("%u\n", k- 627749721U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_KISS (&kiss);} printf("%u\n", k-1372460312U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_CONG (&kiss);} printf("%u\n", k-1529210297U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_SHR3 (&kiss);} printf("%u\n", k-2642725982U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_MWC (&kiss);} printf("%u\n", k- 904977562U); + for(i=1;i<1000001;i++){k=stasis_util_random_kiss_FIB (&kiss);} printf("%u\n", k-3519793928U); +} END_TEST + +Suite * check_suite(void) { + Suite *s = suite_create("hazard"); + /* Begin a new test */ + TCase *tc = tcase_create("hazard"); + tcase_set_timeout(tc, 0); // disable timeouts + + /* Sub tests are added, one per line, here */ + // XXX this test might be flawed. + tcase_add_test(tc, kiss_smokeTest); + + /* --------------------------------------------- */ + + tcase_add_checked_fixture(tc, setup, teardown); + + + suite_add_tcase(s, tc); + return s; +} + +#include "../check_setup.h" + +#if 0 +/* RCS: Note, the unmodified code no longer works. It assumes longs are 32-bit + * Change the UL macro to "unsigned int" to get it to pass tests. + */ + +/* +Subject: Random numbers for C: The END? +Date: Wed, 20 Jan 1999 10:55:14 -0500 +From: George Marsaglia +Message-ID: <36A5FC62.17C9CC33@stat.fsu.edu> +Newsgroups: sci.stat.math,sci.math +Lines: 301 + +My offer of RNG's for C was an invitation to dance; +I did not expect the Tarantella. I hope this post will +stop the music, or at least slow it to a stately dance +for language chauvinists and software police---under +a different heading. + +In response to a number of requests for good RNG's in +C, and mindful of the desirability of having a variety +of methods readily available, I offered several. They +were implemented as in-line functions using the #define +feature of C. + +Numerous responses have led to improvements; the result +is the listing below, with comments describing the +generators. + +I thank all the experts who contributed suggestions, either +directly to me or as part of the numerous threads. + +It seems necessary to use a (circular) table in order +to get extremely long periods for some RNG's. Each new +number is some combination of the previous r numbers, kept +in the circular table. The circular table has to keep +at least the last r, but possible more than r, numbers. + +For speed, an 8-bit index seems best for accessing +members of the table---at least for Fortran, where an +8-bit integer is readily available via integer*1, and +arithmetic on the index is automatically mod 256 +(least-absolute-residue). + +Having little experience with C, I got out my little +(but BIG) Kernighan and Ritchie book to see if there +were an 8-bit integer type. I found none, but I did +find char and unsigned char: one byte. Furthemore, K&R +said arithmetic on characters was ok. That, and a study +of the #define examples, led me to propose #define's +for in-line generators LFIB4 and SWB, with monster +periods. But it turned out that char arithmetic jumps +"out of character", other than for simple cases such as +c++ or c+=1. So, for safety, the index arithmetic +below is kept in character by the UC definition. + +Another improvement on the original version takes +advantage of the comma operator, which, to my chagrin, +I had not seen in K&R. It is there, but only with an +example of (expression,expression). From the advice of +contributors, I found that the comma operator allows +(expression,...,expression,expression) with the +last expression determining the value. That makes it +much easier to create in-line functions via #define +(see SHR3, LFIB4, SWB and FIB below). + +The improved #define's are listed below, with a +function to initialize the table and a main program +that calls each of the in-line functions one million +times and then compares the result to what I got with +a DOS version of gcc. That main program can serve +as a test to see if your system produces the same +results as mine. + _________________________________________ + |If you run the program below, your output| + | should be seven lines, each a 0 (zero).| + ----------------------------------------- + +Some readers of the threads are not much interested +in the philosophical aspects of computer languages, +but want to know: what is the use of this stuff? +Here are simple examples of the use of the in-line +functions: Include the #define's in your program, with +the accompanying static variable declarations, and a +procedure, such as the example, for initializing +the static variable (seeds) and the table. + +Then any one of those in-line functions, inserted +in a C expression, will provide a random 32-bit +integer, or a random float if UNI or VNI is used. +For example, KISS&255; would provide a random byte, +while 5.+2.*UNI; would provide a random real (float) +from 5 to 7. Or 1+MWC%10; would provide the +proverbial "take a number from 1 to 10", +(but with not quite, but virtually, equal + probabilities). +More generally, something such as 1+KISS%n; would +provide a practical uniform random choice from 1 to n, +if n is not too big. + +A key point is: a wide variety of very fast, high- +quality, easy-to-use RNG's are available by means of +the nine in-line functions below, used individually or +in combination. + +The comments after the main test program describe the +generators. These descriptions are much as in the first +post, for those who missed them. Some of the +generators (KISS, MWC, LFIB4) seem to pass all tests of +randomness, particularly the DIEHARD battery of tests, +and combining virtually any two or more of them should +provide fast, reliable, long period generators. (CONG +or FIB alone and CONG+FIB are suspect, but quite useful +in combinations.) + +Serious users of random numbers may want to +run their simulations with several different +generators, to see if they get consistent results. +These #define's may make it easy to do. +Bonne chance, +George Marsaglia + +The C code follows---------------------------------: +*/ +#include +#define znew (z=36969*(z&65535)+(z>>16)) +#define wnew (w=18000*(w&65535)+(w>>16)) +#define MWC ((znew<<16)+wnew ) +#define SHR3 (jsr^=(jsr<<17), jsr^=(jsr>>13), jsr^=(jsr<<5)) +#define CONG (jcong=69069*jcong+1234567) +#define FIB ((b=a+b),(a=b-a)) +#define KISS ((MWC^CONG)+SHR3) +#define LFIB4 (c++,t[c]=t[c]+t[UC(c+58)]+t[UC(c+119)]+t[UC(c+178)]) +#define SWB (c++,bro=(x2^7700 and is highly + recommended. + Subtract-with-borrow has the same local behaviour + as lagged Fibonacci using +,-,xor---the borrow + merely provides a much longer period. + SWB fails the birthday spacings test, as do all + lagged Fibonacci and other generators that merely + combine two previous values by means of =,- or xor. + Those failures are for a particular case: m=512 + birthdays in a year of n=2^24 days. There are + choices of m and n for which lags >1000 will also + fail the test. A reasonable precaution is to always + combine a 2-lag Fibonacci or SWB generator with + another kind of generator, unless the generator uses + *, for which a very satisfactory sequence of odd + 32-bit integers results. + + The classical Fibonacci sequence mod 2^32 from FIB + fails several tests. It is not suitable for use by + itself, but is quite suitable for combining with + other generators. + + The last half of the bits of CONG are too regular, + and it fails tests for which those bits play a + significant role. CONG+FIB will also have too much + regularity in trailing bits, as each does. But keep + in mind that it is a rare application for which + the trailing bits play a significant role. CONG + is one of the most widely used generators of the + last 30 years, as it was the system generator for + VAX and was incorporated in several popular + software packages, all seemingly without complaint. + + Finally, because many simulations call for uniform + random variables in 0