Re-worked code to use async_nif among other things, more to come.

This commit is contained in:
Gregory Burd 2013-05-19 00:19:01 -04:00
parent 2945d0096f
commit f2c5ff30e7
24 changed files with 4452 additions and 1483 deletions

View file

@ -5,9 +5,9 @@ MODULE = emdb
DIALYZER = dialyzer
REBAR = rebar
.PHONY: build clean
.PHONY: compile clean
all: ebin priv build
all: ebin priv compile
ebin:
@mkdir -p $@
@ -15,10 +15,51 @@ ebin:
priv:
@mkdir -p $@
build:
compile:
@$(REBAR) compile
clean:
@$(REBAR) clean
@rm -f *~ */*~ erl_crash.dump
@rm -rf ebin priv
xref:
@$(REBAR) xref skip_deps=true
test: eunit
eunit: compile-for-eunit
@$(REBAR) eunit skip_deps=true
eqc: compile-for-eqc
@$(REBAR) eqc skip_deps=true
proper: compile-for-proper
@echo "rebar does not implement a 'proper' command" && false
triq: compile-for-triq
@$(REBAR) triq skip_deps=true
compile-for-eunit:
@$(REBAR) compile eunit compile_only=true
compile-for-eqc:
@$(REBAR) -D QC -D QC_EQC compile eqc compile_only=true
compile-for-eqcmini:
@$(REBAR) -D QC -D QC_EQCMINI compile eqc compile_only=true
compile-for-proper:
@$(REBAR) -D QC -D QC_PROPER compile eqc compile_only=true
compile-for-triq:
@$(REBAR) -D QC -D QC_TRIQ compile triq compile_only=true
plt: compile
@$(DIALYZER) --build_plt --output_plt .$(TARGET).plt -pa deps/lager/ebin --apps kernel stdlib
analyze: compile
@$(DIALYZER) --plt .$(TARGET).plt -pa deps/lager/ebin ebin
repl:
@$(ERL) -pa ebin -pz deps/lager/ebin

105
README.md
View file

@ -1,4 +1,4 @@
EMDB ==== EMDB is a NIF library for the [Memory-Mapped Database](http://highlandsun.com/hyc/mdb/) database, aka. MDB. The main purpose of this package is to provide a **very fast** Riak [backend](http://wiki.basho.com/Storage-Backends.html).
EMDB ==== EMDB is a NIF library for the [Memory-Mapped Database](http://highlandsun.com/hyc/mdb/) database, aka. MDB. The main purpose of this package is to provide a **very fast** Riak [backend](http://wiki.basho.com/Storage-Backends.html).
But this module could also be used as a general key-value store to replace:
@ -26,65 +26,86 @@ But this module could also be used as a general key-value store to replace:
* `drop/1`: deletes all key-value pairs in the database.
Usage ----- $ make
Usage
-----
$ make
$ ./start.sh
%% create a new database
1> {ok, Handle} = emdb:open("/tmp/emdb1").
%% create a new database 1> {ok, Handle} = emdb:open("/tmp/emdb1").
%% insert the key <<"a">> with value <<"1">>
2> ok = emdb:put(Handle, <<"a">>, <<"1">>).
%% insert the key <<"a">> with value <<"1">> 2> ok = Handle:put(<<"a">>, <<"1">>).
%% try to re-insert the same key <<"a">>
3> key_exist = emdb:put(Handle, <<"a">>, <<"2">>).
%% try to re-insert the same key <<"a">> 3> key_exist = Handle:put(<<"a">>, <<"2">>).
%% add a new key-value pair
4> ok = emdb:put(Handle, <<"b">>, <<"2">>).
%% add a new key-value pair 4> ok = Handle:put(<<"b">>, <<"2">>).
%% search a non-existing key <<"c">>
5> none = emdb:get(Handle, <<"c">>).
%% search a non-existing key <<"c">> 5> none = Handle:get(<<"c">>).
%% retrieve the value for key <<"b">>
6> {ok, <<"2">>} = emdb:get(Handle, <<"b">>).
%% retrieve the value for key <<"b">> 6> {ok, <<"2">>} = Handle:get(<<"b">>).
%% retrieve the value for key <<"a">>
7> {ok, <<"1">>} = emdb:get(Handle, <<"a">>).
%% retrieve the value for key <<"a">> 7> {ok, <<"1">>} = Handle:get(<<"a">>).
%% delete key <<"b">> 8> ok = Handle:del(<<"b">>).
%% delete key <<"b">>
8> ok = emdb:del(Handle, <<"b">>).
%% search a non-existing key <<"b">>
9> none = Handle:get(<<"b">>).
9> none = emdb:get(Handle, <<"b">>).
%% delete a non-existing key <<"z">> 10> none = Handle:del(<<"z">>).
%% delete a non-existing key <<"z">>
10> none = emdb:del(Handle, <<"z">>).
%% ensure key <<"a">>'s value is still <<"1">>
11> {ok, <<"1">>} = emdb:get(Handle, <<"a">>).
%% ensure key <<"a">>'s value is still <<"1">> 11> {ok, <<"1">>} = Handle:get(<<"a">>).
%% update the value for key <<"a">>
12> ok = Handle:update(<<"a">>, <<"7">>).
12> ok = emdb:update(Handle, <<"a">>, <<"7">>).
%% check the new value for key <<"a">>
13> {ok, <<"7">>} = Handle:get(<<"a">>).
13> {ok, <<"7">>} = emdb:get(Handle, <<"a">>).
%% delete all key-value pairs in the database 14> ok = Handle:drop().
%% delete all key-value pairs in the database
14> ok = emdb:drop(Handle).
%% try to retrieve key <<"a">> value 15> none = Handle:get(<<"a">>).
%% try to retrieve key <<"a">> value
15> none = emdb:get(Handle, <<"a">>).
%% close the database 16> ok = Handle:close().
%% close the database
16> ok = emdb:close(Handle).
...
17> q().
####Note:
17> q().
#### Note:
The code below creates a new database with **80GB** MapSize, **avoid fsync**
after each commit (for max speed) and use the experimental **MDB_FIXEDMAP**. {ok, Handle} = emdb:open("/tmp/emdb2", 85899345920, ?MDB_NOSYNC bor ?MDB_FIXEDMAP).
Performance ----------- For maximum speed, this library use only binaries for both keys and values.
See the impressive [microbench](http://highlandsun.com/hyc/mdb/microbench/) against:
after each commit (for max speed) and use the experimental **MDB_FIXEDMAP**.
* Google's LevelDB
* SQLite
{ok, Handle} = emdb:open("/tmp/emdb2", 85899345920, ?MDB_NOSYNC bor ?MDB_FIXEDMAP).
Performance
-----------
For maximum speed, this library use only binaries for both keys and values.
See the impressive [microbench](http://highlandsun.com/hyc/mdb/microbench/) against:
* Google's LevelDB (which is slower and can stall unlike Basho's fork of LevelDB)
* SQLite3
* Kyoto TreeDB
* BerkeleyDB
* BerkeleyDB 5.x
MDB performs better on 64-bit arch.
Supported OSes --------------
Supported Operating Systems
--------------
Should work on 32/64-bit architectures:
@ -93,14 +114,24 @@ Should work on 32/64-bit architectures:
* FreeBSD
* Windows
TODO ----
TODO
----
* Unit tests * PropEr testing
* Fold over keys and/or values
* Unit tests
* PropEr testing
* Bulk "writing"
* basho_bench driver
* EQC, PULSE testing
* Key expirey
* Atomic group commit (for 2i)
Volunteers are always welcome! Status
Volunteers are always welcome!
Status
------
#### Work in progress. Don't use it in production!
LICENSE -------
EMDB is Copyright (C) 2012 by Aleph Archives, and released under the [OpenLDAP](http://www.OpenLDAP.org/license.html) License.
LICENSE
-------
EMDB is Copyright (C) 2012-2013 by Aleph Archives and Basho Technologies, Inc., and released under the [OpenLDAP](http://www.OpenLDAP.org/license.html) License.

533
c_src/async_nif.h Normal file
View file

@ -0,0 +1,533 @@
/*
* async_nif: An async thread-pool layer for Erlang's NIF API
*
* Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
* Author: Gregory Burd <greg@basho.com> <greg@burd.me>
*
* This file is provided to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef __ASYNC_NIF_H__
#define __ASYNC_NIF_H__
#if defined(__cplusplus)
extern "C" {
#endif
#include <assert.h>
#include "fifo_q.h"
#include "stats.h"
#ifndef __UNUSED
#define __UNUSED(v) ((void)(v))
#endif
#define ASYNC_NIF_MAX_WORKERS 128
#define ASYNC_NIF_WORKER_QUEUE_SIZE 500
#define ASYNC_NIF_MAX_QUEUED_REQS 1000 * ASYNC_NIF_MAX_WORKERS
STAT_DECL(qwait, 1000);
struct async_nif_req_entry {
ERL_NIF_TERM ref;
ErlNifEnv *env;
ErlNifPid pid;
void *args;
void (*fn_work)(ErlNifEnv*, ERL_NIF_TERM, ErlNifPid*, unsigned int, void *);
void (*fn_post)(void *);
};
DECL_FIFO_QUEUE(reqs, struct async_nif_req_entry);
struct async_nif_work_queue {
STAT_DEF(qwait);
ErlNifMutex *reqs_mutex;
ErlNifCond *reqs_cnd;
FIFO_QUEUE_TYPE(reqs) reqs;
};
struct async_nif_worker_entry {
ErlNifTid tid;
unsigned int worker_id;
struct async_nif_state *async_nif;
struct async_nif_work_queue *q;
};
struct async_nif_state {
STAT_DEF(qwait);
unsigned int shutdown;
unsigned int num_workers;
struct async_nif_worker_entry worker_entries[ASYNC_NIF_MAX_WORKERS];
unsigned int num_queues;
unsigned int next_q;
FIFO_QUEUE_TYPE(reqs) recycled_reqs;
unsigned int num_reqs;
ErlNifMutex *recycled_req_mutex;
struct async_nif_work_queue queues[];
};
#define ASYNC_NIF_DECL(decl, frame, pre_block, work_block, post_block) \
struct decl ## _args frame; \
static void fn_work_ ## decl (ErlNifEnv *env, ERL_NIF_TERM ref, ErlNifPid *pid, unsigned int worker_id, struct decl ## _args *args) { \
__UNUSED(worker_id); \
do work_block while(0); \
} \
static void fn_post_ ## decl (struct decl ## _args *args) { \
__UNUSED(args); \
do post_block while(0); \
} \
static ERL_NIF_TERM decl(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv_in[]) { \
struct decl ## _args on_stack_args; \
struct decl ## _args *args = &on_stack_args; \
struct decl ## _args *copy_of_args; \
struct async_nif_req_entry *req = NULL; \
const char *affinity = NULL; \
ErlNifEnv *new_env = NULL; \
/* argv[0] is a ref used for selective recv */ \
const ERL_NIF_TERM *argv = argv_in + 1; \
argc -= 1; \
/* Note: !!! this assumes that the first element of priv_data is ours */ \
struct async_nif_state *async_nif = *(struct async_nif_state**)enif_priv_data(env); \
if (async_nif->shutdown) \
return enif_make_tuple2(env, enif_make_atom(env, "error"), \
enif_make_atom(env, "shutdown")); \
req = async_nif_reuse_req(async_nif); \
new_env = req->env; \
if (!req) \
return enif_make_tuple2(env, enif_make_atom(env, "error"), \
enif_make_atom(env, "eagain")); \
do pre_block while(0); \
copy_of_args = (struct decl ## _args *)enif_alloc(sizeof(struct decl ## _args)); \
if (!copy_of_args) { \
fn_post_ ## decl (args); \
return enif_make_tuple2(env, enif_make_atom(env, "error"), \
enif_make_atom(env, "enomem")); \
} \
memcpy(copy_of_args, args, sizeof(struct decl ## _args)); \
req->ref = enif_make_copy(new_env, argv_in[0]); \
enif_self(env, &req->pid); \
req->args = (void*)copy_of_args; \
req->fn_work = (void (*)(ErlNifEnv *, ERL_NIF_TERM, ErlNifPid*, unsigned int, void *))fn_work_ ## decl ; \
req->fn_post = (void (*)(void *))fn_post_ ## decl; \
int h = -1; \
if (affinity) \
h = async_nif_str_hash_func(affinity) % async_nif->num_queues; \
ERL_NIF_TERM reply = async_nif_enqueue_req(async_nif, req, h); \
if (!reply) { \
fn_post_ ## decl (args); \
enif_free(copy_of_args); \
return enif_make_tuple2(env, enif_make_atom(env, "error"), \
enif_make_atom(env, "shutdown")); \
} \
return reply; \
}
#define ASYNC_NIF_INIT(name) \
static ErlNifMutex *name##_async_nif_coord = NULL;
#define ASYNC_NIF_LOAD(name, priv) do { \
if (!name##_async_nif_coord) \
name##_async_nif_coord = enif_mutex_create(NULL); \
enif_mutex_lock(name##_async_nif_coord); \
priv = async_nif_load(); \
enif_mutex_unlock(name##_async_nif_coord); \
} while(0);
#define ASYNC_NIF_UNLOAD(name, env, priv) do { \
if (!name##_async_nif_coord) \
name##_async_nif_coord = enif_mutex_create(NULL); \
enif_mutex_lock(name##_async_nif_coord); \
async_nif_unload(env, priv); \
enif_mutex_unlock(name##_async_nif_coord); \
enif_mutex_destroy(name##_async_nif_coord); \
name##_async_nif_coord = NULL; \
} while(0);
#define ASYNC_NIF_UPGRADE(name, env) do { \
if (!name##_async_nif_coord) \
name##_async_nif_coord = enif_mutex_create(NULL); \
enif_mutex_lock(name##_async_nif_coord); \
async_nif_upgrade(env); \
enif_mutex_unlock(name##_async_nif_coord); \
} while(0);
#define ASYNC_NIF_RETURN_BADARG() do { \
async_nif_recycle_req(req, async_nif); \
return enif_make_badarg(env); \
} while(0);
#define ASYNC_NIF_WORK_ENV new_env
#define ASYNC_NIF_REPLY(msg) enif_send(NULL, pid, env, enif_make_tuple2(env, ref, msg))
/**
* Return a request structure from the recycled req queue if one exists,
* otherwise create one.
*/
struct async_nif_req_entry *
async_nif_reuse_req(struct async_nif_state *async_nif)
{
struct async_nif_req_entry *req = NULL;
ErlNifEnv *env = NULL;
enif_mutex_lock(async_nif->recycled_req_mutex);
if (fifo_q_empty(reqs, async_nif->recycled_reqs)) {
if (async_nif->num_reqs < ASYNC_NIF_MAX_QUEUED_REQS) {
req = enif_alloc(sizeof(struct async_nif_req_entry));
if (req) {
memset(req, 0, sizeof(struct async_nif_req_entry));
env = enif_alloc_env();
if (!env) {
enif_free(req);
req = NULL;
} else {
req->env = env;
async_nif->num_reqs++;
}
}
}
} else {
req = fifo_q_get(reqs, async_nif->recycled_reqs);
}
enif_mutex_unlock(async_nif->recycled_req_mutex);
STAT_TICK(async_nif, qwait);
return req;
}
/**
* Store the request for future re-use.
*
* req a request entry with an ErlNifEnv* which will be cleared
* before reuse, but not until then.
* async_nif a handle to our state so that we can find and use the mutex
*/
void
async_nif_recycle_req(struct async_nif_req_entry *req, struct async_nif_state *async_nif)
{
STAT_TOCK(async_nif, qwait);
enif_mutex_lock(async_nif->recycled_req_mutex);
fifo_q_put(reqs, async_nif->recycled_reqs, req);
enif_mutex_unlock(async_nif->recycled_req_mutex);
}
/**
* A string hash function.
*
* A basic hash function for strings of characters used during the
* affinity association.
*
* s a NULL terminated set of bytes to be hashed
* -> an integer hash encoding of the bytes
*/
static inline unsigned int
async_nif_str_hash_func(const char *s)
{
unsigned int h = (unsigned int)*s;
if (h) for (++s ; *s; ++s) h = (h << 5) - h + (unsigned int)*s;
return h;
}
/**
* Enqueue a request for processing by a worker thread.
*
* Places the request into a work queue determined either by the
* provided affinity or by iterating through the available queues.
*/
static ERL_NIF_TERM
async_nif_enqueue_req(struct async_nif_state* async_nif, struct async_nif_req_entry *req, int hint)
{
/* Identify the most appropriate worker for this request. */
unsigned int qid = 0;
struct async_nif_work_queue *q = NULL;
unsigned int n = async_nif->num_queues;
/* Either we're choosing a queue based on some affinity/hinted value or we
need to select the next queue in the rotation and atomically update that
global value (next_q is shared across worker threads) . */
if (hint >= 0) {
qid = (unsigned int)hint;
} else {
qid = async_nif->next_q;
qid = (qid + 1) % async_nif->num_queues;
async_nif->next_q = qid;
}
/* Now we inspect and interate across the set of queues trying to select one
that isn't too full or too slow. */
do {
q = &async_nif->queues[qid];
enif_mutex_lock(q->reqs_mutex);
/* Now that we hold the lock, check for shutdown. As long as we hold
this lock either a) we're shutting down so exit now or b) this queue
will be valid until we release the lock. */
if (async_nif->shutdown) {
enif_mutex_unlock(q->reqs_mutex);
return 0;
}
double await = STAT_MEAN_LOG2_SAMPLE(async_nif, qwait);
double await_inthisq = STAT_MEAN_LOG2_SAMPLE(q, qwait);
if (fifo_q_full(reqs, q->reqs) || await_inthisq > await) {
enif_mutex_unlock(q->reqs_mutex);
qid = (qid + 1) % async_nif->num_queues;
q = &async_nif->queues[qid];
} else {
break;
}
// TODO: at some point add in work sheading/stealing
} while(n-- > 0);
/* We hold the queue's lock, and we've seletect a reasonable queue for this
new request so add the request. */
STAT_TICK(q, qwait);
fifo_q_put(reqs, q->reqs, req);
/* Build the term before releasing the lock so as not to race on the use of
the req pointer (which will soon become invalid in another thread
performing the request). */
ERL_NIF_TERM reply = enif_make_tuple2(req->env, enif_make_atom(req->env, "ok"),
enif_make_atom(req->env, "enqueued"));
enif_mutex_unlock(q->reqs_mutex);
enif_cond_signal(q->reqs_cnd);
return reply;
}
/**
* TODO:
*/
static void *
async_nif_worker_fn(void *arg)
{
struct async_nif_worker_entry *we = (struct async_nif_worker_entry *)arg;
unsigned int worker_id = we->worker_id;
struct async_nif_state *async_nif = we->async_nif;
struct async_nif_work_queue *q = we->q;
struct async_nif_req_entry *req = NULL;
for(;;) {
/* Examine the request queue, are there things to be done? */
enif_mutex_lock(q->reqs_mutex);
check_again_for_work:
if (async_nif->shutdown) {
enif_mutex_unlock(q->reqs_mutex);
break;
}
if (fifo_q_empty(reqs, q->reqs)) {
/* Queue is empty so we wait for more work to arrive. */
STAT_RESET(q, qwait);
enif_cond_wait(q->reqs_cnd, q->reqs_mutex);
goto check_again_for_work;
} else {
assert(fifo_q_size(reqs, q->reqs) > 0);
assert(fifo_q_size(reqs, q->reqs) < fifo_q_capacity(reqs, q->reqs));
/* At this point the next req is ours to process and we hold the
reqs_mutex lock. Take the request off the queue. */
req = fifo_q_get(reqs, q->reqs);
enif_mutex_unlock(q->reqs_mutex);
/* Ensure that there is at least one other worker thread watching this
queue. */
enif_cond_signal(q->reqs_cnd);
/* Perform the work. */
req->fn_work(req->env, req->ref, &req->pid, worker_id, req->args);
STAT_TOCK(q, qwait);
/* Now call the post-work cleanup function. */
req->fn_post(req->args);
/* Clean up req for reuse. */
req->ref = 0;
req->fn_work = 0;
req->fn_post = 0;
enif_free(req->args);
req->args = NULL;
enif_clear_env(req->env);
async_nif_recycle_req(req, async_nif);
req = NULL;
}
}
enif_thread_exit(0);
return 0;
}
static void
async_nif_unload(ErlNifEnv *env, struct async_nif_state *async_nif)
{
unsigned int i;
unsigned int num_queues = async_nif->num_queues;
struct async_nif_work_queue *q = NULL;
struct async_nif_req_entry *req = NULL;
__UNUSED(env);
STAT_PRINT(async_nif, qwait, "wterl");
/* Signal the worker threads, stop what you're doing and exit. To
ensure that we don't race with the enqueue() process we first
lock all the worker queues, then set shutdown to true, then
unlock. The enqueue function will take the queue mutex, then
test for shutdown condition, then enqueue only if not shutting
down. */
for (i = 0; i < num_queues; i++) {
q = &async_nif->queues[i];
enif_mutex_lock(q->reqs_mutex);
}
async_nif->shutdown = 1;
for (i = 0; i < num_queues; i++) {
q = &async_nif->queues[i];
enif_cond_broadcast(q->reqs_cnd);
enif_mutex_unlock(q->reqs_mutex);
}
/* Join for the now exiting worker threads. */
for (i = 0; i < async_nif->num_workers; ++i) {
void *exit_value = 0; /* We ignore the thread_join's exit value. */
enif_thread_join(async_nif->worker_entries[i].tid, &exit_value);
}
/* Free req structres sitting on the recycle queue. */
enif_mutex_lock(async_nif->recycled_req_mutex);
req = NULL;
fifo_q_foreach(reqs, async_nif->recycled_reqs, req, {
enif_free_env(req->env);
enif_free(req);
});
fifo_q_free(reqs, async_nif->recycled_reqs);
/* Cleanup in-flight requests, mutexes and conditions in each work queue. */
for (i = 0; i < num_queues; i++) {
q = &async_nif->queues[i];
/* Worker threads are stopped, now toss anything left in the queue. */
req = NULL;
fifo_q_foreach(reqs, q->reqs, req, {
enif_clear_env(req->env);
enif_send(NULL, &req->pid, req->env,
enif_make_tuple2(req->env, enif_make_atom(req->env, "error"),
enif_make_atom(req->env, "shutdown")));
req->fn_post(req->args);
enif_free_env(req->env);
enif_free(req->args);
enif_free(req);
});
fifo_q_free(reqs, q->reqs);
enif_mutex_destroy(q->reqs_mutex);
enif_cond_destroy(q->reqs_cnd);
}
enif_mutex_unlock(async_nif->recycled_req_mutex);
enif_mutex_destroy(async_nif->recycled_req_mutex);
memset(async_nif, 0, sizeof(struct async_nif_state) + (sizeof(struct async_nif_work_queue) * async_nif->num_queues));
enif_free(async_nif);
}
static void *
async_nif_load()
{
static int has_init = 0;
unsigned int i, j, num_queues;
ErlNifSysInfo info;
struct async_nif_state *async_nif;
/* Don't init more than once. */
if (has_init) return 0;
else has_init = 1;
/* Find out how many schedulers there are. */
enif_system_info(&info, sizeof(ErlNifSysInfo));
/* Size the number of work queues according to schedulers. */
if (info.scheduler_threads > ASYNC_NIF_MAX_WORKERS / 2) {
num_queues = ASYNC_NIF_MAX_WORKERS / 2;
} else {
int remainder = ASYNC_NIF_MAX_WORKERS % info.scheduler_threads;
if (remainder != 0)
num_queues = info.scheduler_threads - remainder;
else
num_queues = info.scheduler_threads;
if (num_queues < 2)
num_queues = 2;
}
/* Init our portion of priv_data's module-specific state. */
async_nif = enif_alloc(sizeof(struct async_nif_state) +
sizeof(struct async_nif_work_queue) * num_queues);
if (!async_nif)
return NULL;
memset(async_nif, 0, sizeof(struct async_nif_state) +
sizeof(struct async_nif_work_queue) * num_queues);
async_nif->num_queues = num_queues;
async_nif->num_workers = 2 * num_queues;
async_nif->next_q = 0;
async_nif->shutdown = 0;
async_nif->recycled_reqs = fifo_q_new(reqs, ASYNC_NIF_MAX_QUEUED_REQS);
async_nif->recycled_req_mutex = enif_mutex_create(NULL);
STAT_INIT(async_nif, qwait);
for (i = 0; i < async_nif->num_queues; i++) {
struct async_nif_work_queue *q = &async_nif->queues[i];
q->reqs = fifo_q_new(reqs, ASYNC_NIF_WORKER_QUEUE_SIZE);
q->reqs_mutex = enif_mutex_create(NULL);
q->reqs_cnd = enif_cond_create(NULL);
STAT_INIT(q, qwait);
}
/* Setup the thread pool management. */
memset(async_nif->worker_entries, 0, sizeof(struct async_nif_worker_entry) * ASYNC_NIF_MAX_WORKERS);
/* Start the worker threads. */
for (i = 0; i < async_nif->num_workers; i++) {
struct async_nif_worker_entry *we = &async_nif->worker_entries[i];
we->async_nif = async_nif;
we->worker_id = i;
we->q = &async_nif->queues[i % async_nif->num_queues];
if (enif_thread_create(NULL, &async_nif->worker_entries[i].tid,
&async_nif_worker_fn, (void*)we, NULL) != 0) {
async_nif->shutdown = 1;
for (j = 0; j < async_nif->num_queues; j++) {
struct async_nif_work_queue *q = &async_nif->queues[j];
enif_cond_broadcast(q->reqs_cnd);
}
while(i-- > 0) {
void *exit_value = 0; /* Ignore this. */
enif_thread_join(async_nif->worker_entries[i].tid, &exit_value);
}
for (j = 0; j < async_nif->num_queues; j++) {
struct async_nif_work_queue *q = &async_nif->queues[j];
enif_mutex_destroy(q->reqs_mutex);
enif_cond_destroy(q->reqs_cnd);
}
memset(async_nif->worker_entries, 0, sizeof(struct async_nif_worker_entry) * ASYNC_NIF_MAX_WORKERS);
enif_free(async_nif);
return NULL;
}
}
return async_nif;
}
static void
async_nif_upgrade(ErlNifEnv *env)
{
__UNUSED(env);
// TODO:
}
#if defined(__cplusplus)
}
#endif
#endif // __ASYNC_NIF_H__

61
c_src/common.h Normal file
View file

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
* Author: Gregory Burd <greg@basho.com> <greg@burd.me>
*
* This file is provided to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef __COMMON_H__
#define __COMMON_H__
#if defined(__cplusplus)
extern "C" {
#endif
#ifdef DEBUG
#include <stdio.h>
#include <stdarg.h>
#ifndef DPRINTF
#define DPRINTF(fmt, ...) \
do { \
fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__); \
fflush(stderr); \
} while(0)
#endif
#ifndef DPUTS
#define DPUTS(arg) DPRINTF("%s", arg)
#endif
#else
#define DPRINTF(fmt, ...) ((void) 0)
#define DPUTS(arg) ((void) 0)
#endif
#ifndef COMPQUIET
#define COMPQUIET(n, v) do { \
(n) = (v); \
(n) = (n); \
} while (0)
#endif
#ifndef __UNUSED
#define __UNUSED(v) ((void)(v))
#endif
#if defined(__cplusplus)
}
#endif
#endif // __COMMON_H__

98
c_src/duration.h Normal file
View file

@ -0,0 +1,98 @@
/*
* Copyright (C) 2013, all rights reserved by Gregory Burd <greg@burd.me>
*
* This Source Code Form is subject to the terms of the Mozilla Public License,
* version 2 (MPLv2). If a copy of the MPL was not distributed with this file,
* you can obtain one at: http://mozilla.org/MPL/2.0/
*
* NOTES:
* - on some platforms this will require -lrt
*/
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/timeb.h>
typedef enum { ns = 0, mcs, ms, s } time_scale;
struct scale_time {
const char *abbreviation;
const char *name;
uint64_t mul, div, overhead, ticks_per;
};
static const struct scale_time scale[] = {
{ "ns", "nanosecond", 1000000000LL, 1LL, 10, 2300000000000LL },
{ "mcs", "microsecond", 1000000LL, 1000LL, 10, 2300000000LL },
{ "ms", "millisecond", 1000LL, 1000000LL, 10, 2300000LL },
{ "sec", "second", 1LL, 1000000000LL, 10, 2300LL } };
static uint64_t ts(time_scale unit)
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return (((uint64_t)ts.tv_sec * scale[unit].mul) +
((uint64_t)ts.tv_nsec / scale[unit].div));
}
#if 0
//if defined(__i386__) || defined(__x86_64__)
/**
* cpu_clock_ticks()
*
* A measure provided by Intel x86 CPUs which provides the number of cycles
* (aka "ticks") executed as a counter using the RDTSC instruction.
*/
static inline uint64_t cpu_clock_ticks()
{
uint32_t lo, hi;
__asm__ __volatile__ (
"xorl %%eax, %%eax\n"
"cpuid\n"
"rdtsc\n"
: "=a" (lo), "=d" (hi)
:
: "%ebx", "%ecx" );
return (uint64_t)hi << 32 | lo;
}
/**
* cpu_clock_ticks()
*
* An approximation of elapsed [ns, mcs, ms, s] from CPU clock ticks.
*/
static uint64_t elapsed_cpu_clock_ticks(uint64_t start, time_scale unit)
{
return (cpu_clock_ticks() - start - scale[unit].overhead) * scale[unit].ticks_per;
}
#endif
typedef struct {
uint64_t then;
time_scale unit;
} duration_t;
static inline uint64_t elapsed(duration_t *d)
{
uint64_t now = ts(d->unit);
uint64_t elapsed = now - d->then;
d->then = now;
return elapsed;
}
#define DURATION(name, resolution) duration_t name = \
{ts(resolution), resolution}
#define ELAPSED_DURING(result, resolution, block) \
do { \
DURATION(__x, resolution); \
do block while(0); \
*result = elapsed(&__x); \
} while(0);
#define CYCLES_DURING(result, block) \
do { \
uint64_t __begin = cpu_clock_ticks(); \
do block while(0); \
*result = cpu_clock_ticks() - __begin; \
} while(0);

722
c_src/emdb.c Normal file
View file

@ -0,0 +1,722 @@
/* -------------------------------------------------------------------------
* This file is part of EMDB - Erlang MDB API
*
* Copyright (c) 2012 by Aleph Archives. All rights reserved.
*
* -------------------------------------------------------------------------
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* -------------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/param.h>
#include <erl_nif.h>
#include <erl_driver.h>
#include "common.h"
#include "async_nif.h"
#include "stats.h"
#include "lmdb.h"
STAT_DECL(emdb_get, 1000);
STAT_DECL(emdb_put, 1000);
STAT_DECL(emdb_del, 1000);
STAT_DECL(emdb_upd, 1000);
static ErlNifResourceType *emdb_RESOURCE;
struct emdb {
MDB_env *env;
MDB_dbi dbi;
STAT_DEF(emdb_get);
STAT_DEF(emdb_put);
STAT_DEF(emdb_del);
STAT_DEF(emdb_upd);
};
struct emdb_priv_data {
void *async_nif_priv; // Note: must be first element in struct
};
/* Global init for async_nif. */
ASYNC_NIF_INIT(emdb);
/* Atoms (initialized in on_load) */
static ERL_NIF_TERM ATOM_ERROR;
static ERL_NIF_TERM ATOM_OK;
static ERL_NIF_TERM ATOM_NOT_FOUND;
static ERL_NIF_TERM ATOM_EXISTS;
static ERL_NIF_TERM ATOM_KEYEXIST;
static ERL_NIF_TERM ATOM_NOTFOUND;
static ERL_NIF_TERM ATOM_PAGE_NOTFOUND;
static ERL_NIF_TERM ATOM_CORRUPTED;
static ERL_NIF_TERM ATOM_PANIC;
static ERL_NIF_TERM ATOM_VERSION_MISMATCH;
static ERL_NIF_TERM ATOM_KEYEXIST;
static ERL_NIF_TERM ATOM_MAP_FULL;
static ERL_NIF_TERM ATOM_DBS_FULL;
static ERL_NIF_TERM ATOM_READERS_FULL;
static ERL_NIF_TERM ATOM_TLS_FULL;
static ERL_NIF_TERM ATOM_TXN_FULL;
static ERL_NIF_TERM ATOM_CURSOR_FULL;
static ERL_NIF_TERM ATOM_PAGE_FULL;
static ERL_NIF_TERM ATOM_MAP_RESIZED;
static ERL_NIF_TERM ATOM_INCOMPATIBLE;
static ERL_NIF_TERM ATOM_BAD_RSLOT;
#define CHECK(expr, label) \
if (MDB_SUCCESS != (ret = (expr))) { \
DPRINTF("CHECK(\"%s\") failed \"%s\" at %s:%d in %s()\n", \
#expr, mdb_strerror(ret), __FILE__, __LINE__, __func__);\
err = __strerror_term(env, ret); \
goto label; \
}
#define FAIL_ERR(e, label) \
do { \
err = __strerror_term(env, (e)); \
goto label; \
} while(0)
/**
* Convenience function to generate {error, {errno, Reason}}
*
* env NIF environment
* err number of last error
*/
static ERL_NIF_TERM
__strerror_term(ErlNifEnv* env, int err)
{
ERL_NIF_TERM term;
if (err < MDB_LAST_ERRCODE && err > MDB_KEYEXIST) {
switch (err) {
case MDB_KEYEXIST: /** key/data pair already exists */
term = ATOM_KEYEXIST;
break;
case MDB_NOTFOUND: /** key/data pair not found (EOF) */
term = ATOM_NOTFOUND;
break;
case MDB_PAGE_NOTFOUND: /** Requested page not found - this usually indicates corruption */
term = ATOM_PAGE_NOTFOUND;
break;
case MDB_CORRUPTED: /** Located page was wrong type */
term = ATOM_CORRUPTED;
break;
case MDB_PANIC : /** Update of meta page failed, probably I/O error */
term = ATOM_PANIC;
break;
case MDB_VERSION_MISMATCH: /** Environment version mismatch */
term = ATOM_VERSION_MISMATCH;
break;
case MDB_INVALID: /** File is not a valid MDB file */
term = ATOM_KEYEXIST;
break;
case MDB_MAP_FULL: /** Environment mapsize reached */
term = ATOM_MAP_FULL;
break;
case MDB_DBS_FULL: /** Environment maxdbs reached */
term = ATOM_DBS_FULL;
break;
case MDB_READERS_FULL: /** Environment maxreaders reached */
term = ATOM_READERS_FULL;
break;
case MDB_TLS_FULL: /** Too many TLS keys in use - Windows only */
term = ATOM_TLS_FULL;
break;
case MDB_TXN_FULL: /** Txn has too many dirty pages */
term = ATOM_TXN_FULL;
break;
case MDB_CURSOR_FULL: /** Cursor stack too deep - internal error */
term = ATOM_CURSOR_FULL;
break;
case MDB_PAGE_FULL: /** Page has not enough space - internal error */
term = ATOM_PAGE_FULL;
break;
case MDB_MAP_RESIZED: /** Database contents grew beyond environment mapsize */
term = ATOM_MAP_RESIZED;
break;
case MDB_INCOMPATIBLE: /** Database flags changed or would change */
term = ATOM_INCOMPATIBLE;
break;
case MDB_BAD_RSLOT: /** Invalid reuse of reader locktable slot */
term = ATOM_BAD_RSLOT;
break;
}
} else {
term = enif_make_atom(env, erl_errno_id(err));
}
/* We return the errno value as well as the message here because the error
message provided by strerror() for differ across platforms and/or may be
localized to any given language (i18n). Use the errno atom rather than
the message when matching in Erlang. You've been warned. */
return enif_make_tuple(env, 2, ATOM_ERROR,
enif_make_tuple(env, 2, term,
enif_make_string(env, mdb_strerror(err), ERL_NIF_LATIN1)));
}
/**
* Opens a MDB database.
*
* argv[0] path to directory for the database files
* argv[1] size of database
* argv[2] flags
*/
ASYNC_NIF_DECL(
emdb_open,
{ // struct
char dirname[MAXPATHLEN];
ErlNifUInt64 mapsize;
ErlNifUInt64 envflags;
},
{ // pre
if (!(argc == 3 &&
enif_is_list(env, argv[0]) &&
enif_is_number(env, argv[1]) &&
enif_is_number(env, argv[2]))) {
ASYNC_NIF_RETURN_BADARG();
}
if (enif_get_string(env, argv[0], args->dirname,
MAXPATHLEN, ERL_NIF_LATIN1) <= 0)
ASYNC_NIF_RETURN_BADARG();
enif_get_uint64(env, argv[1], &(args->mapsize));
enif_get_uint64(env, argv[2], &(args->envflags));
},
{ // work
ERL_NIF_TERM err;
MDB_txn *txn;
struct emdb *handle;
int ret;
if ((handle = enif_alloc_resource(emdb_RESOURCE, sizeof(struct emdb))) == NULL)
FAIL_ERR(ENOMEM, err3);
STAT_INIT(handle, emdb_get);
STAT_INIT(handle, emdb_put);
STAT_INIT(handle, emdb_upd);
STAT_INIT(handle, emdb_del);
CHECK(mdb_env_create(&(handle->env)), err2);
if (mdb_env_set_mapsize(handle->env, args->mapsize)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
CHECK(mdb_env_open(handle->env, args->dirname, args->envflags, 0664), err2);
CHECK(mdb_txn_begin(handle->env, NULL, 0, &txn), err2);
CHECK(mdb_open(txn, NULL, 0, &(handle->dbi)), err1);
CHECK(mdb_txn_commit(txn), err1);
ERL_NIF_TERM term = enif_make_resource(env, handle);
enif_release_resource(handle);
ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_OK, term));
return;
err1:
mdb_txn_abort(txn);
err2:
mdb_env_close(handle->env);
err3:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
});
/**
* Closes a MDB database.
*
* argv[0] reference to the MDB handle resource
*/
ASYNC_NIF_DECL(
emdb_close,
{ // struct
struct emdb *handle;
},
{ // pre
if (!(argc == 1 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle))) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
enif_keep_resource((void*)args->handle);
},
{ // work
STAT_PRINT(args->handle, emdb_get, "emdb");
STAT_PRINT(args->handle, emdb_put, "emdb");
STAT_PRINT(args->handle, emdb_del, "emdb");
STAT_PRINT(args->handle, emdb_upd, "emdb");
mdb_env_close(args->handle->env);
STAT_RESET(args->handle, emdb_get);
STAT_RESET(args->handle, emdb_put);
STAT_RESET(args->handle, emdb_del);
STAT_RESET(args->handle, emdb_upd);
args->handle->env = NULL;
ASYNC_NIF_REPLY(ATOM_OK);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
/**
* Store a value indexed by key.
*
* argv[0] reference to the MDB handle resource
* argv[1] key as an Erlang binary
* argv[2] value as an Erlang binary
*/
ASYNC_NIF_DECL(
emdb_put,
{ // struct
struct emdb *handle;
ERL_NIF_TERM key;
ERL_NIF_TERM val;
},
{ // pre
if (!(argc == 3 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle) &&
enif_is_binary(env, argv[1]) &&
enif_is_binary(env, argv[2]) )) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
STAT_TICK(args->handle, emdb_put);
enif_keep_resource((void*)args->handle);
args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
args->val = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[2]);
},
{ // work
ERL_NIF_TERM err;
ErlNifBinary key;
ErlNifBinary val;
MDB_val mkey;
MDB_val mdata;
MDB_txn * txn;
int ret;
if (!enif_inspect_iolist_as_binary(env, args->key, &key)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
if (!enif_inspect_iolist_as_binary(env, args->val, &val)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
mkey.mv_size = key.size;
mkey.mv_data = key.data;
mdata.mv_size = val.size;
mdata.mv_data = val.data;
CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
ret = mdb_put(txn, args->handle->dbi, &mkey, &mdata, MDB_NOOVERWRITE);
if (MDB_KEYEXIST == ret) {
ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_ERROR, ATOM_EXISTS));
return;
}
if (ret != 0)
FAIL_ERR(ret, err1);
CHECK(mdb_txn_commit(txn), err1);
STAT_TOCK(args->handle, emdb_put);
ASYNC_NIF_REPLY(ATOM_OK);
return;
err1:
mdb_txn_abort(txn);
err2:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
/**
* Update and existin value indexed by key.
*
* argv[0] reference to the MDB handle resource
* argv[1] key as an Erlang binary
* argv[2] value as an Erlang binary
*/
ASYNC_NIF_DECL(
emdb_update,
{ // struct
struct emdb *handle;
ERL_NIF_TERM key;
ERL_NIF_TERM val;
},
{ // pre
if (!(argc == 3 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle) &&
enif_is_binary(env, argv[1]) &&
enif_is_binary(env, argv[2]) )) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
STAT_TICK(args->handle, emdb_upd);
enif_keep_resource((void*)args->handle);
args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
args->val = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[2]);
},
{ // work
ERL_NIF_TERM err;
ErlNifBinary key;
ErlNifBinary val;
MDB_val mkey;
MDB_val mdata;
MDB_txn * txn;
int ret;
if (!enif_inspect_iolist_as_binary(env, args->key, &key)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
if (!enif_inspect_iolist_as_binary(env, args->val, &val)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
mkey.mv_size = key.size;
mkey.mv_data = key.data;
mdata.mv_size = val.size;
mdata.mv_data = val.data;
CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
CHECK(mdb_put(txn, args->handle->dbi, &mkey, &mdata, 0), err1);
CHECK(mdb_txn_commit(txn), err1);
STAT_TOCK(args->handle, emdb_upd);
ASYNC_NIF_REPLY(ATOM_OK);
return;
err1:
mdb_txn_abort(txn);
err2:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
/**
* Retrieve the value associated with the key.
*
* argv[0] reference to the MDB handle resource
* argv[1] key as an Erlang binary
*/
ASYNC_NIF_DECL(
emdb_get,
{ // struct
struct emdb *handle;
ERL_NIF_TERM key;
},
{ // pre
if (!(argc == 2 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle) &&
enif_is_binary(env, argv[1]) )) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
STAT_TICK(args->handle, emdb_get);
enif_keep_resource((void*)args->handle);
args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
},
{ // work
ERL_NIF_TERM err;
ErlNifBinary key;
ERL_NIF_TERM val;
unsigned char *bin;
MDB_val mkey;
MDB_val mdata;
MDB_txn * txn;
int ret;
if (!enif_inspect_iolist_as_binary(env, args->key, &key)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
mkey.mv_size = key.size;
mkey.mv_data = key.data;
CHECK(mdb_txn_begin(args->handle->env, NULL, 0, &txn), err);
ret = mdb_get(txn, args->handle->dbi, &mkey, &mdata);
mdb_txn_abort(txn);
if (MDB_NOTFOUND == ret) {
ASYNC_NIF_REPLY(ATOM_NOT_FOUND);
return;
}
if (ret != 0)
FAIL_ERR(ret, err);
bin = enif_make_new_binary(env, mdata.mv_size, &val);
if (!bin)
FAIL_ERR(ENOMEM, err);
memcpy(bin, mdata.mv_data, mdata.mv_size);
STAT_TOCK(args->handle, emdb_get);
ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_OK, val));
return;
err:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
/**
* Delete the value associated with the key.
*
* argv[0] reference to the MDB handle resource
* argv[1] key as an Erlang binary
*/
ASYNC_NIF_DECL(
emdb_del,
{ // struct
struct emdb *handle;
ERL_NIF_TERM key;
},
{ // pre
if (!(argc == 2 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle) &&
enif_is_binary(env, argv[1]) )) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
STAT_TICK(args->handle, emdb_del);
enif_keep_resource((void*)args->handle);
args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
},
{ // work
ERL_NIF_TERM err;
ErlNifBinary key;
MDB_val mkey;
MDB_txn * txn;
int ret;
if (!enif_inspect_iolist_as_binary(env, args->key, &key)) {
ASYNC_NIF_REPLY(enif_make_badarg(env));
return;
}
mkey.mv_size = key.size;
mkey.mv_data = key.data;
CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err);
ret = mdb_del(txn, args->handle->dbi, &mkey, NULL);
if(MDB_NOTFOUND == ret) {
mdb_txn_abort(txn);
ASYNC_NIF_REPLY(ATOM_NOT_FOUND);
return;
}
CHECK(mdb_txn_commit(txn), err);
STAT_TOCK(args->handle, emdb_del);
ASYNC_NIF_REPLY(ATOM_OK);
return;
err:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
/**
* Drop a MDB database.
*
* argv[0] reference to the MDB handle resource
*/
ASYNC_NIF_DECL(
emdb_drop,
{ // struct
struct emdb *handle;
},
{ // pre
if (!(argc == 1 &&
enif_get_resource(env, argv[0], emdb_RESOURCE, (void**)&args->handle))) {
ASYNC_NIF_RETURN_BADARG();
}
if (!args->handle->env)
ASYNC_NIF_RETURN_BADARG();
enif_keep_resource((void*)args->handle);
},
{ // work
ERL_NIF_TERM err;
MDB_txn * txn;
int ret;
CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
CHECK(mdb_drop(txn, args->handle->dbi, 0), err1);
CHECK(mdb_txn_commit(txn), err1);
ASYNC_NIF_REPLY(ATOM_OK);
return;
err1:
mdb_txn_abort(txn);
err2:
ASYNC_NIF_REPLY(err);
return;
},
{ // post
enif_release_resource((void*)args->handle);
});
static int emdb_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
{
__UNUSED(load_info);
ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;
struct emdb_priv_data *priv = enif_alloc(sizeof(struct emdb_priv_data));
if (!priv)
return ENOMEM;
memset(priv, 0, sizeof(struct emdb_priv_data));
/* Note: !!! the first element of our priv_data struct *must* be the
pointer to the async_nif's private data which we set here. */
ASYNC_NIF_LOAD(emdb, priv->async_nif_priv);
if (!priv)
return ENOMEM;
*priv_data = priv;
ATOM_ERROR = enif_make_atom(env, "error");
ATOM_OK = enif_make_atom(env, "ok");
ATOM_NOT_FOUND = enif_make_atom(env, "not_found");
ATOM_EXISTS = enif_make_atom(env, "exists");
ATOM_KEYEXIST = enif_make_atom(env, "key_exist");
ATOM_NOTFOUND = enif_make_atom(env, "notfound");
ATOM_CORRUPTED = enif_make_atom(env, "corrupted");
ATOM_PANIC = enif_make_atom(env, "panic");
ATOM_VERSION_MISMATCH = enif_make_atom(env, "version_mismatch");
ATOM_MAP_FULL = enif_make_atom(env, "map_full");
ATOM_DBS_FULL = enif_make_atom(env, "dbs_full");
ATOM_READERS_FULL = enif_make_atom(env, "readers_full");
ATOM_TLS_FULL = enif_make_atom(env, "tls_full");
ATOM_TXN_FULL = enif_make_atom(env, "txn_full");
ATOM_CURSOR_FULL = enif_make_atom(env, "cursor_full");
ATOM_PAGE_FULL = enif_make_atom(env, "page_full");
ATOM_MAP_RESIZED = enif_make_atom(env, "map_resized");
ATOM_INCOMPATIBLE = enif_make_atom(env, "incompatible");
ATOM_BAD_RSLOT = enif_make_atom(env, "bad_rslot");
emdb_RESOURCE = enif_open_resource_type(env, NULL, "emdb_resource",
NULL, flags, NULL);
return (0);
}
static int emdb_reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM info)
{
__UNUSED(env);
__UNUSED(priv_data);
__UNUSED(info);
return (0); // TODO:
}
static int emdb_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv, ERL_NIF_TERM load_info)
{
__UNUSED(env);
__UNUSED(priv_data);
__UNUSED(old_priv);
__UNUSED(load_info);
ASYNC_NIF_UPGRADE(emdb, env);
return (0); // TODO:
}
static void emdb_unload(ErlNifEnv* env, void* priv_data)
{
struct emdb_priv_data *priv = (struct emdb_priv_data *)priv_data;
ASYNC_NIF_UNLOAD(emdb, env, priv->async_nif_priv);
enif_free(priv);
return;
}
static ErlNifFunc nif_funcs [] = {
{"open", 4, emdb_open},
{"close", 2, emdb_close},
{"put", 4, emdb_put},
{"get", 3, emdb_get},
{"del", 3, emdb_del},
{"update", 4, emdb_update},
{"drop", 2, emdb_drop}
};
/* driver entry point */
ERL_NIF_INIT(emdb,
nif_funcs,
& emdb_load,
& emdb_reload,
& emdb_upgrade,
& emdb_unload)

View file

@ -1,479 +0,0 @@
/* -------------------------------------------------------------------------
* This file is part of EMDB - Erlang MDB API
*
* Copyright (c) 2012 by Aleph Archives. All rights reserved.
*
* -------------------------------------------------------------------------
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* -------------------------------------------------------------------------*/
/*
* C headers
*/
#include <sys/param.h> /* for MAXPATHLEN constant */
#include <erl_nif.h> /* for Erlang NIF interface */
#include "uthash.h" /* for uthash */
#include "mdb.h" /* for MDB interface */
#define FREE(p) (NULL == (p) ? 0 : (free(p), p = NULL))
#define FAIL_FAST(Error, Goto) \
do{ \
err = Error; \
goto Goto; \
}while(0)
struct emdb_map_t {
MDB_env * env;
MDB_dbi dbi;
UT_hash_handle hh;
};
static ERL_NIF_TERM atom_ok;
static ERL_NIF_TERM atom_none;
static struct emdb_map_t * emdb_map = NULL;
/* emdb ret */
#define EMDB_RET_KEY_EXIST "key_exist"
/* emdb errors */
#define EMDB_MALLOC_ERR "error_malloc"
#define EMDB_MAKE_BINARY_ERR "error_make_binary"
#define EMDB_CREATE_ERR "error_create"
#define EMDB_MAPSIZE_ERR "error_mapsize"
#define EMDB_OPEN_ERR "error_open"
#define EMDB_TXN_BEGIN_ERR "error_txn_begin"
#define EMDB_TXN_COMMIT_ERR "error_txn_commit"
#define EMDB_OPEN_DBI_ERR "error_open_dbi"
#define EMDB_INVALID_HANDLE_ERR "error_invalid_handle"
#define EMDB_PUT_ERR "error_put"
#define EMDB_UPDATE_ERR "error_update"
#define EMDB_KEY_NOT_FOUND "error_key_not_found"
#define EMDB_DROP_ERR "error_drop"
/*
* Error handling callbacks
*/
static void emdb_free (struct emdb_map_t * emdb_obj)
{
FREE(emdb_obj);
}
/*
* Driver callbacks
*/
static ERL_NIF_TERM emdb_open_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
char dirname [MAXPATHLEN];
struct emdb_map_t * node;
MDB_txn * txn;
char * err;
ErlNifUInt64 mapsize;
ErlNifUInt64 envflags;
if (enif_get_string(env, argv[0], dirname, MAXPATHLEN, ERL_NIF_LATIN1) <= 0)
return enif_make_badarg(env);
if(! (node = calloc(1, sizeof(struct emdb_map_t))))
FAIL_FAST(EMDB_MALLOC_ERR, err3);
if (mdb_env_create(& (node -> env)))
FAIL_FAST(EMDB_CREATE_ERR, err2);
if (! enif_get_uint64(env, argv[1], & mapsize))
return enif_make_badarg(env);
if (mdb_env_set_mapsize(node -> env, mapsize))
FAIL_FAST(EMDB_MAPSIZE_ERR, err2);
if (! enif_get_uint64(env, argv[2], & envflags))
return enif_make_badarg(env);
if (mdb_env_open(node -> env, dirname, envflags, 0664))
FAIL_FAST(EMDB_OPEN_ERR, err2);
if (mdb_txn_begin(node -> env, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err2);
if (mdb_open(txn, NULL, 0, & (node -> dbi)))
FAIL_FAST(EMDB_OPEN_DBI_ERR, err1);
if (mdb_txn_commit(txn))
FAIL_FAST(EMDB_TXN_COMMIT_ERR, err1);
HASH_ADD_PTR(emdb_map, env, node);
return enif_make_tuple(env, 2,
atom_ok,
enif_make_ulong(env, (unsigned long) node -> env));
err1:
mdb_txn_abort(txn);
err2:
mdb_env_close(node -> env);
err3:
emdb_free(node);
return enif_make_atom(env, err);
}
static ERL_NIF_TERM emdb_close_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
MDB_env * handle;
struct emdb_map_t * node;
unsigned long addr;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
HASH_DEL(emdb_map, node);
mdb_env_close(handle);
emdb_free(node);
return atom_ok;
}
static ERL_NIF_TERM emdb_put_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary key;
ErlNifBinary val;
MDB_val mkey;
MDB_val mdata;
MDB_env * handle;
MDB_txn * txn;
struct emdb_map_t * node;
unsigned long addr;
char * err;
int ret;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
if (! enif_inspect_iolist_as_binary(env, argv[1], &key))
return enif_make_badarg(env);
if (! enif_inspect_iolist_as_binary(env, argv[2], &val))
return enif_make_badarg(env);
if (mdb_txn_begin(handle, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err2);
mkey.mv_size = key.size;
mkey.mv_data = key.data;
mdata.mv_size = val.size;
mdata.mv_data = val.data;
ret = mdb_put(txn, node -> dbi, & mkey, & mdata, MDB_NOOVERWRITE);
if (MDB_KEYEXIST == ret)
FAIL_FAST(EMDB_RET_KEY_EXIST, err1);
if (ret)
FAIL_FAST(EMDB_PUT_ERR, err1);
if (mdb_txn_commit(txn))
FAIL_FAST(EMDB_TXN_COMMIT_ERR, err1);
return atom_ok;
err1:
mdb_txn_abort(txn);
err2:
return enif_make_atom(env, err);
}
static ERL_NIF_TERM emdb_get_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary key;
ErlNifBinary val = {0};
ERL_NIF_TERM term;
MDB_val mkey;
MDB_val mdata;
MDB_env * handle;
MDB_txn * txn;
struct emdb_map_t * node;
char * err;
unsigned long addr;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
if (! enif_inspect_iolist_as_binary(env, argv[1], &key))
return enif_make_badarg(env);
mkey.mv_size = key.size;
mkey.mv_data = key.data;
if (mdb_txn_begin(handle, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err);
if(mdb_get(txn, node -> dbi, & mkey, & mdata))
{
mdb_txn_abort(txn);
return atom_none;
}
val.size = mdata.mv_size;
val.data = mdata.mv_data;
term = enif_make_binary(env, &val);
mdb_txn_abort(txn);
if (! term)
FAIL_FAST(EMDB_MAKE_BINARY_ERR, err);
return enif_make_tuple(env, 2,
atom_ok,
term);
err:
return enif_make_atom(env, err);
}
static ERL_NIF_TERM emdb_del_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary key;
MDB_val mkey;
MDB_env * handle;
MDB_txn * txn;
struct emdb_map_t * node;
char * err;
unsigned long addr;
int ret;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
if (! enif_inspect_iolist_as_binary(env, argv[1], &key))
return enif_make_badarg(env);
mkey.mv_size = key.size;
mkey.mv_data = key.data;
if (mdb_txn_begin(handle, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err);
ret = mdb_del(txn, node -> dbi, & mkey, NULL);
if (mdb_txn_commit(txn))
FAIL_FAST(EMDB_TXN_COMMIT_ERR, err);
if(ret)
return atom_none;
return atom_ok;
err:
return enif_make_atom(env, err);
}
static ERL_NIF_TERM emdb_update_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary key;
ErlNifBinary val;
MDB_val mkey;
MDB_val mdata;
MDB_env * handle;
MDB_txn * txn;
struct emdb_map_t * node;
unsigned long addr;
char * err;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
if (! enif_inspect_iolist_as_binary(env, argv[1], &key))
return enif_make_badarg(env);
if (! enif_inspect_iolist_as_binary(env, argv[2], &val))
return enif_make_badarg(env);
if (mdb_txn_begin(handle, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err2);
mkey.mv_size = key.size;
mkey.mv_data = key.data;
mdata.mv_size = val.size;
mdata.mv_data = val.data;
if (mdb_put(txn, node -> dbi, & mkey, & mdata, 0))
FAIL_FAST(EMDB_UPDATE_ERR, err1);
if (mdb_txn_commit(txn))
FAIL_FAST(EMDB_TXN_COMMIT_ERR, err1);
return atom_ok;
err1:
mdb_txn_abort(txn);
err2:
return enif_make_atom(env, err);
}
static ERL_NIF_TERM emdb_drop_nif (ErlNifEnv * env,
int argc, const ERL_NIF_TERM argv[])
{
MDB_env * handle;
MDB_txn * txn;
struct emdb_map_t * node;
unsigned long addr;
char * err;
int ret;
if (! enif_get_ulong(env, argv[0], & addr))
return enif_make_badarg(env);
handle = (MDB_env *) addr;
HASH_FIND_PTR(emdb_map, & handle, node);
if (NULL == node)
return enif_make_atom(env, EMDB_INVALID_HANDLE_ERR);
if (mdb_txn_begin(handle, NULL, 0, & txn))
FAIL_FAST(EMDB_TXN_BEGIN_ERR, err2);
ret = mdb_drop(txn, node -> dbi, 0);
if (ret)
FAIL_FAST(EMDB_DROP_ERR, err1);
if (mdb_txn_commit(txn))
FAIL_FAST(EMDB_TXN_COMMIT_ERR, err1);
return atom_ok;
err1:
mdb_txn_abort(txn);
err2:
return enif_make_atom(env, err);
}
static int emdb_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info)
{
atom_ok = enif_make_atom(env, "ok");
atom_none = enif_make_atom(env, "none");
return (0);
}
static int emdb_reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
{
return (0);
}
static int emdb_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM load_info)
{
return (0);
}
static void emdb_unload(ErlNifEnv* env, void* priv)
{
return;
}
static ErlNifFunc nif_funcs [] = {
{"open", 3, emdb_open_nif},
{"close", 1, emdb_close_nif},
{"put", 3, emdb_put_nif},
{"get", 2, emdb_get_nif},
{"del", 2, emdb_del_nif},
{"update", 3, emdb_update_nif},
{"drop", 1, emdb_drop_nif}
};
/* driver entry point */
ERL_NIF_INIT(emdb_drv,
nif_funcs,
& emdb_load,
& emdb_reload,
& emdb_upgrade,
& emdb_unload)

93
c_src/fifo_q.h Normal file
View file

@ -0,0 +1,93 @@
/*
* fifo_q: a macro-based implementation of a FIFO Queue
*
* Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
* Author: Gregory Burd <greg@basho.com> <greg@burd.me>
*
* This file is provided to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef __FIFO_Q_H__
#define __FIFO_Q_H__
#if defined(__cplusplus)
extern "C" {
#endif
#define FIFO_QUEUE_TYPE(name) \
struct fifo_q__ ## name *
#define DECL_FIFO_QUEUE(name, type) \
struct fifo_q__ ## name { \
unsigned int h, t, s; \
type *items[]; \
}; \
static struct fifo_q__ ## name *fifo_q_ ## name ## _new(unsigned int n) { \
int sz = sizeof(struct fifo_q__ ## name) + ((n+1) * sizeof(type *));\
struct fifo_q__ ## name *q = enif_alloc(sz); \
if (!q) \
return 0; \
memset(q, 0, sz); \
q->s = n + 1; \
return q; \
} \
static inline void fifo_q_ ## name ## _free(struct fifo_q__ ## name *q) { \
memset(q, 0, sizeof(struct fifo_q__ ## name) + (q->s * sizeof(type *))); \
enif_free(q); \
} \
static inline type *fifo_q_ ## name ## _put(struct fifo_q__ ## name *q, type *n) { \
q->items[q->h] = n; \
q->h = (q->h + 1) % q->s; \
return n; \
} \
static inline type *fifo_q_ ## name ## _get(struct fifo_q__ ## name *q) { \
type *n = q->items[q->t]; \
q->items[q->t] = 0; \
q->t = (q->t + 1) % q->s; \
return n; \
} \
static inline unsigned int fifo_q_ ## name ## _size(struct fifo_q__ ## name *q) { \
return (q->h - q->t + q->s) % q->s; \
} \
static inline unsigned int fifo_q_ ## name ## _capacity(struct fifo_q__ ## name *q) { \
return q->s - 1; \
} \
static inline int fifo_q_ ## name ## _empty(struct fifo_q__ ## name *q) { \
return (q->t == q->h); \
} \
static inline int fifo_q_ ## name ## _full(struct fifo_q__ ## name *q) { \
return ((q->h + 1) % q->s) == q->t; \
}
#define fifo_q_new(name, size) fifo_q_ ## name ## _new(size)
#define fifo_q_free(name, queue) fifo_q_ ## name ## _free(queue)
#define fifo_q_get(name, queue) fifo_q_ ## name ## _get(queue)
#define fifo_q_put(name, queue, item) fifo_q_ ## name ## _put(queue, item)
#define fifo_q_size(name, queue) fifo_q_ ## name ## _size(queue)
#define fifo_q_capacity(name, queue) fifo_q_ ## name ## _capacity(queue)
#define fifo_q_empty(name, queue) fifo_q_ ## name ## _empty(queue)
#define fifo_q_full(name, queue) fifo_q_ ## name ## _full(queue)
#define fifo_q_foreach(name, queue, item, task) do { \
while(!fifo_q_ ## name ## _empty(queue)) { \
item = fifo_q_ ## name ## _get(queue); \
do task while(0); \
} \
} while(0);
#if defined(__cplusplus)
}
#endif
#endif // __FIFO_Q_H__

643
c_src/khash.h Normal file
View file

@ -0,0 +1,643 @@
/* The MIT License
Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
An example:
#include "khash.h"
KHASH_MAP_INIT_INT(32, char)
int main() {
int ret, is_missing;
khiter_t k;
khash_t(32) *h = kh_init(32);
k = kh_put(32, h, 5, &ret);
kh_value(h, k) = 10;
k = kh_get(32, h, 10);
is_missing = (k == kh_end(h));
k = kh_get(32, h, 5);
kh_del(32, h, k);
for (k = kh_begin(h); k != kh_end(h); ++k)
if (kh_exist(h, k)) kh_value(h, k) = 1;
kh_destroy(32, h);
return 0;
}
*/
/*
2011-12-29 (0.2.7):
* Minor code clean up; no actual effect.
2011-09-16 (0.2.6):
* The capacity is a power of 2. This seems to dramatically improve the
speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
- http://code.google.com/p/ulib/
- http://nothings.org/computer/judy/
* Allow to optionally use linear probing which usually has better
performance for random input. Double hashing is still the default as it
is more robust to certain non-random input.
* Added Wang's integer hash function (not used by default). This hash
function is more robust to certain non-random input.
2011-02-14 (0.2.5):
* Allow to declare global functions.
2009-09-26 (0.2.4):
* Improve portability
2008-09-19 (0.2.3):
* Corrected the example
* Improved interfaces
2008-09-11 (0.2.2):
* Improved speed a little in kh_put()
2008-09-10 (0.2.1):
* Added kh_clear()
* Fixed a compiling error
2008-09-02 (0.2.0):
* Changed to token concatenation which increases flexibility.
2008-08-31 (0.1.2):
* Fixed a bug in kh_get(), which has not been tested previously.
2008-08-31 (0.1.1):
* Added destructor
*/
#ifndef __AC_KHASH_H
#define __AC_KHASH_H
/*!
@header
Generic hash table library.
*/
#define AC_VERSION_KHASH_H "0.2.6"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
/* compiler specific configuration */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif
#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif
#ifdef _MSC_VER
#define kh_inline __inline
#else
#define kh_inline inline
#endif
typedef khint32_t khint_t;
typedef khint_t khiter_t;
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
#ifdef KHASH_LINEAR
#define __ac_inc(k, m) 1
#else
#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
#endif
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#ifndef kcalloc
#define kcalloc(N,Z) calloc(N,Z)
#endif
#ifndef kmalloc
#define kmalloc(Z) malloc(Z)
#endif
#ifndef krealloc
#define krealloc(P,Z) realloc(P,Z)
#endif
#ifndef kfree
#define kfree(P) free(P)
#endif
static const double __ac_HASH_UPPER = 0.77;
#define __KHASH_TYPE(name, khkey_t, khval_t) \
typedef struct { \
khint_t n_buckets, size, n_occupied, upper_bound; \
khint32_t *flags; \
khkey_t *keys; \
khval_t *vals; \
} kh_##name##_t;
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
extern kh_##name##_t *kh_init_##name(void); \
extern void kh_destroy_##name(kh_##name##_t *h); \
extern void kh_clear_##name(kh_##name##_t *h); \
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
extern void kh_del_##name(kh_##name##_t *h, khint_t x);
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
SCOPE kh_##name##_t *kh_init_##name(void) { \
return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
} \
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
{ \
if (h) { \
kfree((void *)h->keys); kfree(h->flags); \
kfree((void *)h->vals); \
kfree(h); \
} \
} \
SCOPE void kh_clear_##name(kh_##name##_t *h) \
{ \
if (h && h->flags) { \
memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
h->size = h->n_occupied = 0; \
} \
} \
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
{ \
if (h->n_buckets) { \
khint_t inc, k, i, last, mask; \
mask = h->n_buckets - 1; \
k = __hash_func(key); i = k & mask; \
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
i = (i + inc) & mask; \
if (i == last) return h->n_buckets; \
} \
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
} else return 0; \
} \
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
khint32_t *new_flags = 0; \
khint_t j = 1; \
{ \
kroundup32(new_n_buckets); \
if (new_n_buckets < 4) new_n_buckets = 4; \
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
else { /* hash table size to be changed (shrink or expand); rehash */ \
new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (!new_flags) return -1; \
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (h->n_buckets < new_n_buckets) { /* expand */ \
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
if (!new_keys) return -1; \
h->keys = new_keys; \
if (kh_is_map) { \
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
if (!new_vals) return -1; \
h->vals = new_vals; \
} \
} /* otherwise shrink */ \
} \
} \
if (j) { /* rehashing is needed */ \
for (j = 0; j != h->n_buckets; ++j) { \
if (__ac_iseither(h->flags, j) == 0) { \
khkey_t key = h->keys[j]; \
khval_t val; \
khint_t new_mask; \
new_mask = new_n_buckets - 1; \
if (kh_is_map) val = h->vals[j]; \
__ac_set_isdel_true(h->flags, j); \
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
khint_t inc, k, i; \
k = __hash_func(key); \
i = k & new_mask; \
inc = __ac_inc(k, new_mask); \
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
__ac_set_isempty_false(new_flags, i); \
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
} else { /* write the element and jump out of the loop */ \
h->keys[i] = key; \
if (kh_is_map) h->vals[i] = val; \
break; \
} \
} \
} \
} \
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
} \
kfree(h->flags); /* free the working space */ \
h->flags = new_flags; \
h->n_buckets = new_n_buckets; \
h->n_occupied = h->size; \
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
} \
return 0; \
} \
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
{ \
khint_t x; \
if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
if (h->n_buckets > (h->size<<1)) { \
if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
*ret = -1; return h->n_buckets; \
} \
} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
*ret = -1; return h->n_buckets; \
} \
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
{ \
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
else { \
inc = __ac_inc(k, mask); last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
if (__ac_isdel(h->flags, i)) site = i; \
i = (i + inc) & mask; \
if (i == last) { x = site; break; } \
} \
if (x == h->n_buckets) { \
if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
else x = i; \
} \
} \
} \
if (__ac_isempty(h->flags, x)) { /* not present at all */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; ++h->n_occupied; \
*ret = 1; \
} else if (__ac_isdel(h->flags, x)) { /* deleted */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; \
*ret = 2; \
} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
return x; \
} \
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
{ \
if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
__ac_set_isdel_true(h->flags, x); \
--h->size; \
} \
}
#define KHASH_DECLARE(name, khkey_t, khval_t) \
__KHASH_TYPE(name, khkey_t, khval_t) \
__KHASH_PROTOTYPES(name, khkey_t, khval_t)
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
__KHASH_TYPE(name, khkey_t, khval_t) \
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
/* --- BEGIN OF HASH FUNCTIONS --- */
/*! @function
@abstract Integer hash function
@param key The integer [khint32_t]
@return The hash value [khint_t]
*/
#define kh_int_hash_func(key) (khint32_t)(key)
/*! @function
@abstract Integer comparison function
*/
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract 64-bit integer hash function
@param key The integer [khint64_t]
@return The hash value [khint_t]
*/
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
/*! @function
@abstract 64-bit integer comparison function
*/
#define kh_int64_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract Pointer hash function
@param key The integer void *
@return The hash value [khint_t]
*/
#define kh_ptr_hash_func(key) (khint32_t)(key)
/*! @function
@abstract Pointer comparison function
*/
#define kh_ptr_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract 64-bit pointer hash function
@param key The integer void *
@return The hash value [khint_t]
*/
#define kh_ptr64_hash_func(key) (khint32_t)(((khint64_t)key)>>33^((khint64_t)key)^((khint64_t)key)<<11)
/*! @function
@abstract 64-bit pointer comparison function
*/
#define kh_ptr64_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract const char* hash function
@param s Pointer to a null terminated string
@return The hash value
*/
static kh_inline khint_t __ac_X31_hash_string(const char *s)
{
khint_t h = (khint_t)*s;
if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
return h;
}
/*! @function
@abstract Another interface to const char* hash function
@param key Pointer to a null terminated string [const char*]
@return The hash value [khint_t]
*/
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
@abstract Const char* comparison function
*/
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
static kh_inline khint_t __ac_Wang_hash(khint_t key)
{
key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return key;
}
#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
/* --- END OF HASH FUNCTIONS --- */
/* Other convenient macros... */
/*!
@abstract Type of the hash table.
@param name Name of the hash table [symbol]
*/
#define khash_t(name) kh_##name##_t
/*! @function
@abstract Initiate a hash table.
@param name Name of the hash table [symbol]
@return Pointer to the hash table [khash_t(name)*]
*/
#define kh_init(name) kh_init_##name()
/*! @function
@abstract Destroy a hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
*/
#define kh_destroy(name, h) kh_destroy_##name(h)
/*! @function
@abstract Reset a hash table without deallocating memory.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
*/
#define kh_clear(name, h) kh_clear_##name(h)
/*! @function
@abstract Resize a hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param s New size [khint_t]
*/
#define kh_resize(name, h, s) kh_resize_##name(h, s)
/*! @function
@abstract Insert a key to the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
@param r Extra return code: 0 if the key is present in the hash table;
1 if the bucket is empty (never used); 2 if the element in
the bucket has been deleted [int*]
@return Iterator to the inserted element [khint_t]
*/
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
/*! @function
@abstract Retrieve a key from the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
@return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
*/
#define kh_get(name, h, k) kh_get_##name(h, k)
/*! @function
@abstract Remove a key from the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Iterator to the element to be deleted [khint_t]
*/
#define kh_del(name, h, k) kh_del_##name(h, k)
/*! @function
@abstract Test whether a bucket contains data.
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return 1 if containing data; 0 otherwise [int]
*/
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
/*! @function
@abstract Get key given an iterator
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return Key [type of keys]
*/
#define kh_key(h, x) ((h)->keys[x])
/*! @function
@abstract Get value given an iterator
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return Value [type of values]
@discussion For hash sets, calling this results in segfault.
*/
#define kh_val(h, x) ((h)->vals[x])
/*! @function
@abstract Alias of kh_val()
*/
#define kh_value(h, x) ((h)->vals[x])
/*! @function
@abstract Get the start iterator
@param h Pointer to the hash table [khash_t(name)*]
@return The start iterator [khint_t]
*/
#define kh_begin(h) (khint_t)(0)
/*! @function
@abstract Get the end iterator
@param h Pointer to the hash table [khash_t(name)*]
@return The end iterator [khint_t]
*/
#define kh_end(h) ((h)->n_buckets)
/*! @function
@abstract Get the number of elements in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@return Number of elements in the hash table [khint_t]
*/
#define kh_size(h) ((h)->size)
/*! @function
@abstract Get the number of buckets in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@return Number of buckets in the hash table [khint_t]
*/
#define kh_n_buckets(h) ((h)->n_buckets)
/*! @function
@abstract Iterate over the entries in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@param kvar Variable to which key will be assigned
@param vvar Variable to which value will be assigned
@param code Block of code to execute
*/
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
if (!kh_exist(h,__i)) continue; \
(kvar) = kh_key(h,__i); \
(vvar) = kh_val(h,__i); \
code; \
} }
/*! @function
@abstract Iterate over the values in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@param vvar Variable to which value will be assigned
@param code Block of code to execute
*/
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
if (!kh_exist(h,__i)) continue; \
(vvar) = kh_val(h,__i); \
code; \
} }
/* More conenient interfaces */
/*! @function
@abstract Instantiate a hash map containing (void *) keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#ifdef __x86_64__
#define KHASH_MAP_INIT_PTR(name, khval_t) \
KHASH_INIT(name, void*, khval_t, 1, kh_ptr64_hash_func, kh_ptr64_hash_equal)
#else
#define KHASH_MAP_INIT_PTR(name, khval_t) \
KHASH_INIT(name, void*, khval_t, 1, kh_ptr_hash_func, kh_ptr_hash_equal)
#endif
/*! @function
@abstract Instantiate a hash set containing integer keys
@param name Name of the hash table [symbol]
*/
#define KHASH_SET_INIT_INT(name) \
KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
/*! @function
@abstract Instantiate a hash map containing integer keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_INT(name, khval_t) \
KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
/*! @function
@abstract Instantiate a hash map containing 64-bit integer keys
@param name Name of the hash table [symbol]
*/
#define KHASH_SET_INIT_INT64(name) \
KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
/*! @function
@abstract Instantiate a hash map containing 64-bit integer keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_INT64(name, khval_t) \
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
typedef const char *kh_cstr_t;
/*! @function
@abstract Instantiate a hash map containing const char* keys
@param name Name of the hash table [symbol]
*/
#define KHASH_SET_INIT_STR(name) \
KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
/*! @function
@abstract Instantiate a hash map containing const char* keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_STR(name, khval_t) \
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
#endif /* __AC_KHASH_H */

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,9 @@
/** @file mdb.h
* @brief memory-mapped database library
/** @file lmdb.h
* @brief Lightning memory-mapped database library
*
* @mainpage MDB Memory-Mapped Database Manager
* @mainpage Lightning Memory-Mapped Database Manager (MDB)
*
* @section intro_sec Introduction
* MDB is a Btree-based database management library modeled loosely on the
* BerkeleyDB API, but much simplified. The entire database is exposed
* in a memory map, and all data fetches return data directly
@ -38,9 +40,69 @@
* corrupt the database. Of course if your application code is known to
* be bug-free (...) then this is not an issue.
*
* @section caveats_sec Caveats
* Troubleshooting the lock file, plus semaphores on BSD systems:
*
* - A broken lockfile can cause sync issues.
* Stale reader transactions left behind by an aborted program
* cause further writes to grow the database quickly, and
* stale locks can block further operation.
*
* Fix: Terminate all programs using the database, or make
* them close it. Next database user will reset the lockfile.
*
* - On BSD systems or others configured with MDB_USE_POSIX_SEM,
* startup can fail due to semaphores owned by another userid.
*
* Fix: Open and close the database as the user which owns the
* semaphores (likely last user) or as root, while no other
* process is using the database.
*
* Restrictions/caveats (in addition to those listed for some functions):
*
* - Only the database owner should normally use the database on
* BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
* Multiple users can cause startup to fail later, as noted above.
*
* - A thread can only use one transaction at a time, plus any child
* transactions. Each transaction belongs to one thread. See below.
* The #MDB_NOTLS flag changes this for read-only transactions.
*
* - Use an MDB_env* in the process which opened it, without fork()ing.
*
* - Do not have open an MDB database twice in the same process at
* the same time. Not even from a plain open() call - close()ing it
* breaks flock() advisory locking.
*
* - Avoid long-lived transactions. Read transactions prevent
* reuse of pages freed by newer write transactions, thus the
* database can grow quickly. Write transactions prevent
* other write transactions, since writes are serialized.
*
* - Avoid suspending a process with active transactions. These
* would then be "long-lived" as above. Also read transactions
* suspended when writers commit could sometimes see wrong data.
*
* ...when several processes can use a database concurrently:
*
* - Avoid aborting a process with an active transaction.
* The transaction becomes "long-lived" as above until the lockfile
* is reset, since the process may not remove it from the lockfile.
*
* - If you do that anyway, close the environment once in a while,
* so the lockfile can get reset.
*
* - Do not use MDB databases on remote filesystems, even between
* processes on the same host. This breaks flock() on some OSes,
* possibly memory map sync, and certainly sync between programs
* on different hosts.
*
* - Opening a database can fail if another process is opening or
* closing it at exactly the same time.
*
* @author Howard Chu, Symas Corporation.
*
* @copyright Copyright 2011-2012 Howard Chu, Symas Corp. All rights reserved.
* @copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
@ -50,7 +112,7 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*
* @par Derived From:
* @par Derived From:
* This code is derived from btree.c written by Martin Hedenfalk.
*
* Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
@ -67,8 +129,8 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef _MDB_H_
#define _MDB_H_
#ifndef _LMDB_H_
#define _LMDB_H_
#include <sys/types.h>
@ -76,8 +138,15 @@
extern "C" {
#endif
/** @defgroup public Public API
#ifdef _MSC_VER
typedef int mdb_mode_t;
#else
typedef mode_t mdb_mode_t;
#endif
/** @defgroup mdb MDB API
* @{
* @brief OpenLDAP Lightning Memory-Mapped Database Manager
*/
/** @defgroup Version Version Macros
* @{
@ -87,7 +156,7 @@ extern "C" {
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
#define MDB_VERSION_PATCH 4
#define MDB_VERSION_PATCH 6
/** Combine args a,b,c into a single integer for easy version comparisons */
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
@ -97,7 +166,7 @@ extern "C" {
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
#define MDB_VERSION_DATE "September 14, 2012"
#define MDB_VERSION_DATE "January 10, 2013"
/** A stringifier for the version info */
#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
@ -130,7 +199,17 @@ typedef unsigned int MDB_dbi;
/** @brief Opaque structure for navigating through a database */
typedef struct MDB_cursor MDB_cursor;
/** @brief Generic structure used for passing keys and data in and out of the database. */
/** @brief Generic structure used for passing keys and data in and out
* of the database.
*
* Key sizes must be between 1 and the liblmdb build-time constant
* #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The
* same applies to data sizes in databases with the #MDB_DUPSORT flag.
* Other data items can in theory be from 0 to 0xffffffff bytes long.
*
* Values returned from the database are valid only until a subsequent
* update operation, or the end of the transaction.
*/
typedef struct MDB_val {
size_t mv_size; /**< size of the data item */
void *mv_data; /**< address of the data item */
@ -156,12 +235,14 @@ typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
/** @defgroup mdb_env Environment Flags
*
* Values do not overlap Database Flags.
* @{
*/
/** mmap at a fixed address */
#define MDB_FIXEDMAP 0x01
/** mmap at a fixed address (experimental) */
#define MDB_FIXEDMAP 0x01
/** no environment directory */
#define MDB_NOSUBDIR 0x02
#define MDB_NOSUBDIR 0x4000
/** don't fsync after commit */
#define MDB_NOSYNC 0x10000
/** read only */
@ -170,11 +251,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
#define MDB_NOMETASYNC 0x40000
/** use writable mmap */
#define MDB_WRITEMAP 0x80000
/** use asynchronous msync */
/** use asynchronous msync when MDB_WRITEMAP is used */
#define MDB_MAPASYNC 0x100000
/** tie reader locktable slots to #MDB_txn objects instead of to threads */
#define MDB_NOTLS 0x200000
/** @} */
/** @defgroup mdb_open Database Flags
/** @defgroup mdb_dbi_open Database Flags
*
* Values do not overlap Environment Flags.
* @{
*/
/** use reverse string keys */
@ -281,13 +366,19 @@ typedef enum MDB_cursor_op {
#define MDB_READERS_FULL (-30790)
/** Too many TLS keys in use - Windows only */
#define MDB_TLS_FULL (-30789)
/** Nested txn has too many dirty pages */
/** Txn has too many dirty pages */
#define MDB_TXN_FULL (-30788)
/** Cursor stack too deep - internal error */
#define MDB_CURSOR_FULL (-30787)
/** Page has not enough space - internal error */
#define MDB_PAGE_FULL (-30786)
#define MDB_LAST_ERRCODE MDB_PAGE_FULL
/** Database contents grew beyond environment mapsize */
#define MDB_MAP_RESIZED (-30785)
/** Database flags changed or would change */
#define MDB_INCOMPATIBLE (-30784)
/** Invalid reuse of reader locktable slot */
#define MDB_BAD_RSLOT (-30783)
#define MDB_LAST_ERRCODE MDB_BAD_RSLOT
/** @} */
/** @brief Statistics for a database in the environment */
@ -301,6 +392,16 @@ typedef struct MDB_stat {
size_t ms_entries; /**< Number of data items */
} MDB_stat;
/** @brief Information about the environment */
typedef struct MDB_envinfo {
void *me_mapaddr; /**< Address of map, if fixed */
size_t me_mapsize; /**< Size of the data memory map */
size_t me_last_pgno; /**< ID of the last used page */
size_t me_last_txnid; /**< ID of the last committed transaction */
unsigned int me_maxreaders; /**< max reader slots in the environment */
unsigned int me_numreaders; /**< max reader slots used in the environment */
} MDB_envinfo;
/** @brief Return the mdb library version information.
*
* @param[out] major if non-NULL, the library major version number is copied here
@ -344,6 +445,7 @@ int mdb_env_create(MDB_env **env);
* @param[in] flags Special options for this environment. This parameter
* must be set to 0 or by bitwise OR'ing together one or more of the
* values described here.
* Flags set by mdb_env_set_flags() are also used.
* <ul>
* <li>#MDB_FIXEDMAP
* use a fixed address for the mmap region. This flag must be specified
@ -359,24 +461,52 @@ int mdb_env_create(MDB_env **env);
* under that directory. With this option, \b path is used as-is for
* the database main data file. The database lock file is the \b path
* with "-lock" appended.
* <li>#MDB_NOSYNC
* Don't perform a synchronous flush after committing a transaction. This means
* transactions will exhibit the ACI (atomicity, consistency, and isolation)
* properties, but not D (durability); that is database integrity will be
* maintained but it is possible some number of the most recently committed
* transactions may be undone after a system crash. The number of transactions
* at risk is governed by how often the system flushes dirty buffers to disk
* and how often #mdb_env_sync() is called. This flag may be changed
* at any time using #mdb_env_set_flags().
* <li>#MDB_NOMETASYNC
* Don't perform a synchronous flush of the meta page after committing
* a transaction. This is similar to the #MDB_NOSYNC case, but safer
* because the transaction data is still flushed. The meta page for any
* transaction N will be flushed by the data flush of transaction N+1.
* In case of a system crash, the last committed transaction may be
* lost. This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_RDONLY
* Open the environment in read-only mode. No write operations will be allowed.
* Open the environment in read-only mode. No write operations will be
* allowed. MDB will still modify the lock file - except on read-only
* filesystems, where MDB does not use locks.
* <li>#MDB_WRITEMAP
* Use a writeable memory map unless MDB_RDONLY is set. This is faster
* and uses fewer mallocs, but loses protection from application bugs
* like wild pointer writes and other bad updates into the database.
* Incompatible with nested transactions.
* <li>#MDB_NOMETASYNC
* Flush system buffers to disk only once per transaction, omit the
* metadata flush. Defer that until the system flushes files to disk,
* or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
* maintains database integrity, but a system crash may undo the last
* committed transaction. I.e. it preserves the ACI (atomicity,
* consistency, isolation) but not D (durability) database property.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_NOSYNC
* Don't flush system buffers to disk when committing a transaction.
* This optimization means a system crash can corrupt the database or
* lose the last transactions if buffers are not yet flushed to disk.
* The risk is governed by how often the system flushes dirty buffers
* to disk and how often #mdb_env_sync() is called. However, if the
* filesystem preserves write order and the #MDB_WRITEMAP flag is not
* used, transactions exhibit ACI (atomicity, consistency, isolation)
* properties and only lose D (durability). I.e. database integrity
* is maintained, but a system crash may undo the final transactions.
* Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
* hint for when to write transactions to disk, unless #mdb_env_sync()
* is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_MAPASYNC
* When using #MDB_WRITEMAP, use asynchronous flushes to disk.
* As with #MDB_NOSYNC, a system crash can then corrupt the
* database or lose the last transactions. Calling #mdb_env_sync()
* ensures on-disk database integrity until next commit.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_NOTLS
* Don't use Thread-Local Storage. Tie reader locktable slots to
* #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
* the slot reseved for the #MDB_txn object. A thread may use parallel
* read-only transactions. A read-only transaction may span threads if
* the user synchronizes its use. Applications that multiplex many
* user threads over individual OS threads need this option. Such an
* application must also serialize the write transactions in an OS
* thread, since MDB's write locking is unaware of the user threads.
* </ul>
* @param[in] mode The UNIX permissions to set on created files. This parameter
* is ignored on Windows.
@ -385,13 +515,25 @@ int mdb_env_create(MDB_env **env);
* <ul>
* <li>#MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the
* version that created the database environment.
* <li>EINVAL - the environment file headers are corrupted.
* <li>#MDB_INVALID - the environment file headers are corrupted.
* <li>ENOENT - the directory specified by the path parameter doesn't exist.
* <li>EACCES - the user didn't have permission to access the environment files.
* <li>EAGAIN - the environment was locked by another process.
* </ul>
*/
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode);
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
/** @brief Copy an MDB environment to the specified path.
*
* This function may be used to make a backup of an existing environment.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] path The directory in which the copy will reside. This
* directory must already exist and be writable but must otherwise be
* empty.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_copy(MDB_env *env, const char *path);
/** @brief Return statistics about the MDB environment.
*
@ -401,16 +543,24 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mod
*/
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
/** @brief Return information about the MDB environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] stat The address of an #MDB_envinfo structure
* where the information will be copied
*/
int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
/** @brief Flush the data buffers to disk.
*
* Data is always written to disk when #mdb_txn_commit() is called,
* but the operating system may keep it buffered. MDB always flushes
* the OS buffers upon commit as well, unless the environment was
* opened with #MDB_NOSYNC.
* opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] force If non-zero, force the flush to occur. Otherwise
* @param[in] force If non-zero, force a synchronous flush. Otherwise
* if the environment has the #MDB_NOSYNC flag set the flushes
* will be omitted.
* will be omitted, and with #MDB_MAPASYNC they will be asynchronous.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
@ -432,7 +582,7 @@ void mdb_env_close(MDB_env *env);
/** @brief Set environment flags.
*
* This may be used to set some flags that weren't already set during
* This may be used to set some flags in addition to those from
* #mdb_env_open(), or to unset these flags.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] flags The flags to change, bitwise OR'ed together
@ -478,6 +628,9 @@ int mdb_env_get_path(MDB_env *env, const char **path);
* of the database. The value should be chosen as large as possible,
* to accommodate future growth of the database.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
* The size may be changed by closing and reopening the environment.
* Any attempt to set a size smaller than the space already consumed
* by the environment will be silently changed to the current size of the used space.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] size The size in bytes
* @return A non-zero error value on failure and 0 on success. Some possible
@ -488,13 +641,17 @@ int mdb_env_get_path(MDB_env *env, const char **path);
*/
int mdb_env_set_mapsize(MDB_env *env, size_t size);
/** @brief Set the maximum number of threads for the environment.
/** @brief Set the maximum number of threads/reader slots for the environment.
*
* This defines the number of slots in the lock table that is used to track readers in the
* the environment. The default is 126.
* Starting a read-only transaction normally ties a lock table slot to the
* current thread until the environment closes or the thread exits. If
* MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the
* MDB_txn object until it or the #MDB_env object is destroyed.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] readers The maximum number of threads
* @param[in] readers The maximum number of reader lock table slots
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
@ -503,7 +660,7 @@ int mdb_env_set_mapsize(MDB_env *env, size_t size);
*/
int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
/** @brief Get the maximum number of threads for the environment.
/** @brief Get the maximum number of threads/reader slots for the environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] readers Address of an integer to store the number of readers
@ -515,11 +672,11 @@ int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
*/
int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
/** @brief Set the maximum number of databases for the environment.
/** @brief Set the maximum number of named databases for the environment.
*
* This function is only needed if multiple databases will be used in the
* environment. Simpler applications that only use a single database can ignore
* this option.
* environment. Simpler applications that use the environment as a single
* unnamed database can ignore this option.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] dbs The maximum number of databases
@ -534,10 +691,10 @@ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
/** @brief Create a transaction for use with the environment.
*
* The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
* @note Transactions may not span threads; a transaction must only be used by a
* single thread. Also, a thread may only have a single transaction.
* @note Cursors may not span transactions; each cursor must be opened and closed
* within a single transaction.
* @note A transaction and its cursors must only be used by a single
* thread, and a thread may only have a single transaction at a time.
* If #MDB_NOTLS is in use, this does not apply to read-only transactions.
* @note Cursors may not span transactions.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] parent If this parameter is non-NULL, the new transaction
* will be a nested transaction, with the transaction indicated by \b parent
@ -557,16 +714,21 @@ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
* <ul>
* <li>#MDB_PANIC - a fatal error occurred earlier and the environment
* must be shut down.
* <li>ENOMEM - out of memory, or a read-only transaction was requested and
* <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
* mapsize and the environment must be shut down.
* <li>#MDB_READERS_FULL - a read-only transaction was requested and
* the reader lock table is full. See #mdb_env_set_maxreaders().
* <li>ENOMEM - out of memory.
* </ul>
*/
int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);
/** @brief Commit all the operations of a transaction into the database.
*
* All cursors opened within the transaction will be closed by this call. The cursors
* and transaction handle will be freed and must not be used again after this call.
* The transaction handle is freed. It and its cursors must not be used
* again after this call, except with #mdb_cursor_renew().
* @note Earlier documentation incorrectly said all cursors would be freed.
* Only write-transactions free cursors.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
@ -574,28 +736,32 @@ int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **
* <li>EINVAL - an invalid parameter was specified.
* <li>ENOSPC - no more disk space.
* <li>EIO - a low-level I/O error occurred while writing.
* <li>ENOMEM - the transaction is nested and could not be merged into its parent.
* <li>ENOMEM - out of memory.
* </ul>
*/
int mdb_txn_commit(MDB_txn *txn);
/** @brief Abandon all the operations of the transaction instead of saving them.
*
* All cursors opened within the transaction will be closed by this call. The cursors
* and transaction handle will be freed and must not be used again after this call.
* The transaction handle is freed. It and its cursors must not be used
* again after this call, except with #mdb_cursor_renew().
* @note Earlier documentation incorrectly said all cursors would be freed.
* Only write-transactions free cursors.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
*/
void mdb_txn_abort(MDB_txn *txn);
/** @brief Reset a read-only transaction.
*
* This releases the current reader lock but doesn't free the
* transaction handle, allowing it to be used again later by #mdb_txn_renew().
* It otherwise has the same effect as #mdb_txn_abort() but saves some memory
* allocation/deallocation overhead if a thread is going to start a new
* read-only transaction again soon.
* All cursors opened within the transaction must be closed before the transaction
* is reset.
* Abort the transaction like #mdb_txn_abort(), but keep the transaction
* handle. #mdb_txn_renew() may reuse the handle. This saves allocation
* overhead if the process will start a new read-only transaction soon,
* and also locking overhead if #MDB_NOTLS is in use. The reader table
* lock is released, but the table slot stays tied to its thread or
* #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free
* its lock table slot if MDB_NOTLS is in use.
* Cursors opened within the transaction must not be used
* again after this call, except with #mdb_cursor_renew().
* Reader locks generally don't interfere with writers, but they keep old
* versions of database pages allocated. Thus they prevent the old pages
* from being reused when writers commit new data, and so under heavy load
@ -620,12 +786,30 @@ void mdb_txn_reset(MDB_txn *txn);
*/
int mdb_txn_renew(MDB_txn *txn);
/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */
#define mdb_open(txn,name,flags,dbi) mdb_dbi_open(txn,name,flags,dbi)
/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */
#define mdb_close(env,dbi) mdb_dbi_close(env,dbi)
/** @brief Open a database in the environment.
*
* The database handle may be discarded by calling #mdb_close(). The
* database handle resides in the shared environment, it is not owned
* by the given transaction. Only one thread should call this function;
* it is not mutex-protected in a read-only transaction.
* A database handle denotes the name and parameters of a database,
* independently of whether such a database exists.
* The database handle may be discarded by calling #mdb_dbi_close().
* The old database handle is returned if the database was already open.
* The handle must only be closed once.
* The database handle will be private to the current transaction until
* the transaction is successfully committed. If the transaction is
* aborted the handle will be closed automatically.
* After a successful commit the
* handle will reside in the shared environment, and may be used
* by other transactions. This function must not be called from
* multiple concurrent transactions. A transaction that uses this function
* must finish (either commit or abort) before any other transaction may
* use this function.
*
* To use named databases (with name != NULL), #mdb_env_set_maxdbs()
* must be called before opening the environment.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] name The name of the database to open. If only a single
* database is needed in the environment, this value may be NULL.
@ -667,15 +851,15 @@ int mdb_txn_renew(MDB_txn *txn);
* <ul>
* <li>#MDB_NOTFOUND - the specified database doesn't exist in the environment
* and #MDB_CREATE was not specified.
* <li>ENFILE - too many databases have been opened. See #mdb_env_set_maxdbs().
* <li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs().
* </ul>
*/
int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
/** @brief Retrieve statistics for a database.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[out] stat The address of an #MDB_stat structure
* where the statistics will be copied
* @return A non-zero error value on failure and 0 on success. Some possible
@ -690,20 +874,21 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
*
* This call is not mutex protected. Handles should only be closed by
* a single thread, and only if no other threads are going to reference
* the database handle any further.
* the database handle or one of its cursors any further. Do not close
* a handle if an existing transaction has modified its database.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
*/
void mdb_close(MDB_env *env, MDB_dbi dbi);
void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
/** @brief Delete a database and/or free all its pages.
*
* If the \b del parameter is non-zero the DB handle will be closed
* If the \b del parameter is 1, the DB handle will be closed
* and the DB will be deleted.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] del non-zero to delete the DB from the environment,
* otherwise just free its pages.
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] del 1 to delete the DB from the environment,
* 0 to just free its pages.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
@ -713,13 +898,13 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
* The comparison function is called whenever it is necessary to compare a
* key specified by the application with a key currently stored in the database.
* If no comparison function is specified, and no special key flags were specified
* with #mdb_open(), the keys are compared lexically, with shorter keys collating
* with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating
* before longer keys.
* @warning This function must be called before any data access functions are used,
* otherwise data corruption may occur. The same comparison function must be used by every
* program accessing the database, every time the database is used.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] cmp A #MDB_cmp_func function
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
@ -736,13 +921,13 @@ int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
* This function only takes effect if the database was opened with the #MDB_DUPSORT
* flag.
* If no comparison function is specified, and no special key flags were specified
* with #mdb_open(), the data items are compared lexically, with shorter items collating
* with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating
* before longer items.
* @warning This function must be called before any data access functions are used,
* otherwise data corruption may occur. The same comparison function must be used by every
* program accessing the database, every time the database is used.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] cmp A #MDB_cmp_func function
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
@ -762,7 +947,7 @@ int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
* Currently the relocation feature is unimplemented and setting
* this function has no effect.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] rel A #MDB_rel_func function
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
@ -776,7 +961,7 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel);
*
* See #mdb_set_relfunc and #MDB_rel_func for more details.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] ctx An arbitrary pointer for whatever the application needs.
* It will be passed to the callback function set by #mdb_set_relfunc
* as its \b relctx parameter whenever the callback is invoked.
@ -801,8 +986,10 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx);
* database. The caller need not dispose of the memory, and may not
* modify it in any way. For values returned in a read-only transaction
* any modification attempts will cause a SIGSEGV.
* @note Values returned from the database are valid only until a
* subsequent update operation, or the end of the transaction.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to search for in the database
* @param[out] data The data corresponding to the key
* @return A non-zero error value on failure and 0 on success. Some possible
@ -821,7 +1008,7 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
* if duplicates are disallowed, or adding a duplicate data item if
* duplicates are allowed (#MDB_DUPSORT).
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to store in the database
* @param[in,out] data The data to store
* @param[in] flags Special options for this operation. This parameter
@ -840,7 +1027,8 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
* parameter will be set to point to the existing item.
* <li>#MDB_RESERVE - reserve space for data of the given size, but
* don't copy the given data. Instead, return a pointer to the
* reserved space, which the caller can fill in later. This saves
* reserved space, which the caller can fill in later - before
* the next update operation or the transaction ends. This saves
* an extra memcpy if the data is being generated later.
* <li>#MDB_APPEND - append the given key/data pair to the end of the
* database. No key comparisons are performed. This option allows
@ -852,9 +1040,10 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
* <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to write in a read-only transaction.
* <li>EINVAL - an invalid parameter was specified.
* <li>ENOMEM - the database is full, see #mdb_env_set_mapsize().
* </ul>
*/
int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
@ -872,7 +1061,7 @@ int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
* This function will return #MDB_NOTFOUND if the specified key/data
* pair is not in the database.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to delete from the database
* @param[in] data The data to delete
* @return A non-zero error value on failure and 0 on success. Some possible
@ -886,10 +1075,19 @@ int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
/** @brief Create a cursor handle.
*
* Cursors are associated with a specific transaction and database and
* may not span threads.
* A cursor is associated with a specific transaction and database.
* A cursor cannot be used when its database handle is closed. Nor
* when its transaction has ended, except with #mdb_cursor_renew().
* It can be discarded with #mdb_cursor_close().
* A cursor in a write-transaction can be closed before its transaction
* ends, and will otherwise be closed when its transaction ends.
* A cursor in a read-only transaction must be closed explicitly, before
* or after its transaction ends. It can be reused with
* #mdb_cursor_renew() before finally closing it.
* @note Earlier documentation said that cursors in every transaction
* were closed when the transaction committed or aborted.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[out] cursor Address where the new #MDB_cursor handle will be stored
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
@ -902,17 +1100,19 @@ int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor);
/** @brief Close a cursor handle.
*
* The cursor handle will be freed and must not be used again after this call.
* Its transaction must still be live if it is a write-transaction.
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
*/
void mdb_cursor_close(MDB_cursor *cursor);
/** @brief Renew a cursor handle.
*
* Cursors are associated with a specific transaction and database and
* may not span threads. Cursors that are only used in read-only
* A cursor is associated with a specific transaction and database.
* Cursors that are only used in read-only
* transactions may be re-used, to avoid unnecessary malloc/free overhead.
* The cursor may be associated with a new read-only transaction, and
* referencing the same database handle as it was created with.
* This may be done whether the previous transaction is live or dead.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* @return A non-zero error value on failure and 0 on success. Some possible
@ -942,6 +1142,7 @@ MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor);
* case of the #MDB_SET option, in which the \b key object is unchanged), and
* the address and length of the data are returned in the object to which \b data
* refers.
* See #mdb_get() for restrictions on using the output values.
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* @param[in,out] key The key for a retrieved item
* @param[in,out] data The data of a retrieved item
@ -994,6 +1195,8 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
* <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to modify a read-only database.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
@ -1039,7 +1242,7 @@ int mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
* This returns a comparison as if the two data items were keys in the
* specified database.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] a The first item to compare
* @param[in] b The second item to compare
* @return < 0 if a < b, 0 if a == b, > 0 if a > b
@ -1049,9 +1252,9 @@ int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
/** @brief Compare two data items according to a particular database.
*
* This returns a comparison as if the two items were data items of
* a sorted duplicates #MDB_DUPSORT database.
* the specified database. The database must have the #MDB_DUPSORT flag.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_open()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] a The first item to compare
* @param[in] b The second item to compare
* @return < 0 if a < b, 0 if a == b, > 0 if a > b
@ -1062,4 +1265,6 @@ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
#ifdef __cplusplus
}
#endif
#endif /* _MDB_H_ */
#endif /* _LMDB_H_ */
/* * http://gitorious.org/mdb/mdb/blobs/raw/b389341b4b2413804726276d01676a6a9d05346f/libraries/liblmdb/lmdb.h */

View file

@ -3,7 +3,7 @@
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
*
* Copyright 2000-2012 The OpenLDAP Foundation.
* Copyright 2000-2013 The OpenLDAP Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -18,6 +18,7 @@
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <assert.h>
#include "midl.h"
@ -59,7 +60,7 @@ static unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
return cursor;
}
}
if( val > 0 ) {
++cursor;
}
@ -105,7 +106,7 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
ids[2] = ids[ids[0]-1];
}
ids[0] = MDB_NOID;
} else {
/* insert id */
for (i=ids[0]; i>x; i--)
@ -117,22 +118,24 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
}
#endif
MDB_IDL mdb_midl_alloc()
MDB_IDL mdb_midl_alloc(int num)
{
MDB_IDL ids = malloc((MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
*ids++ = MDB_IDL_UM_MAX;
MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID));
if (ids)
*ids++ = num;
return ids;
}
void mdb_midl_free(MDB_IDL ids)
{
free(ids-1);
if (ids)
free(ids-1);
}
int mdb_midl_shrink( MDB_IDL *idp )
{
MDB_IDL ids = *idp;
if (ids[-1] > MDB_IDL_UM_MAX) {
if (*(--ids) > MDB_IDL_UM_MAX) {
ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
*ids++ = MDB_IDL_UM_MAX;
*idp = ids;
@ -141,19 +144,26 @@ int mdb_midl_shrink( MDB_IDL *idp )
return 0;
}
int mdb_midl_grow( MDB_IDL *idp, int num )
{
MDB_IDL idn = *idp-1;
/* grow it */
idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
if (!idn)
return ENOMEM;
*idn++ += num;
*idp = idn;
return 0;
}
int mdb_midl_append( MDB_IDL *idp, MDB_ID id )
{
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] >= ids[-1]) {
MDB_IDL idn = ids-1;
/* grow it */
idn = realloc(idn, (*idn + MDB_IDL_UM_MAX + 1) * sizeof(MDB_ID));
if (!idn)
return -1;
*idn++ += MDB_IDL_UM_MAX;
ids = idn;
*idp = ids;
if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
return ENOMEM;
ids = *idp;
}
ids[0]++;
ids[ids[0]] = id;
@ -165,14 +175,9 @@ int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app )
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] + app[0] >= ids[-1]) {
MDB_IDL idn = ids-1;
/* grow it */
idn = realloc(idn, (*idn + app[-1]) * sizeof(MDB_ID));
if (!idn)
return -1;
*idn++ += app[-1];
ids = idn;
*idp = ids;
if (mdb_midl_grow(idp, app[0]))
return ENOMEM;
ids = *idp;
}
memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID));
ids[0] += app[0];
@ -192,7 +197,7 @@ mdb_midl_sort( MDB_IDL ids )
int i,j,k,l,ir,jstack;
MDB_ID a, itmp;
ir = ids[0];
ir = (int)ids[0];
l = 1;
jstack = 0;
for(;;) {
@ -232,7 +237,7 @@ mdb_midl_sort( MDB_IDL ids )
ids[l+1] = ids[j];
ids[j] = a;
jstack += 2;
if (ir-i+1 >= j-1) {
if (ir-i+1 >= j-l) {
istack[jstack] = ir;
istack[jstack-1] = i;
ir = j-1;
@ -255,7 +260,7 @@ unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id )
unsigned base = 0;
unsigned cursor = 1;
int val = 0;
unsigned n = ids[0].mid;
unsigned n = (unsigned)ids[0].mid;
while( 0 < n ) {
unsigned pivot = n >> 1;
@ -304,7 +309,7 @@ int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id )
} else {
/* insert id */
ids[0].mid++;
for (i=ids[0].mid; i>x; i--)
for (i=(unsigned)ids[0].mid; i>x; i--)
ids[i] = ids[i-1];
ids[x] = *id;
}
@ -325,3 +330,5 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id )
/** @} */
/** @} */
/* http://gitorious.org/mdb/mdb/blobs/raw/mdb.master/libraries/liblmdb/midl.c */

View file

@ -11,7 +11,7 @@
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
*
* Copyright 2000-2012 The OpenLDAP Foundation.
* Copyright 2000-2013 The OpenLDAP Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -115,10 +115,10 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id );
#endif
/** Allocate an IDL.
* Allocates memory for an IDL of a default size.
* Allocates memory for an IDL of the given size.
* @return IDL on success, NULL on failure.
*/
MDB_IDL mdb_midl_alloc();
MDB_IDL mdb_midl_alloc(int num);
/** Free an IDL.
* @param[in] ids The IDL to free.
@ -132,6 +132,14 @@ void mdb_midl_free(MDB_IDL ids);
*/
int mdb_midl_shrink(MDB_IDL *idp);
/** Grow an IDL.
* Add room for num additional elements.
* @param[in,out] idp Address of the IDL to grow.
* @param[in] num Number of elements to add.
* @return 0 on success, -1 on failure.
*/
int mdb_midl_grow(MDB_IDL *idp, int num);
/** Append an ID onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The ID to append.
@ -193,3 +201,5 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id );
}
#endif
#endif /* _MDB_MIDL_H_ */
/* http://gitorious.org/mdb/mdb/blobs/raw/mdb.master/libraries/liblmdb/midl.h */

213
c_src/stats.h Normal file
View file

@ -0,0 +1,213 @@
/*
* stats: measure all the things
*
* Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
* Author: Gregory Burd <greg@basho.com> <greg@burd.me>
*
* This file is provided to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef __STATS_H__
#define __STATS_H__
#if defined(__cplusplus)
extern "C" {
#endif
#include "duration.h"
/**
* Calculate the log2 of 64bit unsigned integers.
*/
#ifdef __GCC__
#define LOG2(X) ((unsigned) ((8 * (sizeof(uint64_t) - 1)) - __builtin_clzll((X))))
#else
static unsigned int __log2_64(uint64_t x) {
static const int tab64[64] = {
63, 0, 58, 1, 59, 47, 53, 2,
60, 39, 48, 27, 54, 33, 42, 3,
61, 51, 37, 40, 49, 18, 28, 20,
55, 30, 34, 11, 43, 14, 22, 4,
62, 57, 46, 52, 38, 26, 32, 41,
50, 36, 17, 19, 29, 10, 13, 21,
56, 45, 25, 31, 35, 16, 9, 12,
44, 24, 15, 8, 23, 7, 6, 5};
if (x == 0) return 0;
uint64_t v = x;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v |= v >> 32;
return tab64[((uint64_t)((v - (v >> 1)) * 0x07EDD5E59A4E28C2)) >> 58];
}
#define LOG2(X) __log2_64(X)
#endif
#define STAT_DEF(name) struct name ## _stat name ## _stat;
#define STAT_DECL(name, nsamples) \
struct name ## _stat { \
duration_t d; \
uint64_t histogram[64]; \
uint32_t h, n; \
uint64_t samples[nsamples]; \
uint64_t min, max; \
double mean; \
}; \
static inline double name ## _stat_mean(struct name ## _stat *s) { \
uint32_t t = s->h; \
uint32_t h = (s->h + 1) % nsamples; \
double mean = 0; \
while (h != t) { \
mean += s->samples[h]; \
h = (h + 1) % nsamples; \
} \
if (mean > 0) \
mean /= (double)(s->n < nsamples ? s->n : nsamples); \
return mean; \
} \
static inline double name ## _stat_mean_lg2(struct name ## _stat *s) { \
uint32_t i; \
double mean = 0; \
for (i = 0; i < 64; i++) \
mean += (s->histogram[i] * i); \
if (mean > 0) \
mean /= (double)s->n; \
return mean; \
} \
static inline uint64_t name ## _stat_tick(struct name ## _stat *s) \
{ \
uint64_t t = ts(s->d.unit); \
s->d.then = t; \
return t; \
} \
static inline void name ## _stat_reset(struct name ## _stat *s) \
{ \
s->min = ~0; \
s->max = 0; \
s->h = 0; \
memset(&s->histogram, 0, sizeof(uint64_t) * 64); \
memset(&s->samples, 0, sizeof(uint64_t) * nsamples); \
} \
static inline uint64_t name ## _stat_tock(struct name ## _stat *s) \
{ \
uint64_t now = ts(s->d.unit); \
uint64_t elapsed = now - s->d.then; \
uint32_t i = s->h; \
if (s->n == nsamples) { \
s->mean = (s->mean + name ## _stat_mean(s)) / 2.0; \
if (s->n >= 4294967295) \
name ## _stat_reset(s); \
} \
s->h = (s->h + 1) % nsamples; \
s->samples[i] = elapsed; \
if (elapsed < s->min) \
s->min = elapsed; \
if (elapsed > s->max) \
s->max = elapsed; \
s->histogram[LOG2(elapsed)]++; \
s->n++; \
s->d.then = ts(s->d.unit); \
return elapsed; \
} \
static void name ## _stat_print_histogram(struct name ## _stat *s, const char *mod) \
{ \
uint8_t logs[64]; \
uint8_t i, j, max_log = 0; \
double m = (s->mean + name ## _stat_mean(s) / 2.0); \
\
fprintf(stderr, "%s:async_nif request latency histogram:\n", mod); \
for (i = 0; i < 64; i++) { \
logs[i] = LOG2(s->histogram[i]); \
if (logs[i] > max_log) \
max_log = logs[i]; \
} \
for (i = max_log; i > 0; i--) { \
if (!(i % 10)) \
fprintf(stderr, "2^%2d ", i); \
else \
fprintf(stderr, " "); \
for(j = 0; j < 64; j++) \
fprintf(stderr, logs[j] >= i ? "" : " "); \
fprintf(stderr, "\n"); \
} \
if (max_log == 0) { \
fprintf(stderr, "[empty]\n"); \
} else { \
fprintf(stderr, " ns μs ms s ks\n"); \
fprintf(stderr, "min: "); \
if (s->min < 1000) \
fprintf(stderr, "%lu (ns)", s->min); \
else if (s->min < 1000000) \
fprintf(stderr, "%.2f (μs)", s->min / 1000.0); \
else if (s->min < 1000000000) \
fprintf(stderr, "%.2f (ms)", s->min / 1000000.0); \
else if (s->min < 1000000000000) \
fprintf(stderr, "%.2f (s)", s->min / 1000000000.0); \
fprintf(stderr, " max: "); \
if (s->max < 1000) \
fprintf(stderr, "%lu (ns)", s->max); \
else if (s->max < 1000000) \
fprintf(stderr, "%.2f (μs)", s->max / 1000.0); \
else if (s->max < 1000000000) \
fprintf(stderr, "%.2f (ms)", s->max / 1000000.0); \
else if (s->max < 1000000000000) \
fprintf(stderr, "%.2f (s)", s->max / 1000000000.0); \
fprintf(stderr, " mean: "); \
if (m < 1000) \
fprintf(stderr, "%.2f (ns)", m); \
else if (m < 1000000) \
fprintf(stderr, "%.2f (μs)", m / 1000.0); \
else if (m < 1000000000) \
fprintf(stderr, "%.2f (ms)", m / 1000000.0); \
else if (m < 1000000000000) \
fprintf(stderr, "%.2f (s)", m / 1000000000.0); \
fprintf(stderr, "\n"); \
} \
fflush(stderr); \
}
#define STAT_INIT(var, name) \
var->name ## _stat.min = ~0; \
var->name ## _stat.max = 0; \
var->name ## _stat.mean = 0.0; \
var->name ## _stat.h = 0; \
var->name ## _stat.d.then = 0; \
var->name ## _stat.d.unit = ns;
#define STAT_TICK(var, name) name ## _stat_tick(&var->name ## _stat)
#define STAT_TOCK(var, name) name ## _stat_tock(&var->name ## _stat)
#define STAT_RESET(var, name) name ## _stat_reset(&var->name ## _stat)
#define STAT_MEAN_LOG2_SAMPLE(var, name) \
name ## _stat_mean_lg2(&var->name ## _stat)
#define STAT_MEAN_SAMPLE(var, name) \
name ## _stat_mean(&var->name ## _stat)
#define STAT_PRINT(var, name, mod) \
name ## _stat_print_histogram(&var->name ## _stat, mod)
#if defined(__cplusplus)
}
#endif
#endif // __STATS_H__

Binary file not shown.

BIN
docs/mdm-slides.pdf Normal file

Binary file not shown.

View file

@ -1,18 +1,43 @@
%% -*- erlang -*-
%% ex: ft=erlang ts=4 sw=4 et
{erl_opts, [
{require_otp_vsn, "R1[567]"}.
{cover_enabled, true}.
{erl_opts, [{d,'DEBUG',true},
debug_info,
warnings_as_errors,
warn_export_all
]}.
fail_on_warning,
warn_unused_vars,
warn_export_all,
warn_shadow_vars,
warn_unused_import,
warn_unused_function,
warn_bif_clash,
warn_unused_record,
warn_deprecated_function,
warn_obsolete_guard,
warn_export_vars,
warn_exported_vars,
warn_untyped_record
%warn_missing_spec,
%strict_validation
]}.
{xref_checks, [undefined_function_calls, deprecated_function_calls]}.
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
{port_specs, [
{"unix", "priv/emdb_drv.so", ["c_src/*.c"]},
{"linux", "priv/emdb_drv.so", ["c_src/*.c"]},
{"darwin", "priv/emdb_drv.so", ["c_src/*.c"]},
{"win32", "priv/emdb_drv.dll", ["c_src/*.c"]}
{"unix", "priv/emdb.so", ["c_src/*.c"]},
{"linux", "priv/emdb.so", ["c_src/*.c"]},
{"darwin", "priv/emdb.so", ["c_src/*.c"]},
{"win32", "priv/emdb.dll", ["c_src/*.c"]}
]}.
{port_env, [
{".*", "CFLAGS", "-O2 -Wall"}
{"DRV_CFLAGS", "$DRV_CFLAGS -O3 -fPIC -march=native -mtune=native -Wall -Wextra -Werror"}
]}.
% for debugging use
% {"DRV_CFLAGS", "$DRV_CFLAGS -DMDB_DEBUG=2 -DMDB_PARANOID -DDEBUG -O0 -fPIC -Wall -Wextra -Werror"}

45
src/async_nif.hrl Normal file
View file

@ -0,0 +1,45 @@
%% -------------------------------------------------------------------
%%
%% async_nif: An async thread-pool layer for Erlang's NIF API
%%
%% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
%% Author: Gregory Burd <greg@basho.com> <greg@burd.me>
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-spec async_nif_enqueue(reference(), function(), [term()]) -> term() | {error, term()}.
async_nif_enqueue(R, F, A) ->
case erlang:apply(F, [R|A]) of
{ok, enqueued} ->
receive
{R, {error, eagain}} ->
%% Work unit was not queued, try again.
async_nif_enqueue(R, F, A);
{R, {error, shutdown}=Error} ->
%% Work unit was queued, but not executed.
Error;
{R, {error, _Reason}=Error} ->
%% Work unit returned an error.
Error;
{R, Reply} ->
Reply
end;
Other ->
Other
end.
-define(ASYNC_NIF_CALL(Fun, Args), async_nif_enqueue(erlang:make_ref(), Fun, Args)).

View file

@ -27,26 +27,48 @@
-module(emdb).
%%====================================================================
%% EXPORTS
%%====================================================================
-export([
open/1,
open/2,
open/3
open/3,
close/1,
put/3,
get/2,
del/2,
update/3, upd/3,
drop/1
]).
%% internal export (ex. spawn, apply)
-on_load(init/0).
%% config for testing
-ifdef(TEST).
-ifdef(EQC).
%include_lib("eqc/include/eqc.hrl").
-define(QC_OUT(P), eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
-endif.
-include_lib("eunit/include/eunit.hrl").
-endif.
%%====================================================================
%% Includes
%%====================================================================
-include("emdb.hrl").
-include("async_nif.hrl").
%%====================================================================
%% Macros
%% MACROS
%%====================================================================
-define(EMDB_DRIVER_NAME, "emdb").
-define(NOT_LOADED, not_loaded(?LINE)).
-define(MDB_MAP_SIZE, 10485760). %% 10MB
%%====================================================================
@ -59,21 +81,138 @@
%%--------------------------------------------------------------------
open(DirName) ->
open(DirName, ?MDB_MAP_SIZE).
open(DirName, MapSize) when is_integer(MapSize) andalso MapSize > 0 ->
open(DirName, MapSize)
when is_integer(MapSize)
andalso MapSize > 0 ->
open(DirName, MapSize, 0).
open(DirName, MapSize, EnvFlags) when is_integer(MapSize) andalso MapSize > 0 andalso is_integer(EnvFlags) andalso EnvFlags >= 0 ->
open(DirName, MapSize, EnvFlags)
when is_integer(MapSize) andalso MapSize > 0 andalso
is_integer(EnvFlags) andalso EnvFlags >= 0 ->
%% ensure directory exists
ok = filelib:ensure_dir(DirName ++ "/"),
decorate(emdb_drv:open(DirName, MapSize, EnvFlags)).
ok = filelib:ensure_dir(filename:join([DirName, "x"])),
?ASYNC_NIF_CALL(fun open/4, [DirName, MapSize, EnvFlags]).
open(_AsyncRef, _DirName, _MapSize, _EnvFlags) ->
?NOT_LOADED.
close(Handle) ->
?ASYNC_NIF_CALL(fun close/2, [Handle]).
close(_AsyncRef, _Handle) ->
?NOT_LOADED.
put(Handle, Key, Val)
when is_binary(Key) andalso is_binary(Val) ->
?ASYNC_NIF_CALL(fun put/4, [Handle, Key, Val]).
put(_AsyncRef, _Handle, _Key, _Val) ->
?NOT_LOADED.
get(Handle, Key)
when is_binary(Key) ->
?ASYNC_NIF_CALL(fun get/3, [Handle, Key]).
get(_AsyncRef, _Handle, _Key) ->
?NOT_LOADED.
del(Handle, Key)
when is_binary(Key) ->
?ASYNC_NIF_CALL(fun del/3, [Handle, Key]).
del(_AsyncRef, _Handle, _Key) ->
?NOT_LOADED.
upd(Handle, Key, Val) ->
update(Handle, Key, Val).
update(Handle, Key, Val)
when is_binary(Key) andalso is_binary(Val) ->
?ASYNC_NIF_CALL(fun update/4, [Handle, Key, Val]).
update(_AsyncRef, _Handle, _Key, _Val) ->
?NOT_LOADED.
drop(Handle) ->
?ASYNC_NIF_CALL(fun drop/2, [Handle]).
drop(_AsyncRef, _Handle) ->
?NOT_LOADED.
%%====================================================================
%% PRIVATE API
%%====================================================================
%% @private
decorate({ok, Handle}) ->
CDB = emdb_oop:new(Handle),
{ok, CDB};
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
init() ->
PrivDir = case code:priv_dir(?MODULE) of
{error, _} ->
EbinDir = filename:dirname(code:which(?MODULE)),
AppPath = filename:dirname(EbinDir),
filename:join(AppPath, "priv");
Path ->
Path
end,
erlang:load_nif(filename:join(PrivDir, ?EMDB_DRIVER_NAME), 0).
decorate(Error) ->
Error.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
not_loaded(Line) ->
erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).
-define(TEST_DATA_DIR, "test/basics").
open_test_db(DataDir) ->
{ok, CWD} = file:get_cwd(),
Path = filename:join([CWD, DataDir]),
?cmd("rm -rf " ++ Path),
?assertMatch(ok, filelib:ensure_dir(filename:join([Path, "x"]))),
{ok, Handle} = ?MODULE:open(Path),
Handle.
basics_test_() ->
{setup,
fun() ->
open_test_db(?TEST_DATA_DIR)
end,
fun(Handle) ->
ok = ?MODULE:close(Handle)
end,
fun(Handle) ->
{inorder,
[{"open and close a database",
fun() ->
Handle = open_test_db(Handle)
end},
{"create, then drop an empty database",
fun() ->
Handle = open_test_db(Handle),
?assertMatch(ok, ?MODULE:drop(Handle))
end},
{"create, put an item, get it, then drop the database",
fun() ->
Handle = open_test_db(Handle),
?assertMatch(ok, ?MODULE:put(Handle, <<"a">>, <<"apple">>)),
?assertMatch(ok, ?MODULE:put(Handle, <<"b">>, <<"boy">>)),
?assertMatch(ok, ?MODULE:put(Handle, <<"c">>, <<"cat">>)),
?assertMatch({ok, <<"apple">>}, ?MODULE:get(Handle, <<"a">>)),
?assertMatch(ok, ?MODULE:update(Handle, <<"a">>, <<"ant">>)),
?assertMatch({ok, <<"ant">>}, ?MODULE:get(Handle, <<"a">>)),
?assertMatch(ok, ?MODULE:del(Handle, <<"a">>)),
?assertMatch(not_found, ?MODULE:get(Handle, <<"a">>)),
?assertMatch(ok, ?MODULE:drop(Handle))
end}
]}
end}.
-endif.

View file

@ -1,137 +0,0 @@
%%-------------------------------------------------------------------
%% This file is part of EMDB - Erlang MDB API
%%
%% Copyright (c) 2012 by Aleph Archives. All rights reserved.
%%
%%-------------------------------------------------------------------
%% Redistribution and use in source and binary forms, with or without
%% modification, are permitted only as authorized by the OpenLDAP
%% Public License.
%%
%% A copy of this license is available in the file LICENSE in the
%% top-level directory of the distribution or, alternatively, at
%% <http://www.OpenLDAP.org/license.html>.
%%
%% Permission to use, copy, modify, and distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%-------------------------------------------------------------------
-module(emdb_drv).
%%====================================================================
%% EXPORTS
%%====================================================================
-export([
open/3,
close/1,
put/3,
get/2,
del/2,
update/3,
drop/1
]).
%% internal export (ex. spawn, apply)
-on_load(init/0).
%%====================================================================
%% MACROS
%%====================================================================
-define(EMDB_DRIVER_NAME, "emdb_drv").
-define(NOT_LOADED, not_loaded(?LINE)).
%%====================================================================
%% PUBLIC API
%%====================================================================
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
open(_DirName, _MapSize, _EnvFlags) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
close(_Handle) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
put(_Handle, _Key, _Val) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
get(_Handle, _Key) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
del(_Handle, _Key) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
update(_Handle, _Key, _Val) ->
?NOT_LOADED.
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
drop(_Handle) ->
?NOT_LOADED.
%%====================================================================
%% PRIVATE API
%%====================================================================
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
init() ->
PrivDir = case code:priv_dir(?MODULE) of
{error, _} ->
EbinDir = filename:dirname(code:which(?MODULE)),
AppPath = filename:dirname(EbinDir),
filename:join(AppPath, "priv");
Path ->
Path
end,
erlang:load_nif(filename:join(PrivDir, ?EMDB_DRIVER_NAME), 0).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
not_loaded(Line) ->
erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).

View file

@ -1,97 +0,0 @@
%%-------------------------------------------------------------------
%% This file is part of EMDB - Erlang MDB API
%%
%% Copyright (c) 2012 by Aleph Archives. All rights reserved.
%%
%%-------------------------------------------------------------------
%% Redistribution and use in source and binary forms, with or without
%% modification, are permitted only as authorized by the OpenLDAP
%% Public License.
%%
%% A copy of this license is available in the file LICENSE in the
%% top-level directory of the distribution or, alternatively, at
%% <http://www.OpenLDAP.org/license.html>.
%%
%% Permission to use, copy, modify, and distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%-------------------------------------------------------------------
-module(emdb_oop, [Handle]).
%%====================================================================
%% EXPORTS
%%====================================================================
-export([
close/0,
put/2,
get/1,
del/1,
update/2,
drop/0
]).
%%====================================================================
%% PUBLIC API
%%====================================================================
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
close() ->
emdb_drv:close(Handle).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
put(Key, Val) when is_binary(Key) andalso is_binary(Val) ->
emdb_drv:put(Handle, Key, Val).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
get(Key) when is_binary(Key) ->
emdb_drv:get(Handle, Key).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
del(Key) when is_binary(Key) ->
emdb_drv:del(Handle, Key).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
update(Key, Val) when is_binary(Key) andalso is_binary(Val) ->
emdb_drv:update(Handle, Key, Val).
%%--------------------------------------------------------------------
%% @doc
%% @end
%%--------------------------------------------------------------------
drop() ->
emdb_drv:drop(Handle).
%%====================================================================
%% INTERNAL FUNCTIONS
%%====================================================================

56
src/emdb_sup.erl Normal file
View file

@ -0,0 +1,56 @@
%%-------------------------------------------------------------------
%% This file is part of EMDB - Erlang MDB API
%%
%% Copyright (c) 2013 by Basho Technologies. All rights reserved.
%%
%%-------------------------------------------------------------------
%% Redistribution and use in source and binary forms, with or without
%% modification, are permitted only as authorized by the OpenLDAP
%% Public License.
%%
%% A copy of this license is available in the file LICENSE in the
%% top-level directory of the distribution or, alternatively, at
%% <http://www.OpenLDAP.org/license.html>.
%%
%% Permission to use, copy, modify, and distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%-------------------------------------------------------------------
-module(emdb_sup).
-behaviour(supervisor).
%%====================================================================
%% EXPORTS
%%====================================================================
-export([start_link/0]). %% API
-export([init/1]). %% Supervisor callbacks
%%====================================================================
%% MACROS
%%====================================================================
%% Helper macro for declaring children of supervisor
-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}).
%% ===================================================================
%% API functions
%% ===================================================================
start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
%% ===================================================================
%% Supervisor callbacks
%% ===================================================================
init([]) ->
{ok, {{one_for_one, 5, 10}, [?CHILD(emdb, worker)]}}.

View file

@ -0,0 +1,79 @@
-module(basho_bench_driver_emdb).
-record(state, {
handle
}).
-export([new/1,
run/4]).
-include_lib("basho_bench/include/basho_bench.hrl").
%% ====================================================================
%% API
%% ====================================================================
new(1) ->
%% Make sure emdb is available
case code:which(emdb) of
non_existing ->
?FAIL_MSG("~s requires emdb to be available on code path.\n",
[?MODULE]);
_ ->
ok
end,
%{ok, _} = emdb_sup:start_link(),
setup(1);
new(Id) ->
setup(Id).
setup(_Id) ->
%% Get the target directory
Dir = basho_bench_config:get(emdb_dir, "/tmp"),
%Config = basho_bench_config:get(emdb, []),
%% Start Lightning MDB
case emdb:open(Dir, 32212254720, 16#10000 bor 16#40000 bor 16#80000) of
{ok, H} ->
{ok, #state{handle=H}};
{error, Reason} ->
?FAIL_MSG("Failed to establish a Lightning MDB connection, emdb backend unable to start: ~p\n", [Reason]),
{error, Reason}
end.
run(get, KeyGen, _ValueGen, #state{handle=Handle}=State) ->
case emdb:get(Handle, KeyGen()) of
{ok, _Value} ->
{ok, State};
not_found ->
{ok, State};
{error, Reason} ->
{error, Reason}
end;
run(put, KeyGen, ValueGen, #state{handle=Handle}=State) ->
Key = KeyGen(),
Val = ValueGen(),
case emdb:upd(Handle, Key, Val) of
ok ->
{ok, State};
{error, Reason} ->
{error, Reason}
end;
run(delete, KeyGen, _ValueGen, #state{handle=Handle}=State) ->
case emdb:del(Handle, KeyGen()) of
ok ->
{ok, State};
not_found ->
{ok, State};
{error, Reason} ->
{error, Reason}
end.
%% config_value(Key, Config, Default) ->
%% case proplists:get_value(Key, Config) of
%% undefined ->
%% Default;
%% Value ->
%% Value
%% end.

37
tools/emdb.config Normal file
View file

@ -0,0 +1,37 @@
%%-*- mode: erlang -*-
%% ex: ft=erlang ts=4 sw=4 et
%% How to:
%% * put the emdb.config file into basho_bench/examples
%% * put the basho_bench_driver_emdb.erl into basho_bench/src
%% * make clean in basho_bench, then make
%% * edit examples/emdb.config
%% - change {code_paths, ["../emdb"]}. to be a relative path to your
%% emdb directory
%% - change {emdb_dir, "/home/gburd/ws/basho_bench/data"}. to a fully
%% qualified location for your test data files (mkdir that directory
%% yourself, if it doesn't exist the test will fail 'enoent')
%% * to run, replace this path with the proper path on your system:
%% LD_LIBRARY_PATH=/home/you/emdb/priv ./basho_bench examples/emdb.config
%% * the test should run for 10 minutes (as it is configured right now)
%% with 4 concurrent workers accessing the same table
%%
%% Note:
%% There are two config sections in wt.config {emdb, [ ... ]}. and
%% {emdb_, [ ... ]}. The one being used is named "emdb" the other
%% config is ignored. I setup an LSM and BTREE config and to choose
%% which is run you just rename those two sections (turn one off by
%% adding a "_" to the name and take the "_" out of the other's name).
{mode, max}.
{duration, 10}.
{concurrent, 8}.
{driver, basho_bench_driver_emdb}.
{key_generator, {int_to_bin_littleendian,{uniform_int, 5000000}}}.
{value_generator, {fixed_bin, 1024}}.
%{operations, [{get, 9}, {put, 9}, {delete, 2}]}.
{operations, [{put, 1}]}.
{code_paths, ["../emdb"]}.
{emdb_dir, "/home/gburd/ws/basho_bench/data"}.
{emdb, [ ]}.