Remove deprecated crypto:sha/1 in favor of crypto:hash/2.

Quell compiler warnings.
Update to LMDB@3f62b727ccf3424daca1cdc24bbf98c869f44699
2016-03-06 20:19:34 -05:00 · 2016-03-06 20:11:34 -05:00 · 2016-03-06 20:08:11 -05:00 · 2016-03-06 19:57:26 -05:00 · 2015-04-01 01:08:13 +02:00 · 2013-09-17 13:23:31 -04:00
17 changed files with 7810 additions and 3397 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,4 @@ c_src/*.o
 deps/
 priv/
 *~
+.rebar
--- a/c_src/async_nif.h
+++ b/c_src/async_nif.h
@ -4,18 +4,16 @@
 * Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
 * Author: Gregory Burd <greg@basho.com> <greg@burd.me>
 *
- * This file is provided to you under the Apache License,
- * Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License.  You may obtain
- * a copy of the License at
+ * This file is provided to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
 * under the License.
 */

@ -27,18 +25,26 @@ extern "C" {
 #endif

 #include <assert.h>
-#include "fifo_q.h"
-#include "stats.h"

-#ifndef __UNUSED
-#define __UNUSED(v) ((void)(v))
+#include "queue.h"
+
+#ifndef UNUSED
+#define UNUSED(v) ((void)(v))
 #endif

-#define ASYNC_NIF_MAX_WORKERS 128
-#define ASYNC_NIF_WORKER_QUEUE_SIZE 500
-#define ASYNC_NIF_MAX_QUEUED_REQS 1000 * ASYNC_NIF_MAX_WORKERS
+#define ASYNC_NIF_MAX_WORKERS 1024
+#define ASYNC_NIF_MIN_WORKERS 2
+#define ASYNC_NIF_WORKER_QUEUE_SIZE 8192
+#define ASYNC_NIF_MAX_QUEUED_REQS ASYNC_NIF_WORKER_QUEUE_SIZE * ASYNC_NIF_MAX_WORKERS
+
+/* Atoms (initialized in on_load) */
+static ERL_NIF_TERM ATOM_EAGAIN;
+static ERL_NIF_TERM ATOM_ENOMEM;
+static ERL_NIF_TERM ATOM_ENQUEUED;
+static ERL_NIF_TERM ATOM_ERROR;
+static ERL_NIF_TERM ATOM_OK;
+static ERL_NIF_TERM ATOM_SHUTDOWN;

-STAT_DECL(qwait, 1000);

 struct async_nif_req_entry {
  ERL_NIF_TERM ref;
@ -47,14 +53,17 @@ struct async_nif_req_entry {
  void *args;
  void (*fn_work)(ErlNifEnv*, ERL_NIF_TERM, ErlNifPid*, unsigned int, void *);
  void (*fn_post)(void *);
+  STAILQ_ENTRY(async_nif_req_entry) entries;
 };
-DECL_FIFO_QUEUE(reqs, struct async_nif_req_entry);
+

 struct async_nif_work_queue {
-  STAT_DEF(qwait);
+  unsigned int num_workers;
+  unsigned int depth;
  ErlNifMutex *reqs_mutex;
  ErlNifCond *reqs_cnd;
-  FIFO_QUEUE_TYPE(reqs) reqs;
+  struct async_nif_work_queue *next;
+  STAILQ_HEAD(reqs, async_nif_req_entry) reqs;
 };

 struct async_nif_worker_entry {
@ -62,16 +71,17 @@ struct async_nif_worker_entry {
  unsigned int worker_id;
  struct async_nif_state *async_nif;
  struct async_nif_work_queue *q;
+  SLIST_ENTRY(async_nif_worker_entry) entries;
 };

 struct async_nif_state {
-  STAT_DEF(qwait);
  unsigned int shutdown;
-  unsigned int num_workers;
-  struct async_nif_worker_entry worker_entries[ASYNC_NIF_MAX_WORKERS];
+  ErlNifMutex *we_mutex;
+  unsigned int we_active;
+  SLIST_HEAD(joining, async_nif_worker_entry) we_joining;
  unsigned int num_queues;
  unsigned int next_q;
-  FIFO_QUEUE_TYPE(reqs) recycled_reqs;
+  STAILQ_HEAD(recycled_reqs, async_nif_req_entry) recycled_reqs;
  unsigned int num_reqs;
  ErlNifMutex *recycled_req_mutex;
  struct async_nif_work_queue queues[];
@ -80,39 +90,43 @@ struct async_nif_state {
 #define ASYNC_NIF_DECL(decl, frame, pre_block, work_block, post_block)  \
  struct decl ## _args frame;                                           \
  static void fn_work_ ## decl (ErlNifEnv *env, ERL_NIF_TERM ref, ErlNifPid *pid, unsigned int worker_id, struct decl ## _args *args) { \
-  __UNUSED(worker_id);                                                  \
+  UNUSED(worker_id);                                                    \
+  DPRINTF("async_nif: calling \"%s\"", __func__);                       \
  do work_block while(0);                                               \
+  DPRINTF("async_nif: returned from \"%s\"", __func__);                 \
  }                                                                     \
  static void fn_post_ ## decl (struct decl ## _args *args) {           \
-    __UNUSED(args);                                                     \
+    UNUSED(args);                                                       \
+    DPRINTF("async_nif: calling \"fn_post_%s\"", #decl);                \
    do post_block while(0);                                             \
+    DPRINTF("async_nif: returned from \"fn_post_%s\"", #decl);          \
  }                                                                     \
  static ERL_NIF_TERM decl(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv_in[]) { \
    struct decl ## _args on_stack_args;                                 \
    struct decl ## _args *args = &on_stack_args;                        \
    struct decl ## _args *copy_of_args;                                 \
    struct async_nif_req_entry *req = NULL;                             \
-    const char *affinity = NULL;                                        \
+    unsigned int affinity = 0;                                          \
    ErlNifEnv *new_env = NULL;                                          \
    /* argv[0] is a ref used for selective recv */                      \
    const ERL_NIF_TERM *argv = argv_in + 1;                             \
    argc -= 1;                                                          \
    /* Note: !!! this assumes that the first element of priv_data is ours */ \
    struct async_nif_state *async_nif = *(struct async_nif_state**)enif_priv_data(env); \
-    if (async_nif->shutdown)                                            \
-      return enif_make_tuple2(env, enif_make_atom(env, "error"),        \
-                              enif_make_atom(env, "shutdown"));         \
+    if (async_nif->shutdown)						\
+	return enif_make_tuple2(env, ATOM_ERROR, ATOM_SHUTDOWN);	\
    req = async_nif_reuse_req(async_nif);                               \
+    if (!req)								\
+        return enif_make_tuple2(env, ATOM_ERROR, ATOM_ENOMEM);		\
    new_env = req->env;                                                 \
-    if (!req)                                                           \
-      return enif_make_tuple2(env, enif_make_atom(env, "error"),        \
-                              enif_make_atom(env, "eagain"));           \
+    DPRINTF("async_nif: calling \"%s\"", __func__);                     \
    do pre_block while(0);                                              \
-    copy_of_args = (struct decl ## _args *)enif_alloc(sizeof(struct decl ## _args)); \
+    DPRINTF("async_nif: returned from \"%s\"", __func__);               \
+    copy_of_args = (struct decl ## _args *)malloc(sizeof(struct decl ## _args)); \
    if (!copy_of_args) {                                                \
      fn_post_ ## decl (args);                                          \
-      return enif_make_tuple2(env, enif_make_atom(env, "error"),        \
-                              enif_make_atom(env, "enomem"));           \
+      async_nif_recycle_req(req, async_nif);                            \
+      return enif_make_tuple2(env, ATOM_ERROR, ATOM_ENOMEM);		\
    }                                                                   \
    memcpy(copy_of_args, args, sizeof(struct decl ## _args));           \
    req->ref = enif_make_copy(new_env, argv_in[0]);                     \
@ -122,13 +136,13 @@ struct async_nif_state {
    req->fn_post = (void (*)(void *))fn_post_ ## decl;                 \
    int h = -1;                                                        \
    if (affinity)                                                      \
-        h = async_nif_str_hash_func(affinity) % async_nif->num_queues; \
+        h = ((unsigned int)affinity) % async_nif->num_queues;          \
    ERL_NIF_TERM reply = async_nif_enqueue_req(async_nif, req, h);     \
    if (!reply) {                                                      \
      fn_post_ ## decl (args);                                         \
-      enif_free(copy_of_args);                                         \
-      return enif_make_tuple2(env, enif_make_atom(env, "error"),       \
-                              enif_make_atom(env, "shutdown"));        \
+      async_nif_recycle_req(req, async_nif);                           \
+      free(copy_of_args);					       \
+      return enif_make_tuple2(env, ATOM_ERROR, ATOM_EAGAIN);	       \
    }                                                                  \
    return reply;                                                      \
  }
@ -136,16 +150,16 @@ struct async_nif_state {
 #define ASYNC_NIF_INIT(name)                                            \
        static ErlNifMutex *name##_async_nif_coord = NULL;

-#define ASYNC_NIF_LOAD(name, priv) do {                                 \
+#define ASYNC_NIF_LOAD(name, env, priv) do {				\
        if (!name##_async_nif_coord)                                    \
-            name##_async_nif_coord = enif_mutex_create(NULL);           \
+            name##_async_nif_coord = enif_mutex_create("nif_coord load"); \
        enif_mutex_lock(name##_async_nif_coord);                        \
-        priv = async_nif_load();                                        \
+        priv = async_nif_load(env);					\
        enif_mutex_unlock(name##_async_nif_coord);                      \
    } while(0);
 #define ASYNC_NIF_UNLOAD(name, env, priv) do {                          \
        if (!name##_async_nif_coord)                                    \
-            name##_async_nif_coord = enif_mutex_create(NULL);           \
+            name##_async_nif_coord = enif_mutex_create("nif_coord unload"); \
        enif_mutex_lock(name##_async_nif_coord);                        \
        async_nif_unload(env, priv);                                    \
        enif_mutex_unlock(name##_async_nif_coord);                      \
@ -154,7 +168,7 @@ struct async_nif_state {
    } while(0);
 #define ASYNC_NIF_UPGRADE(name, env) do {                               \
        if (!name##_async_nif_coord)                                    \
-            name##_async_nif_coord = enif_mutex_create(NULL);           \
+            name##_async_nif_coord = enif_mutex_create("nif_coord upgrade"); \
        enif_mutex_lock(name##_async_nif_coord);                        \
        async_nif_upgrade(env);                                         \
        enif_mutex_unlock(name##_async_nif_coord);                      \
@ -179,26 +193,26 @@ async_nif_reuse_req(struct async_nif_state *async_nif)
    ErlNifEnv *env = NULL;

    enif_mutex_lock(async_nif->recycled_req_mutex);
-    if (fifo_q_empty(reqs, async_nif->recycled_reqs)) {
+    if (STAILQ_EMPTY(&async_nif->recycled_reqs)) {
        if (async_nif->num_reqs < ASYNC_NIF_MAX_QUEUED_REQS) {
-            req = enif_alloc(sizeof(struct async_nif_req_entry));
+            req = malloc(sizeof(struct async_nif_req_entry));
            if (req) {
                memset(req, 0, sizeof(struct async_nif_req_entry));
                env = enif_alloc_env();
-                if (!env) {
-                    enif_free(req);
-                    req = NULL;
-                } else {
+                if (env) {
                    req->env = env;
-                    async_nif->num_reqs++;
+                    __sync_fetch_and_add(&async_nif->num_reqs, 1);
+                } else {
+                    free(req);
+                    req = NULL;
                }
            }
        }
    } else {
-        req = fifo_q_get(reqs, async_nif->recycled_reqs);
+        req = STAILQ_FIRST(&async_nif->recycled_reqs);
+        STAILQ_REMOVE(&async_nif->recycled_reqs, req, async_nif_req_entry, entries);
    }
    enif_mutex_unlock(async_nif->recycled_req_mutex);
-    STAT_TICK(async_nif, qwait);
    return req;
 }

@ -212,27 +226,59 @@ async_nif_reuse_req(struct async_nif_state *async_nif)
 void
 async_nif_recycle_req(struct async_nif_req_entry *req, struct async_nif_state *async_nif)
 {
-    STAT_TOCK(async_nif, qwait);
+    ErlNifEnv *env = NULL;
    enif_mutex_lock(async_nif->recycled_req_mutex);
-    fifo_q_put(reqs, async_nif->recycled_reqs, req);
+    enif_clear_env(req->env);
+    env = req->env;
+    memset(req, 0, sizeof(struct async_nif_req_entry));
+    req->env = env;
+    STAILQ_INSERT_TAIL(&async_nif->recycled_reqs, req, entries);
    enif_mutex_unlock(async_nif->recycled_req_mutex);
 }

+static void *async_nif_worker_fn(void *);
+
 /**
- * A string hash function.
- *
- * A basic hash function for strings of characters used during the
- * affinity association.
- *
- * s    a NULL terminated set of bytes to be hashed
- * ->   an integer hash encoding of the bytes
+ * Start up a worker thread.
 */
-static inline unsigned int
-async_nif_str_hash_func(const char *s)
+static int
+async_nif_start_worker(struct async_nif_state *async_nif, struct async_nif_work_queue *q)
 {
-  unsigned int h = (unsigned int)*s;
-  if (h) for (++s ; *s; ++s) h = (h << 5) - h + (unsigned int)*s;
-  return h;
+  struct async_nif_worker_entry *we;
+
+  if (0 == q)
+      return EINVAL;
+
+  enif_mutex_lock(async_nif->we_mutex);
+
+  we = SLIST_FIRST(&async_nif->we_joining);
+  while(we != NULL) {
+    struct async_nif_worker_entry *n = SLIST_NEXT(we, entries);
+    SLIST_REMOVE(&async_nif->we_joining, we, async_nif_worker_entry, entries);
+    void *exit_value = 0; /* We ignore the thread_join's exit value. */
+    enif_thread_join(we->tid, &exit_value);
+    free(we);
+    async_nif->we_active--;
+    we = n;
+  }
+
+  if (async_nif->we_active == ASYNC_NIF_MAX_WORKERS) {
+      enif_mutex_unlock(async_nif->we_mutex);
+      return EAGAIN;
+  }
+
+  we = malloc(sizeof(struct async_nif_worker_entry));
+  if (!we) {
+      enif_mutex_unlock(async_nif->we_mutex);
+      return ENOMEM;
+  }
+  memset(we, 0, sizeof(struct async_nif_worker_entry));
+  we->worker_id = async_nif->we_active++;
+  we->async_nif = async_nif;
+  we->q = q;
+
+  enif_mutex_unlock(async_nif->we_mutex);
+  return enif_thread_create(NULL,&we->tid, &async_nif_worker_fn, (void*)we, 0);
 }

 /**
@ -245,9 +291,9 @@ static ERL_NIF_TERM
 async_nif_enqueue_req(struct async_nif_state* async_nif, struct async_nif_req_entry *req, int hint)
 {
  /* Identify the most appropriate worker for this request. */
-  unsigned int qid = 0;
+  unsigned int i, last_qid, qid = 0;
  struct async_nif_work_queue *q = NULL;
-  unsigned int n = async_nif->num_queues;
+  double avg_depth = 0.0;

  /* Either we're choosing a queue based on some affinity/hinted value or we
     need to select the next queue in the rotation and atomically update that
@ -255,53 +301,76 @@ async_nif_enqueue_req(struct async_nif_state* async_nif, struct async_nif_req_en
  if (hint >= 0) {
      qid = (unsigned int)hint;
  } else {
-      qid = async_nif->next_q;
-      qid = (qid + 1) % async_nif->num_queues;
-      async_nif->next_q = qid;
+      do {
+          last_qid = __sync_fetch_and_add(&async_nif->next_q, 0);
+          qid = (last_qid + 1) % async_nif->num_queues;
+      } while (!__sync_bool_compare_and_swap(&async_nif->next_q, last_qid, qid));
  }

  /* Now we inspect and interate across the set of queues trying to select one
     that isn't too full or too slow. */
-  do {
+  for (i = 0; i < async_nif->num_queues; i++) {
+      /* Compute the average queue depth not counting queues which are empty or
+         the queue we're considering right now. */
+      unsigned int j, n = 0;
+      for (j = 0; j < async_nif->num_queues; j++) {
+          if (j != qid && async_nif->queues[j].depth != 0) {
+              n++;
+              avg_depth += async_nif->queues[j].depth;
+          }
+      }
+      if (avg_depth) avg_depth /= n;
+
+      /* Lock this queue under consideration, then check for shutdown.  While
+         we hold this lock either a) we're shutting down so exit now or b) this
+         queue will be valid until we release the lock. */
      q = &async_nif->queues[qid];
      enif_mutex_lock(q->reqs_mutex);

-      /* Now that we hold the lock, check for shutdown.  As long as we hold
-         this lock either a) we're shutting down so exit now or b) this queue
-         will be valid until we release the lock. */
-      if (async_nif->shutdown) {
-          enif_mutex_unlock(q->reqs_mutex);
-          return 0;
-      }
-      double await = STAT_MEAN_LOG2_SAMPLE(async_nif, qwait);
-      double await_inthisq = STAT_MEAN_LOG2_SAMPLE(q, qwait);
-      if (fifo_q_full(reqs, q->reqs) || await_inthisq > await) {
+      /* Try not to enqueue a request into a queue that isn't keeping up with
+         the request volume. */
+      if (q->depth <= avg_depth) break;
+      else {
          enif_mutex_unlock(q->reqs_mutex);
          qid = (qid + 1) % async_nif->num_queues;
-          q = &async_nif->queues[qid];
-      } else {
-          break;
      }
-      // TODO: at some point add in work sheading/stealing
-  } while(n-- > 0);
+  }

-  /* We hold the queue's lock, and we've seletect a reasonable queue for this
-     new request so add the request. */
-  STAT_TICK(q, qwait);
-  fifo_q_put(reqs, q->reqs, req);
+  /* If the for loop finished then we didn't find a suitable queue for this
+     request, meaning we're backed up so trigger eagain.  Note that if we left
+     the loop in this way we hold no lock. */
+  if (i == async_nif->num_queues) return 0;
+
+  /* Add the request to the queue. */
+  STAILQ_INSERT_TAIL(&q->reqs, req, entries);
+  __sync_fetch_and_add(&q->depth, 1);
+
+  /* We've selected a queue for this new request; now check to make sure there are
+     enough workers actively processing requests on this queue. */
+  while (q->depth > q->num_workers) {
+      switch(async_nif_start_worker(async_nif, q)) {
+      case EINVAL: case ENOMEM: default: return 0;
+      case EAGAIN: continue;
+      case 0:      __sync_fetch_and_add(&q->num_workers, 1); goto done;
+      }
+  }done:;

  /* Build the term before releasing the lock so as not to race on the use of
     the req pointer (which will soon become invalid in another thread
     performing the request). */
-  ERL_NIF_TERM reply = enif_make_tuple2(req->env, enif_make_atom(req->env, "ok"),
-                                        enif_make_atom(req->env, "enqueued"));
-  enif_mutex_unlock(q->reqs_mutex);
+  double pct_full = (double)avg_depth / (double)ASYNC_NIF_WORKER_QUEUE_SIZE;
+  ERL_NIF_TERM reply = enif_make_tuple2(req->env, ATOM_OK,
+					enif_make_tuple2(req->env, ATOM_ENQUEUED,
+							 enif_make_double(req->env, pct_full)));
  enif_cond_signal(q->reqs_cnd);
+  enif_mutex_unlock(q->reqs_mutex);
  return reply;
 }

 /**
- * TODO:
+ * Worker threads execute this function.  Here each worker pulls requests off
+ * their respective queues, executes that work and continues doing that until
+ * they see the shutdown flag is set at which point they exit.
 */
 static void *
 async_nif_worker_fn(void *arg)
@ -311,6 +380,7 @@ async_nif_worker_fn(void *arg)
  struct async_nif_state *async_nif = we->async_nif;
  struct async_nif_work_queue *q = we->q;
  struct async_nif_req_entry *req = NULL;
+  unsigned int tries = async_nif->num_queues;

  for(;;) {
    /* Examine the request queue, are there things to be done? */
@ -320,26 +390,40 @@ async_nif_worker_fn(void *arg)
        enif_mutex_unlock(q->reqs_mutex);
        break;
    }
-    if (fifo_q_empty(reqs, q->reqs)) {
+    if (STAILQ_EMPTY(&q->reqs)) {
      /* Queue is empty so we wait for more work to arrive. */
-      STAT_RESET(q, qwait);
-      enif_cond_wait(q->reqs_cnd, q->reqs_mutex);
-      goto check_again_for_work;
+	enif_mutex_unlock(q->reqs_mutex);
+	if (tries == 0 && q == we->q) {
+	    if (q->num_workers > ASYNC_NIF_MIN_WORKERS) {
+		/* At this point we've tried to find/execute work on all queues
+		 * and there are more than MIN_WORKERS on this queue so we
+		 * leave this loop (break), which leads to a thread exit/join. */
+		break;
+	    } else {
+		enif_mutex_lock(q->reqs_mutex);
+		enif_cond_wait(q->reqs_cnd, q->reqs_mutex);
+		goto check_again_for_work;
+	    }
+	} else {
+	    tries--;
+	    __sync_fetch_and_add(&q->num_workers, -1);
+	    q = q->next;
+	    __sync_fetch_and_add(&q->num_workers, 1);
+	    continue; // try next queue
+	}
    } else {
-      assert(fifo_q_size(reqs, q->reqs) > 0);
-      assert(fifo_q_size(reqs, q->reqs) < fifo_q_capacity(reqs, q->reqs));
      /* At this point the next req is ours to process and we hold the
         reqs_mutex lock.  Take the request off the queue. */
-      req = fifo_q_get(reqs, q->reqs);
-      enif_mutex_unlock(q->reqs_mutex);
+      req = STAILQ_FIRST(&q->reqs);
+      STAILQ_REMOVE(&q->reqs, req, async_nif_req_entry, entries);
+      __sync_fetch_and_add(&q->depth, -1);

-      /* Ensure that there is at least one other worker thread watching this
-         queue. */
+      /* Wake up another worker thread watching this queue to help process work. */
      enif_cond_signal(q->reqs_cnd);
+      enif_mutex_unlock(q->reqs_mutex);

      /* Perform the work. */
      req->fn_work(req->env, req->ref, &req->pid, worker_id, req->args);
-      STAT_TOCK(q, qwait);

      /* Now call the post-work cleanup function. */
      req->fn_post(req->args);
@ -348,13 +432,16 @@ async_nif_worker_fn(void *arg)
      req->ref = 0;
      req->fn_work = 0;
      req->fn_post = 0;
-      enif_free(req->args);
+      free(req->args);
      req->args = NULL;
-      enif_clear_env(req->env);
      async_nif_recycle_req(req, async_nif);
      req = NULL;
    }
  }
+  enif_mutex_lock(async_nif->we_mutex);
+  SLIST_INSERT_HEAD(&async_nif->we_joining, we, entries);
+  enif_mutex_unlock(async_nif->we_mutex);
+  __sync_fetch_and_add(&q->num_workers, -1);
  enif_thread_exit(0);
  return 0;
 }
@ -366,41 +453,44 @@ async_nif_unload(ErlNifEnv *env, struct async_nif_state *async_nif)
  unsigned int num_queues = async_nif->num_queues;
  struct async_nif_work_queue *q = NULL;
  struct async_nif_req_entry *req = NULL;
-  __UNUSED(env);
+  struct async_nif_worker_entry *we = NULL;
+  UNUSED(env);

-  STAT_PRINT(async_nif, qwait, "wterl");
-
-  /* Signal the worker threads, stop what you're doing and exit.  To
-     ensure that we don't race with the enqueue() process we first
-     lock all the worker queues, then set shutdown to true, then
-     unlock.  The enqueue function will take the queue mutex, then
-     test for shutdown condition, then enqueue only if not shutting
-     down. */
+  /* Signal the worker threads, stop what you're doing and exit.  To ensure
+     that we don't race with the enqueue() process we first lock all the worker
+     queues, then set shutdown to true, then unlock.  The enqueue function will
+     take the queue mutex, then test for shutdown condition, then enqueue only
+     if not shutting down. */
  for (i = 0; i < num_queues; i++) {
      q = &async_nif->queues[i];
      enif_mutex_lock(q->reqs_mutex);
  }
+  /* Set the shutdown flag so that worker threads will not continue
+     executing requests. */
  async_nif->shutdown = 1;
  for (i = 0; i < num_queues; i++) {
      q = &async_nif->queues[i];
-      enif_cond_broadcast(q->reqs_cnd);
      enif_mutex_unlock(q->reqs_mutex);
  }

  /* Join for the now exiting worker threads. */
-  for (i = 0; i < async_nif->num_workers; ++i) {
-    void *exit_value = 0; /* We ignore the thread_join's exit value. */
-    enif_thread_join(async_nif->worker_entries[i].tid, &exit_value);
+  while(async_nif->we_active > 0) {
+      for (i = 0; i < num_queues; i++)
+          enif_cond_broadcast(async_nif->queues[i].reqs_cnd);
+      enif_mutex_lock(async_nif->we_mutex);
+      we = SLIST_FIRST(&async_nif->we_joining);
+      while(we != NULL) {
+          struct async_nif_worker_entry *n = SLIST_NEXT(we, entries);
+          SLIST_REMOVE(&async_nif->we_joining, we, async_nif_worker_entry, entries);
+          void *exit_value = 0; /* We ignore the thread_join's exit value. */
+          enif_thread_join(we->tid, &exit_value);
+          free(we);
+          async_nif->we_active--;
+          we = n;
+      }
+      enif_mutex_unlock(async_nif->we_mutex);
  }
-
-  /* Free req structres sitting on the recycle queue. */
-  enif_mutex_lock(async_nif->recycled_req_mutex);
-  req = NULL;
-  fifo_q_foreach(reqs, async_nif->recycled_reqs, req, {
-      enif_free_env(req->env);
-      enif_free(req);
-  });
-  fifo_q_free(reqs, async_nif->recycled_reqs);
+  enif_mutex_destroy(async_nif->we_mutex);

  /* Cleanup in-flight requests, mutexes and conditions in each work queue. */
  for (i = 0; i < num_queues; i++) {
@ -408,32 +498,44 @@ async_nif_unload(ErlNifEnv *env, struct async_nif_state *async_nif)

      /* Worker threads are stopped, now toss anything left in the queue. */
      req = NULL;
-      fifo_q_foreach(reqs, q->reqs, req, {
+      req = STAILQ_FIRST(&q->reqs);
+      while(req != NULL) {
+          struct async_nif_req_entry *n = STAILQ_NEXT(req, entries);
          enif_clear_env(req->env);
          enif_send(NULL, &req->pid, req->env,
-                    enif_make_tuple2(req->env, enif_make_atom(req->env, "error"),
-                                     enif_make_atom(req->env, "shutdown")));
+		    enif_make_tuple2(req->env, ATOM_ERROR, ATOM_SHUTDOWN));
          req->fn_post(req->args);
          enif_free_env(req->env);
-          enif_free(req->args);
-          enif_free(req);
-          });
-      fifo_q_free(reqs, q->reqs);
+          free(req->args);
+          free(req);
+          req = n;
+      }
      enif_mutex_destroy(q->reqs_mutex);
      enif_cond_destroy(q->reqs_cnd);
  }

+  /* Free any req structures sitting unused on the recycle queue. */
+  enif_mutex_lock(async_nif->recycled_req_mutex);
+  req = NULL;
+  req = STAILQ_FIRST(&async_nif->recycled_reqs);
+  while(req != NULL) {
+      struct async_nif_req_entry *n = STAILQ_NEXT(req, entries);
+      enif_free_env(req->env);
+      free(req);
+      req = n;
+  }
+
  enif_mutex_unlock(async_nif->recycled_req_mutex);
  enif_mutex_destroy(async_nif->recycled_req_mutex);
  memset(async_nif, 0, sizeof(struct async_nif_state) + (sizeof(struct async_nif_work_queue) * async_nif->num_queues));
-  enif_free(async_nif);
+  free(async_nif);
 }

 static void *
-async_nif_load()
+async_nif_load(ErlNifEnv *env)
 {
  static int has_init = 0;
-  unsigned int i, j, num_queues;
+  unsigned int i, num_queues;
  ErlNifSysInfo info;
  struct async_nif_state *async_nif;

@ -441,6 +543,14 @@ async_nif_load()
  if (has_init) return 0;
  else has_init = 1;

+  /* Init some static references to commonly used atoms. */
+  ATOM_EAGAIN = enif_make_atom(env, "eagain");
+  ATOM_ENOMEM = enif_make_atom(env, "enomem");
+  ATOM_ENQUEUED = enif_make_atom(env, "enqueued");
+  ATOM_ERROR = enif_make_atom(env, "error");
+  ATOM_OK = enif_make_atom(env, "ok");
+  ATOM_SHUTDOWN = enif_make_atom(env, "shutdown");
+
  /* Find out how many schedulers there are. */
  enif_system_info(&info, sizeof(ErlNifSysInfo));

@ -458,62 +568,28 @@ async_nif_load()
  }

  /* Init our portion of priv_data's module-specific state. */
-  async_nif = enif_alloc(sizeof(struct async_nif_state) +
-                         sizeof(struct async_nif_work_queue) * num_queues);
+  async_nif = malloc(sizeof(struct async_nif_state) +
+		     sizeof(struct async_nif_work_queue) * num_queues);
  if (!async_nif)
      return NULL;
  memset(async_nif, 0, sizeof(struct async_nif_state) +
-         sizeof(struct async_nif_work_queue) * num_queues);
+                       sizeof(struct async_nif_work_queue) * num_queues);

  async_nif->num_queues = num_queues;
-  async_nif->num_workers = 2 * num_queues;
+  async_nif->we_active = 0;
  async_nif->next_q = 0;
  async_nif->shutdown = 0;
-  async_nif->recycled_reqs = fifo_q_new(reqs, ASYNC_NIF_MAX_QUEUED_REQS);
-  async_nif->recycled_req_mutex = enif_mutex_create(NULL);
-  STAT_INIT(async_nif, qwait);
+  STAILQ_INIT(&async_nif->recycled_reqs);
+  async_nif->recycled_req_mutex = enif_mutex_create("recycled_req");
+  async_nif->we_mutex = enif_mutex_create("we");
+  SLIST_INIT(&async_nif->we_joining);

  for (i = 0; i < async_nif->num_queues; i++) {
      struct async_nif_work_queue *q = &async_nif->queues[i];
-      q->reqs = fifo_q_new(reqs, ASYNC_NIF_WORKER_QUEUE_SIZE);
-      q->reqs_mutex = enif_mutex_create(NULL);
-      q->reqs_cnd = enif_cond_create(NULL);
-      STAT_INIT(q, qwait);
-  }
-
-  /* Setup the thread pool management. */
-  memset(async_nif->worker_entries, 0, sizeof(struct async_nif_worker_entry) * ASYNC_NIF_MAX_WORKERS);
-
-  /* Start the worker threads. */
-  for (i = 0; i < async_nif->num_workers; i++) {
-    struct async_nif_worker_entry *we = &async_nif->worker_entries[i];
-    we->async_nif = async_nif;
-    we->worker_id = i;
-    we->q = &async_nif->queues[i % async_nif->num_queues];
-    if (enif_thread_create(NULL, &async_nif->worker_entries[i].tid,
-                            &async_nif_worker_fn, (void*)we, NULL) != 0) {
-      async_nif->shutdown = 1;
-
-      for (j = 0; j < async_nif->num_queues; j++) {
-          struct async_nif_work_queue *q = &async_nif->queues[j];
-          enif_cond_broadcast(q->reqs_cnd);
-      }
-
-      while(i-- > 0) {
-        void *exit_value = 0; /* Ignore this. */
-        enif_thread_join(async_nif->worker_entries[i].tid, &exit_value);
-      }
-
-      for (j = 0; j < async_nif->num_queues; j++) {
-          struct async_nif_work_queue *q = &async_nif->queues[j];
-          enif_mutex_destroy(q->reqs_mutex);
-          enif_cond_destroy(q->reqs_cnd);
-      }
-
-      memset(async_nif->worker_entries, 0, sizeof(struct async_nif_worker_entry) * ASYNC_NIF_MAX_WORKERS);
-      enif_free(async_nif);
-      return NULL;
-    }
+      STAILQ_INIT(&q->reqs);
+      q->reqs_mutex = enif_mutex_create("reqs");
+      q->reqs_cnd = enif_cond_create("reqs");
+      q->next = &async_nif->queues[(i + 1) % num_queues];
  }
  return async_nif;
 }
@ -521,7 +597,7 @@ async_nif_load()
 static void
 async_nif_upgrade(ErlNifEnv *env)
 {
-     __UNUSED(env);
+     UNUSED(env);
    // TODO:
 }

--- a/c_src/common.h
+++ b/c_src/common.h
@ -24,24 +24,28 @@
 extern "C" {
 #endif

-#ifdef DEBUG
+#if !(__STDC_VERSION__ >= 199901L || defined(__GNUC__))
+# undef  DEBUG
+# define DEBUG		0
+# define DPRINTF	(void)	/* Vararg macros may be unsupported */
+#elif DEBUG
 #include <stdio.h>
 #include <stdarg.h>
-#ifndef DPRINTF
 #define DPRINTF(fmt, ...)							\
    do {									\
-	fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__);	\
+	fprintf(stderr, "%s:%d " fmt "\n", __FILE__, __LINE__, __VA_ARGS__);    \
 	fflush(stderr);								\
    } while(0)
-#endif
-#ifndef DPUTS
-#define DPUTS(arg)	DPRINTF("%s", arg)
-#endif
+#define DPUTS(arg)		DPRINTF("%s", arg)
 #else
 #define DPRINTF(fmt, ...)	((void) 0)
 #define DPUTS(arg)		((void) 0)
 #endif

+#ifndef __UNUSED
+#define __UNUSED(v) ((void)(v))
+#endif
+
 #ifndef COMPQUIET
 #define COMPQUIET(n, v) do {                                            \
        (n) = (v);                                                      \
@ -49,11 +53,12 @@ extern "C" {
 } while (0)
 #endif

-#ifndef __UNUSED
-#define __UNUSED(v) ((void)(v))
+#ifdef __APPLE__
+#define PRIuint64(x) (x)
+#else
+#define PRIuint64(x) (unsigned long long)(x)
 #endif

-
 #if defined(__cplusplus)
 }
 #endif
--- a/c_src/duration.h
+++ b/c_src/duration.h
@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2013, all rights reserved by Gregory Burd <greg@burd.me>
- *
- * This Source Code Form is subject to the terms of the Mozilla Public License,
- * version 2 (MPLv2).  If a copy of the MPL was not distributed with this file,
- * you can obtain one at: http://mozilla.org/MPL/2.0/
- *
- * NOTES:
- *    - on some platforms this will require -lrt
- */
-#include <stdio.h>
-#include <stdint.h>
-#include <time.h>
-#include <sys/timeb.h>
-
-typedef enum { ns = 0, mcs, ms, s } time_scale;
-struct scale_time {
-     const char *abbreviation;
-     const char *name;
-     uint64_t mul, div, overhead, ticks_per;
-};
-static const struct scale_time scale[] = {
-     { "ns",  "nanosecond",  1000000000LL, 1LL, 10, 2300000000000LL },
-     { "mcs", "microsecond", 1000000LL, 1000LL, 10, 2300000000LL },
-     { "ms",  "millisecond", 1000LL, 1000000LL, 10, 2300000LL },
-     { "sec", "second",      1LL, 1000000000LL, 10, 2300LL } };
-
-static uint64_t ts(time_scale unit)
-{
-    struct timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    return (((uint64_t)ts.tv_sec * scale[unit].mul) +
-            ((uint64_t)ts.tv_nsec / scale[unit].div));
-}
-
-#if 0
-//if defined(__i386__) || defined(__x86_64__)
-
-/**
- * cpu_clock_ticks()
- *
- * A measure provided by Intel x86 CPUs which provides the number of cycles
- * (aka "ticks") executed as a counter using the RDTSC instruction.
- */
-static inline uint64_t cpu_clock_ticks()
-{
-     uint32_t lo, hi;
-     __asm__ __volatile__ (
-          "xorl %%eax, %%eax\n"
-          "cpuid\n"
-          "rdtsc\n"
-          : "=a" (lo), "=d" (hi)
-          :
-          : "%ebx", "%ecx" );
-     return (uint64_t)hi << 32 | lo;
-}
-
-/**
- * cpu_clock_ticks()
- *
- * An approximation of elapsed [ns, mcs, ms, s] from CPU clock ticks.
- */
-static uint64_t elapsed_cpu_clock_ticks(uint64_t start, time_scale unit)
-{
-    return (cpu_clock_ticks() - start - scale[unit].overhead) * scale[unit].ticks_per;
-}
-
-#endif
-
-typedef struct {
-     uint64_t then;
-     time_scale unit;
-} duration_t;
-
-static inline uint64_t elapsed(duration_t *d)
-{
-     uint64_t now = ts(d->unit);
-     uint64_t elapsed = now - d->then;
-     d->then = now;
-     return elapsed;
-}
-
-#define DURATION(name, resolution) duration_t name =    \
-     {ts(resolution), resolution}
-
-#define ELAPSED_DURING(result, resolution, block)       \
-     do {                                               \
-          DURATION(__x, resolution);                    \
-          do block while(0);                            \
-          *result = elapsed(&__x);                      \
-     } while(0);
-
-#define CYCLES_DURING(result, block)                    \
-     do {                                               \
-         uint64_t __begin = cpu_clock_ticks();          \
-         do block while(0);                             \
-         *result = cpu_clock_ticks() - __begin;         \
-     } while(0);
--- a/c_src/fifo_q.h
+++ b/c_src/fifo_q.h
@ -1,93 +0,0 @@
-/*
- * fifo_q: a macro-based implementation of a FIFO Queue
- *
- * Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
- * Author: Gregory Burd <greg@basho.com> <greg@burd.me>
- *
- * This file is provided to you under the Apache License,
- * Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License.  You may obtain
- * a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef __FIFO_Q_H__
-#define __FIFO_Q_H__
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define FIFO_QUEUE_TYPE(name)             \
-  struct fifo_q__ ## name *
-#define DECL_FIFO_QUEUE(name, type)       \
-  struct fifo_q__ ## name {               \
-    unsigned int h, t, s;                 \
-    type *items[];                        \
-  };                                      \
-  static struct fifo_q__ ## name *fifo_q_ ## name ## _new(unsigned int n) { \
-    int sz = sizeof(struct fifo_q__ ## name) + ((n+1) * sizeof(type *));\
-    struct fifo_q__ ## name *q = enif_alloc(sz);                        \
-    if (!q)                                                             \
-        return 0;                                                       \
-    memset(q, 0, sz);                                                   \
-    q->s = n + 1;                                                       \
-    return q;                                                           \
-  }                                                                     \
-  static inline void fifo_q_ ## name ## _free(struct fifo_q__ ## name *q) {    \
-    memset(q, 0, sizeof(struct fifo_q__ ## name) + (q->s * sizeof(type *))); \
-    enif_free(q);                                                       \
-  }                                                                     \
-  static inline type *fifo_q_ ## name ## _put(struct fifo_q__ ## name *q, type *n) { \
-    q->items[q->h] = n;                                                 \
-    q->h = (q->h + 1) % q->s;                                           \
-    return n;                                                           \
-  }                                                                     \
-  static inline type *fifo_q_ ## name ## _get(struct fifo_q__ ## name *q) {    \
-    type *n = q->items[q->t];                                           \
-    q->items[q->t] = 0;                                                 \
-    q->t = (q->t + 1) % q->s;                                           \
-    return n;                                                           \
-  }                                                                     \
-  static inline unsigned int fifo_q_ ## name ## _size(struct fifo_q__ ## name *q) { \
-    return (q->h - q->t + q->s) % q->s;                                 \
-  }                                                                     \
-  static inline unsigned int fifo_q_ ## name ## _capacity(struct fifo_q__ ## name *q) { \
-    return q->s - 1;                                                    \
-  }                                                                     \
-  static inline int fifo_q_ ## name ## _empty(struct fifo_q__ ## name *q) {    \
-    return (q->t == q->h);                                              \
-  }                                                                     \
-  static inline int fifo_q_ ## name ## _full(struct fifo_q__ ## name *q) {     \
-    return ((q->h + 1) % q->s) == q->t;                                 \
-  }
-
-#define fifo_q_new(name, size) fifo_q_ ## name ## _new(size)
-#define fifo_q_free(name, queue) fifo_q_ ## name ## _free(queue)
-#define fifo_q_get(name, queue) fifo_q_ ## name ## _get(queue)
-#define fifo_q_put(name, queue, item) fifo_q_ ## name ## _put(queue, item)
-#define fifo_q_size(name, queue) fifo_q_ ## name ## _size(queue)
-#define fifo_q_capacity(name, queue) fifo_q_ ## name ## _capacity(queue)
-#define fifo_q_empty(name, queue) fifo_q_ ## name ## _empty(queue)
-#define fifo_q_full(name, queue) fifo_q_ ## name ## _full(queue)
-#define fifo_q_foreach(name, queue, item, task) do {                    \
-    while(!fifo_q_ ## name ## _empty(queue)) {                          \
-      item = fifo_q_ ## name ## _get(queue);                            \
-      do task while(0);                                                 \
-    }                                                                   \
-  } while(0);
-
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif // __FIFO_Q_H__
--- a/c_src/lmdb.c
+++ b/c_src/lmdb.c
--- a/c_src/lmdb.h
+++ b/c_src/lmdb.h
@ -1,10 +1,10 @@
 /** @file lmdb.h
 *	@brief Lightning memory-mapped database library
 *
- *	@mainpage	Lightning Memory-Mapped Database Manager (MDB)
+ *	@mainpage	Lightning Memory-Mapped Database Manager (LMDB)
 *
 *	@section intro_sec Introduction
- *	MDB is a Btree-based database management library modeled loosely on the
+ *	LMDB is a Btree-based database management library modeled loosely on the
 *	BerkeleyDB API, but much simplified. The entire database is exposed
 *	in a memory map, and all data fetches return data directly
 *	from the mapped memory, so no malloc's or memcpy's occur during
@ -26,10 +26,10 @@
 *	readers, and readers don't block writers.
 *
 *	Unlike other well-known database mechanisms which use either write-ahead
- *	transaction logs or append-only data writes, MDB requires no maintenance
+ *	transaction logs or append-only data writes, LMDB requires no maintenance
 *	during operation. Both write-ahead loggers and append-only databases
 *	require periodic checkpointing and/or compaction of their log or database
- *	files otherwise they grow without bound. MDB tracks free pages within
+ *	files otherwise they grow without bound. LMDB tracks free pages within
 *	the database and re-uses them for new write operations, so the database
 *	size does not grow without bound in normal use.
 *
@ -40,6 +40,9 @@
 *	corrupt the database. Of course if your application code is known to
 *	be bug-free (...) then this is not an issue.
 *
+ *	If this is your first time using a transactional embedded key/value
+ *	store, you may find the \ref starting page to be helpful.
+ *
 *	@section caveats_sec Caveats
 *	Troubleshooting the lock file, plus semaphores on BSD systems:
 *
@ -48,10 +51,17 @@
 *	  cause further writes to grow the database quickly, and
 *	  stale locks can block further operation.
 *
- *	  Fix: Terminate all programs using the database, or make
- *	  them close it.  Next database user will reset the lockfile.
+ *	  Fix: Check for stale readers periodically, using the
+ *	  #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool.
+ *	  Stale writers will be cleared automatically on most systems:
+ *	  - Windows - automatic
+ *	  - BSD, systems using SysV semaphores - automatic
+ *	  - Linux, systems using POSIX mutexes with Robust option - automatic
+ *	  Otherwise just make all programs using the database close it;
+ *	  the lockfile is always reset on first open of the environment.
 *
- *	- On BSD systems or others configured with MDB_USE_POSIX_SEM,
+ *	- On BSD systems or others configured with MDB_USE_SYSV_SEM or
+ *	  MDB_USE_POSIX_SEM,
 *	  startup can fail due to semaphores owned by another userid.
 *
 *	  Fix: Open and close the database as the user which owns the
@ -64,13 +74,32 @@
 *	  BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
 *	  Multiple users can cause startup to fail later, as noted above.
 *
+ *	- There is normally no pure read-only mode, since readers need write
+ *	  access to locks and lock file. Exceptions: On read-only filesystems
+ *	  or with the #MDB_NOLOCK flag described under #mdb_env_open().
+ *
+ *	- An LMDB configuration will often reserve considerable \b unused
+ *	  memory address space and maybe file size for future growth.
+ *	  This does not use actual memory or disk space, but users may need
+ *	  to understand the difference so they won't be scared off.
+ *
+ *	- By default, in versions before 0.9.10, unused portions of the data
+ *	  file might receive garbage data from memory freed by other code.
+ *	  (This does not happen when using the #MDB_WRITEMAP flag.) As of
+ *	  0.9.10 the default behavior is to initialize such memory before
+ *	  writing to the data file. Since there may be a slight performance
+ *	  cost due to this initialization, applications may disable it using
+ *	  the #MDB_NOMEMINIT flag. Applications handling sensitive data
+ *	  which must not be written should not use this flag. This flag is
+ *	  irrelevant when using #MDB_WRITEMAP.
+ *
 *	- A thread can only use one transaction at a time, plus any child
 *	  transactions.  Each transaction belongs to one thread.  See below.
 *	  The #MDB_NOTLS flag changes this for read-only transactions.
 *
 *	- Use an MDB_env* in the process which opened it, without fork()ing.
 *
- *	- Do not have open an MDB database twice in the same process at
+ *	- Do not have an LMDB database open twice in the same process at
 *	  the same time.  Not even from a plain open() call - close()ing it
 *	  breaks flock() advisory locking.
 *
@ -86,13 +115,17 @@
 *	...when several processes can use a database concurrently:
 *
 *	- Avoid aborting a process with an active transaction.
- *	  The transaction becomes "long-lived" as above until the lockfile
- *	  is reset, since the process may not remove it from the lockfile.
+ *	  The transaction becomes "long-lived" as above until a check
+ *	  for stale readers is performed or the lockfile is reset,
+ *	  since the process may not remove it from the lockfile.
 *
- *	- If you do that anyway, close the environment once in a while,
- *	  so the lockfile can get reset.
+ *	  This does not apply to write transactions if the system clears
+ *	  stale writers, see above.
 *
- *	- Do not use MDB databases on remote filesystems, even between
+ *	- If you do that anyway, do a periodic check for stale readers. Or
+ *	  close the environment once in a while, so the lockfile can get reset.
+ *
+ *	- Do not use LMDB databases on remote filesystems, even between
 *	  processes on the same host.  This breaks flock() on some OSes,
 *	  possibly memory map sync, and certainly sync between programs
 *	  on different hosts.
@ -102,7 +135,7 @@
 *
 *	@author	Howard Chu, Symas Corporation.
 *
- *	@copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved.
+ *	@copyright Copyright 2011-2016 Howard Chu, Symas Corp. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
@ -133,18 +166,37 @@
 #define _LMDB_H_

 #include <sys/types.h>
+#include <inttypes.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

+/** Unix permissions for creating files, or dummy definition for Windows */
 #ifdef _MSC_VER
 typedef	int	mdb_mode_t;
 #else
 typedef	mode_t	mdb_mode_t;
 #endif

-/** @defgroup mdb MDB API
+#ifdef MDB_VL32
+typedef uint64_t	mdb_size_t;
+#define mdb_env_create	mdb_env_create_vl32	/**< Prevent mixing with non-VL32 builds */
+#else
+typedef size_t	mdb_size_t;
+#endif
+
+/** An abstraction for a file handle.
+ *	On POSIX systems file handles are small integers. On Windows
+ *	they're opaque pointers.
+ */
+#ifdef _WIN32
+typedef	void *mdb_filehandle_t;
+#else
+typedef int mdb_filehandle_t;
+#endif
+
+/** @defgroup mdb LMDB API
 *	@{
 *	@brief OpenLDAP Lightning Memory-Mapped Database Manager
 */
@ -156,7 +208,7 @@ typedef	mode_t	mdb_mode_t;
 /** Library minor version */
 #define MDB_VERSION_MINOR	9
 /** Library patch version */
-#define MDB_VERSION_PATCH	6
+#define MDB_VERSION_PATCH	70

 /** Combine args a,b,c into a single integer for easy version comparisons */
 #define MDB_VERINT(a,b,c)	(((a) << 24) | ((b) << 16) | (c))
@ -166,10 +218,10 @@ typedef	mode_t	mdb_mode_t;
 	MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)

 /** The release date of this library version */
-#define MDB_VERSION_DATE	"January 10, 2013"
+#define MDB_VERSION_DATE	"December 19, 2015"

 /** A stringifier for the version info */
-#define MDB_VERSTR(a,b,c,d)	"MDB " #a "." #b "." #c ": (" d ")"
+#define MDB_VERSTR(a,b,c,d)	"LMDB " #a "." #b "." #c ": (" d ")"

 /** A helper for the stringifier macro */
 #define MDB_VERFOO(a,b,c,d)	MDB_VERSTR(a,b,c,d)
@ -202,13 +254,13 @@ typedef struct MDB_cursor MDB_cursor;
 /** @brief Generic structure used for passing keys and data in and out
 * of the database.
 *
- * Key sizes must be between 1 and the liblmdb build-time constant
- * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The
- * same applies to data sizes in databases with the #MDB_DUPSORT flag.
- * Other data items can in theory be from 0 to 0xffffffff bytes long.
- *
 * Values returned from the database are valid only until a subsequent
- * update operation, or the end of the transaction.
+ * update operation, or the end of the transaction. Do not modify or
+ * free them, they commonly point into the database itself.
+ *
+ * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive.
+ * The same applies to data sizes in databases with the #MDB_DUPSORT flag.
+ * Other data items can in theory be from 0 to 0xffffffff bytes long.
 */
 typedef struct MDB_val {
 	size_t		 mv_size;	/**< size of the data item */
@ -235,14 +287,12 @@ typedef int  (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
 typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);

 /** @defgroup	mdb_env	Environment Flags
- *
- *	Values do not overlap Database Flags.
 *	@{
 */
 	/** mmap at a fixed address (experimental) */
-#define MDB_FIXEDMAP		0x01
+#define MDB_FIXEDMAP	0x01
 	/** no environment directory */
-#define MDB_NOSUBDIR		0x4000
+#define MDB_NOSUBDIR	0x4000
 	/** don't fsync after commit */
 #define MDB_NOSYNC		0x10000
 	/** read only */
@ -251,27 +301,31 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
 #define MDB_NOMETASYNC		0x40000
 	/** use writable mmap */
 #define MDB_WRITEMAP		0x80000
-	/** use asynchronous msync when MDB_WRITEMAP is used */
+	/** use asynchronous msync when #MDB_WRITEMAP is used */
 #define MDB_MAPASYNC		0x100000
 	/** tie reader locktable slots to #MDB_txn objects instead of to threads */
 #define MDB_NOTLS		0x200000
+	/** don't do any locking, caller must manage their own locks */
+#define MDB_NOLOCK		0x400000
+	/** don't do readahead (no effect on Windows) */
+#define MDB_NORDAHEAD	0x800000
+	/** don't initialize malloc'd memory before writing to datafile */
+#define MDB_NOMEMINIT	0x1000000
 /** @} */

 /**	@defgroup	mdb_dbi_open	Database Flags
- *
- *	Values do not overlap Environment Flags.
 *	@{
 */
 	/** use reverse string keys */
 #define MDB_REVERSEKEY	0x02
 	/** use sorted duplicates */
 #define MDB_DUPSORT		0x04
-	/** numeric keys in native byte order.
+	/** numeric keys in native byte order: either unsigned int or size_t.
 	 *  The keys must all be of the same size. */
 #define MDB_INTEGERKEY	0x08
 	/** with #MDB_DUPSORT, sorted dup items have fixed size */
 #define MDB_DUPFIXED	0x10
-	/** with #MDB_DUPSORT, dups are numeric in native byte order */
+	/** with #MDB_DUPSORT, dups are #MDB_INTEGERKEY-style integers */
 #define MDB_INTEGERDUP	0x20
 	/** with #MDB_DUPSORT, use reverse string dups */
 #define MDB_REVERSEDUP	0x40
@ -299,10 +353,19 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
 #define MDB_APPEND	0x20000
 /** Duplicate data is being appended, don't split full pages. */
 #define MDB_APPENDDUP	0x40000
-/** Store multiple data items in one call. */
+/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */
 #define MDB_MULTIPLE	0x80000
 /*	@} */

+/**	@defgroup mdb_copy	Copy Flags
+ *	@{
+ */
+/** Compacting copy: Omit free space from copy, and renumber all
+ * pages sequentially.
+ */
+#define MDB_CP_COMPACT	0x01
+/*	@} */
+
 /** @brief Cursor Get operations.
 *
 *	This is the set of all operations for retrieving data
@ -315,26 +378,28 @@ typedef enum MDB_cursor_op {
 	MDB_GET_BOTH,			/**< Position at key/data pair. Only for #MDB_DUPSORT */
 	MDB_GET_BOTH_RANGE,		/**< position at key, nearest data. Only for #MDB_DUPSORT */
 	MDB_GET_CURRENT,		/**< Return key/data at current cursor position */
-	MDB_GET_MULTIPLE,		/**< Return all the duplicate data items at the current
-								 cursor position. Only for #MDB_DUPFIXED */
+	MDB_GET_MULTIPLE,		/**< Return key and up to a page of duplicate data items
+								from current cursor position. Move cursor to prepare
+								for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
 	MDB_LAST,				/**< Position at last key/data item */
 	MDB_LAST_DUP,			/**< Position at last data item of current key.
 								Only for #MDB_DUPSORT */
 	MDB_NEXT,				/**< Position at next data item */
 	MDB_NEXT_DUP,			/**< Position at next data item of current key.
 								Only for #MDB_DUPSORT */
-	MDB_NEXT_MULTIPLE,		/**< Return all duplicate data items at the next
-								cursor position. Only for #MDB_DUPFIXED */
-	MDB_NEXT_NODUP,			/**< Position at first data item of next key.
-								Only for #MDB_DUPSORT */
+	MDB_NEXT_MULTIPLE,		/**< Return key and up to a page of duplicate data items
+								from next cursor position. Move cursor to prepare
+								for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
+	MDB_NEXT_NODUP,			/**< Position at first data item of next key */
 	MDB_PREV,				/**< Position at previous data item */
 	MDB_PREV_DUP,			/**< Position at previous data item of current key.
 								Only for #MDB_DUPSORT */
-	MDB_PREV_NODUP,			/**< Position at last data item of previous key.
-								Only for #MDB_DUPSORT */
+	MDB_PREV_NODUP,			/**< Position at last data item of previous key */
 	MDB_SET,				/**< Position at specified key */
 	MDB_SET_KEY,			/**< Position at specified key, return key + data */
-	MDB_SET_RANGE			/**< Position at first key greater than or equal to specified key. */
+	MDB_SET_RANGE,			/**< Position at first key greater than or equal to specified key. */
+	MDB_PREV_MULTIPLE		/**< Position at previous page and return key and up to
+								a page of duplicate data items. Only for #MDB_DUPFIXED */
 } MDB_cursor_op;

 /** @defgroup  errors	Return Codes
@ -352,11 +417,11 @@ typedef enum MDB_cursor_op {
 #define MDB_PAGE_NOTFOUND	(-30797)
 	/** Located page was wrong type */
 #define MDB_CORRUPTED	(-30796)
-	/** Update of meta page failed, probably I/O error */
+	/** Update of meta page failed or environment had fatal error */
 #define MDB_PANIC		(-30795)
 	/** Environment version mismatch */
 #define MDB_VERSION_MISMATCH	(-30794)
-	/** File is not a valid MDB file */
+	/** File is not a valid LMDB file */
 #define MDB_INVALID	(-30793)
 	/** Environment mapsize reached */
 #define MDB_MAP_FULL	(-30792)
@ -374,11 +439,25 @@ typedef enum MDB_cursor_op {
 #define MDB_PAGE_FULL	(-30786)
 	/** Database contents grew beyond environment mapsize */
 #define MDB_MAP_RESIZED	(-30785)
-	/** Database flags changed or would change */
+	/** Operation and DB incompatible, or DB type changed. This can mean:
+	 *	<ul>
+	 *	<li>The operation expects an #MDB_DUPSORT / #MDB_DUPFIXED database.
+	 *	<li>Opening a named DB when the unnamed DB has #MDB_DUPSORT / #MDB_INTEGERKEY.
+	 *	<li>Accessing a data record as a database, or vice versa.
+	 *	<li>The database was dropped and recreated with different flags.
+	 *	</ul>
+	 */
 #define MDB_INCOMPATIBLE	(-30784)
 	/** Invalid reuse of reader locktable slot */
 #define MDB_BAD_RSLOT		(-30783)
-#define MDB_LAST_ERRCODE	MDB_BAD_RSLOT
+	/** Transaction must abort, has a child, or is invalid */
+#define MDB_BAD_TXN			(-30782)
+	/** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
+#define MDB_BAD_VALSIZE		(-30781)
+	/** The specified DBI was changed unexpectedly */
+#define MDB_BAD_DBI		(-30780)
+	/** The last defined error code */
+#define MDB_LAST_ERRCODE	MDB_BAD_DBI
 /** @} */

 /** @brief Statistics for a database in the environment */
@ -386,23 +465,23 @@ typedef struct MDB_stat {
 	unsigned int	ms_psize;			/**< Size of a database page.
 											This is currently the same for all databases. */
 	unsigned int	ms_depth;			/**< Depth (height) of the B-tree */
-	size_t		ms_branch_pages;	/**< Number of internal (non-leaf) pages */
-	size_t		ms_leaf_pages;		/**< Number of leaf pages */
-	size_t		ms_overflow_pages;	/**< Number of overflow pages */
-	size_t		ms_entries;			/**< Number of data items */
+	mdb_size_t		ms_branch_pages;	/**< Number of internal (non-leaf) pages */
+	mdb_size_t		ms_leaf_pages;		/**< Number of leaf pages */
+	mdb_size_t		ms_overflow_pages;	/**< Number of overflow pages */
+	mdb_size_t		ms_entries;			/**< Number of data items */
 } MDB_stat;

 /** @brief Information about the environment */
 typedef struct MDB_envinfo {
 	void	*me_mapaddr;			/**< Address of map, if fixed */
-	size_t	me_mapsize;				/**< Size of the data memory map */
-	size_t	me_last_pgno;			/**< ID of the last used page */
-	size_t	me_last_txnid;			/**< ID of the last committed transaction */
+	mdb_size_t	me_mapsize;				/**< Size of the data memory map */
+	mdb_size_t	me_last_pgno;			/**< ID of the last used page */
+	mdb_size_t	me_last_txnid;			/**< ID of the last committed transaction */
 	unsigned int me_maxreaders;		/**< max reader slots in the environment */
 	unsigned int me_numreaders;		/**< max reader slots used in the environment */
 } MDB_envinfo;

-	/** @brief Return the mdb library version information.
+	/** @brief Return the LMDB library version information.
 	 *
 	 * @param[out] major if non-NULL, the library major version number is copied here
 	 * @param[out] minor if non-NULL, the library minor version number is copied here
@ -416,14 +495,14 @@ char *mdb_version(int *major, int *minor, int *patch);
 	 * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3)
 	 * function. If the error code is greater than or equal to 0, then the string
 	 * returned by the system function strerror(3) is returned. If the error code
-	 * is less than 0, an error string corresponding to the MDB library error is
-	 * returned. See @ref errors for a list of MDB-specific error codes.
+	 * is less than 0, an error string corresponding to the LMDB library error is
+	 * returned. See @ref errors for a list of LMDB-specific error codes.
 	 * @param[in] err The error code
 	 * @retval "error message" The description of the error
 	 */
 char *mdb_strerror(int err);

-	/** @brief Create an MDB environment handle.
+	/** @brief Create an LMDB environment handle.
 	 *
 	 * This function allocates memory for a #MDB_env structure. To release
 	 * the allocated memory and discard the handle, call #mdb_env_close().
@ -456,20 +535,24 @@ int  mdb_env_create(MDB_env **env);
 	 *		how the operating system has allocated memory to shared libraries and other uses.
 	 *		The feature is highly experimental.
 	 *	<li>#MDB_NOSUBDIR
-	 *		By default, MDB creates its environment in a directory whose
+	 *		By default, LMDB creates its environment in a directory whose
 	 *		pathname is given in \b path, and creates its data and lock files
 	 *		under that directory. With this option, \b path is used as-is for
 	 *		the database main data file. The database lock file is the \b path
 	 *		with "-lock" appended.
 	 *	<li>#MDB_RDONLY
 	 *		Open the environment in read-only mode. No write operations will be
-	 *		allowed. MDB will still modify the lock file - except on read-only
-	 *		filesystems, where MDB does not use locks.
+	 *		allowed. LMDB will still modify the lock file - except on read-only
+	 *		filesystems, where LMDB does not use locks.
 	 *	<li>#MDB_WRITEMAP
-	 *		Use a writeable memory map unless MDB_RDONLY is set. This is faster
-	 *		and uses fewer mallocs, but loses protection from application bugs
+	 *		Use a writeable memory map unless MDB_RDONLY is set. This uses
+	 *		fewer mallocs but loses protection from application bugs
 	 *		like wild pointer writes and other bad updates into the database.
+	 *		This may be slightly faster for DBs that fit entirely in RAM, but
+	 *		is slower for DBs larger than RAM.
 	 *		Incompatible with nested transactions.
+	 *		Do not mix processes with and without MDB_WRITEMAP on the same
+	 *		environment.  This can defeat durability (#mdb_env_sync etc).
 	 *	<li>#MDB_NOMETASYNC
 	 *		Flush system buffers to disk only once per transaction, omit the
 	 *		metadata flush. Defer that until the system flushes files to disk,
@ -506,14 +589,46 @@ int  mdb_env_create(MDB_env **env);
 	 *		the user synchronizes its use. Applications that multiplex many
 	 *		user threads over individual OS threads need this option. Such an
 	 *		application must also serialize the write transactions in an OS
-	 *		thread, since MDB's write locking is unaware of the user threads.
+	 *		thread, since LMDB's write locking is unaware of the user threads.
+	 *	<li>#MDB_NOLOCK
+	 *		Don't do any locking. If concurrent access is anticipated, the
+	 *		caller must manage all concurrency itself. For proper operation
+	 *		the caller must enforce single-writer semantics, and must ensure
+	 *		that no readers are using old transactions while a writer is
+	 *		active. The simplest approach is to use an exclusive lock so that
+	 *		no readers may be active at all when a writer begins.
+	 *	<li>#MDB_NORDAHEAD
+	 *		Turn off readahead. Most operating systems perform readahead on
+	 *		read requests by default. This option turns it off if the OS
+	 *		supports it. Turning it off may help random read performance
+	 *		when the DB is larger than RAM and system RAM is full.
+	 *		The option is not implemented on Windows.
+	 *	<li>#MDB_NOMEMINIT
+	 *		Don't initialize malloc'd memory before writing to unused spaces
+	 *		in the data file. By default, memory for pages written to the data
+	 *		file is obtained using malloc. While these pages may be reused in
+	 *		subsequent transactions, freshly malloc'd pages will be initialized
+	 *		to zeroes before use. This avoids persisting leftover data from other
+	 *		code (that used the heap and subsequently freed the memory) into the
+	 *		data file. Note that many other system libraries may allocate
+	 *		and free memory from the heap for arbitrary uses. E.g., stdio may
+	 *		use the heap for file I/O buffers. This initialization step has a
+	 *		modest performance cost so some applications may want to disable
+	 *		it using this flag. This option can be a problem for applications
+	 *		which handle sensitive data like passwords, and it makes memory
+	 *		checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP,
+	 *		which writes directly to the mmap instead of using malloc for pages. The
+	 *		initialization is also skipped if #MDB_RESERVE is used; the
+	 *		caller is expected to overwrite all of the memory that was
+	 *		reserved in that case.
+	 *		This flag may be changed at any time using #mdb_env_set_flags().
 	 * </ul>
-	 * @param[in] mode The UNIX permissions to set on created files. This parameter
-	 * is ignored on Windows.
+	 * @param[in] mode The UNIX permissions to set on created files and semaphores.
+	 * This parameter is ignored on Windows.
 	 * @return A non-zero error value on failure and 0 on success. Some possible
 	 * errors are:
 	 * <ul>
-	 *	<li>#MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the
+	 *	<li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the
 	 *	version that created the database environment.
 	 *	<li>#MDB_INVALID - the environment file headers are corrupted.
 	 *	<li>ENOENT - the directory specified by the path parameter doesn't exist.
@ -523,9 +638,13 @@ int  mdb_env_create(MDB_env **env);
 	 */
 int  mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);

-	/** @brief Copy an MDB environment to the specified path.
+	/** @brief Copy an LMDB environment to the specified path.
 	 *
 	 * This function may be used to make a backup of an existing environment.
+	 * No lockfile is created, since it gets recreated at need.
+	 * @note This call can trigger significant file size growth if run in
+	 * parallel with write transactions, because it employs a read-only
+	 * transaction. See long-lived transactions under @ref caveats_sec.
 	 * @param[in] env An environment handle returned by #mdb_env_create(). It
 	 * must have already been opened successfully.
 	 * @param[in] path The directory in which the copy will reside. This
@ -535,7 +654,65 @@ int  mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t
 	 */
 int  mdb_env_copy(MDB_env *env, const char *path);

-	/** @brief Return statistics about the MDB environment.
+	/** @brief Copy an LMDB environment to the specified file descriptor.
+	 *
+	 * This function may be used to make a backup of an existing environment.
+	 * No lockfile is created, since it gets recreated at need.
+	 * @note This call can trigger significant file size growth if run in
+	 * parallel with write transactions, because it employs a read-only
+	 * transaction. See long-lived transactions under @ref caveats_sec.
+	 * @param[in] env An environment handle returned by #mdb_env_create(). It
+	 * must have already been opened successfully.
+	 * @param[in] fd The file descriptor to write the copy to. It must
+	 * have already been opened for write access.
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int  mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
+
+	/** @brief Copy an LMDB environment to the specified path, with options.
+	 *
+	 * This function may be used to make a backup of an existing environment.
+	 * No lockfile is created, since it gets recreated at need.
+	 * @note This call can trigger significant file size growth if run in
+	 * parallel with write transactions, because it employs a read-only
+	 * transaction. See long-lived transactions under @ref caveats_sec.
+	 * @param[in] env An environment handle returned by #mdb_env_create(). It
+	 * must have already been opened successfully.
+	 * @param[in] path The directory in which the copy will reside. This
+	 * directory must already exist and be writable but must otherwise be
+	 * empty.
+	 * @param[in] flags Special options for this operation. This parameter
+	 * must be set to 0 or by bitwise OR'ing together one or more of the
+	 * values described here.
+	 * <ul>
+	 *	<li>#MDB_CP_COMPACT - Perform compaction while copying: omit free
+	 *		pages and sequentially renumber all pages in output. This option
+	 *		consumes more CPU and runs more slowly than the default.
+	 * </ul>
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int  mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags);
+
+	/** @brief Copy an LMDB environment to the specified file descriptor,
+	 *	with options.
+	 *
+	 * This function may be used to make a backup of an existing environment.
+	 * No lockfile is created, since it gets recreated at need. See
+	 * #mdb_env_copy2() for further details.
+	 * @note This call can trigger significant file size growth if run in
+	 * parallel with write transactions, because it employs a read-only
+	 * transaction. See long-lived transactions under @ref caveats_sec.
+	 * @param[in] env An environment handle returned by #mdb_env_create(). It
+	 * must have already been opened successfully.
+	 * @param[in] fd The file descriptor to write the copy to. It must
+	 * have already been opened for write access.
+	 * @param[in] flags Special options for this operation.
+	 * See #mdb_env_copy2() for options.
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int  mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags);
+
+	/** @brief Return statistics about the LMDB environment.
 	 *
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[out] stat The address of an #MDB_stat structure
@ -543,7 +720,7 @@ int  mdb_env_copy(MDB_env *env, const char *path);
 	 */
 int  mdb_env_stat(MDB_env *env, MDB_stat *stat);

-	/** @brief Return information about the MDB environment.
+	/** @brief Return information about the LMDB environment.
 	 *
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[out] stat The address of an #MDB_envinfo structure
@ -554,9 +731,10 @@ int  mdb_env_info(MDB_env *env, MDB_envinfo *stat);
 	/** @brief Flush the data buffers to disk.
 	 *
 	 * Data is always written to disk when #mdb_txn_commit() is called,
-	 * but the operating system may keep it buffered. MDB always flushes
+	 * but the operating system may keep it buffered. LMDB always flushes
 	 * the OS buffers upon commit as well, unless the environment was
-	 * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
+	 * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC. This call is
+	 * not valid if the environment was opened with #MDB_RDONLY.
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[in] force If non-zero, force a synchronous flush.  Otherwise
 	 *  if the environment has the #MDB_NOSYNC flag set the flushes
@ -564,6 +742,7 @@ int  mdb_env_info(MDB_env *env, MDB_envinfo *stat);
 	 * @return A non-zero error value on failure and 0 on success. Some possible
 	 * errors are:
 	 * <ul>
+	 *	<li>EACCES - the environment is read-only.
 	 *	<li>EINVAL - an invalid parameter was specified.
 	 *	<li>EIO - an error occurred during synchronization.
 	 * </ul>
@ -583,7 +762,8 @@ void mdb_env_close(MDB_env *env);
 	/** @brief Set environment flags.
 	 *
 	 * This may be used to set some flags in addition to those from
-	 * #mdb_env_open(), or to unset these flags.
+	 * #mdb_env_open(), or to unset these flags.  If several threads
+	 * change the flags at the same time, the result is undefined.
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[in] flags The flags to change, bitwise OR'ed together
 	 * @param[in] onoff A non-zero value sets the flags, zero clears them.
@ -621,14 +801,39 @@ int  mdb_env_get_flags(MDB_env *env, unsigned int *flags);
 	 */
 int  mdb_env_get_path(MDB_env *env, const char **path);

+	/** @brief Return the filedescriptor for the given environment.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor.
+	 * @return A non-zero error value on failure and 0 on success. Some possible
+	 * errors are:
+	 * <ul>
+	 *	<li>EINVAL - an invalid parameter was specified.
+	 * </ul>
+	 */
+int  mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
+
 	/** @brief Set the size of the memory map to use for this environment.
 	 *
 	 * The size should be a multiple of the OS page size. The default is
 	 * 10485760 bytes. The size of the memory map is also the maximum size
 	 * of the database. The value should be chosen as large as possible,
 	 * to accommodate future growth of the database.
-	 * This function may only be called after #mdb_env_create() and before #mdb_env_open().
-	 * The size may be changed by closing and reopening the environment.
+	 * This function should be called after #mdb_env_create() and before #mdb_env_open().
+	 * It may be called at later times if no transactions are active in
+	 * this process. Note that the library does not check for this condition;
+	 * the caller must ensure it explicitly.
+	 *
+	 * The new size takes effect immediately for the current process but
+	 * will not be persisted to any others until a write transaction has been
+	 * committed by the current process. Also, only mapsize increases are
+	 * persisted into the environment.
+	 *
+	 * If the mapsize is increased by another process, and data has grown
+	 * beyond the range of the current mapsize, #mdb_txn_begin() will
+	 * return #MDB_MAP_RESIZED. This function may be called with a size
+	 * of zero to adopt the new size.
+	 *
 	 * Any attempt to set a size smaller than the space already consumed
 	 * by the environment will be silently changed to the current size of the used space.
 	 * @param[in] env An environment handle returned by #mdb_env_create()
@ -636,10 +841,11 @@ int  mdb_env_get_path(MDB_env *env, const char **path);
 	 * @return A non-zero error value on failure and 0 on success. Some possible
 	 * errors are:
 	 * <ul>
-	 *	<li>EINVAL - an invalid parameter was specified, or the environment is already open.
+	 *	<li>EINVAL - an invalid parameter was specified, or the environment has
+	 *   	an active write transaction.
 	 * </ul>
 	 */
-int  mdb_env_set_mapsize(MDB_env *env, size_t size);
+int  mdb_env_set_mapsize(MDB_env *env, mdb_size_t size);

 	/** @brief Set the maximum number of threads/reader slots for the environment.
 	 *
@ -678,6 +884,10 @@ int  mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
 	 * environment. Simpler applications that use the environment as a single
 	 * unnamed database can ignore this option.
 	 * This function may only be called after #mdb_env_create() and before #mdb_env_open().
+	 *
+	 * Currently a moderate number of slots are cheap but a huge number gets
+	 * expensive: 7-120 words per transaction, and every #mdb_dbi_open()
+	 * does a linear search of the opened slots.
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[in] dbs The maximum number of databases
 	 * @return A non-zero error value on failure and 0 on success. Some possible
@ -688,6 +898,47 @@ int  mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
 	 */
 int  mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);

+	/** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write.
+	 *
+	 * Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511.
+	 * See @ref MDB_val.
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @return The maximum size of a key we can write
+	 */
+int  mdb_env_get_maxkeysize(MDB_env *env);
+
+	/** @brief Set application information associated with the #MDB_env.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @param[in] ctx An arbitrary pointer for whatever the application needs.
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int  mdb_env_set_userctx(MDB_env *env, void *ctx);
+
+	/** @brief Get the application information associated with the #MDB_env.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @return The pointer set by #mdb_env_set_userctx().
+	 */
+void *mdb_env_get_userctx(MDB_env *env);
+
+	/** @brief A callback function for most LMDB assert() failures,
+	 * called before printing the message and aborting.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create().
+	 * @param[in] msg The assertion message, not including newline.
+	 */
+typedef void MDB_assert_func(MDB_env *env, const char *msg);
+
+	/** @brief Set or reset the assert() callback of the environment.
+	 * Disabled if liblmdb is built with NDEBUG.
+	 * @note This hack should become obsolete as lmdb's error handling matures.
+	 * @param[in] env An environment handle returned by #mdb_env_create().
+	 * @param[in] func An #MDB_assert_func function, or 0.
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int  mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
+
 	/** @brief Create a transaction for use with the environment.
 	 *
 	 * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
@ -699,14 +950,18 @@ int  mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
 	 * @param[in] parent If this parameter is non-NULL, the new transaction
 	 * will be a nested transaction, with the transaction indicated by \b parent
 	 * as its parent. Transactions may be nested to any level. A parent
-	 * transaction may not issue any other operations besides mdb_txn_begin,
-	 * mdb_txn_abort, or mdb_txn_commit while it has active child transactions.
+	 * transaction and its cursors may not issue any other operations than
+	 * mdb_txn_commit and mdb_txn_abort while it has active child transactions.
 	 * @param[in] flags Special options for this transaction. This parameter
 	 * must be set to 0 or by bitwise OR'ing together one or more of the
 	 * values described here.
 	 * <ul>
 	 *	<li>#MDB_RDONLY
 	 *		This transaction will not perform any write operations.
+	 *	<li>#MDB_NOSYNC
+	 *		Don't flush system buffers to disk when committing this transaction.
+	 *	<li>#MDB_NOMETASYNC
+	 *		Flush system buffers but omit metadata flush when committing this transaction.
 	 * </ul>
 	 * @param[out] txn Address where the new #MDB_txn handle will be stored
 	 * @return A non-zero error value on failure and 0 on success. Some possible
@ -715,7 +970,8 @@ int  mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
 	 *	<li>#MDB_PANIC - a fatal error occurred earlier and the environment
 	 *		must be shut down.
 	 *	<li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
-	 *		mapsize and the environment must be shut down.
+	 *		mapsize and this environment's map must be resized as well.
+	 *		See #mdb_env_set_mapsize().
 	 *	<li>#MDB_READERS_FULL - a read-only transaction was requested and
 	 *		the reader lock table is full. See #mdb_env_set_maxreaders().
 	 *	<li>ENOMEM - out of memory.
@ -723,6 +979,23 @@ int  mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
 	 */
 int  mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);

+	/** @brief Returns the transaction's #MDB_env
+	 *
+	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+	 */
+MDB_env *mdb_txn_env(MDB_txn *txn);
+
+	/** @brief Return the transaction's ID.
+	 *
+	 * This returns the identifier associated with this transaction. For a
+	 * read-only transaction, this corresponds to the snapshot being read;
+	 * concurrent readers will frequently have the same transaction ID.
+	 *
+	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+	 * @return A transaction ID, valid if input is an active transaction.
+	 */
+mdb_size_t mdb_txn_id(MDB_txn *txn);
+
 	/** @brief Commit all the operations of a transaction into the database.
 	 *
 	 * The transaction handle is freed. It and its cursors must not be used
@ -797,19 +1070,23 @@ int  mdb_txn_renew(MDB_txn *txn);
 	 * independently of whether such a database exists.
 	 * The database handle may be discarded by calling #mdb_dbi_close().
 	 * The old database handle is returned if the database was already open.
-	 * The handle must only be closed once.
+	 * The handle may only be closed once.
+	 *
 	 * The database handle will be private to the current transaction until
 	 * the transaction is successfully committed. If the transaction is
 	 * aborted the handle will be closed automatically.
-	 * After a successful commit the
-	 * handle will reside in the shared environment, and may be used
-	 * by other transactions. This function must not be called from
-	 * multiple concurrent transactions. A transaction that uses this function
-	 * must finish (either commit or abort) before any other transaction may
-	 * use this function.
+	 * After a successful commit the handle will reside in the shared
+	 * environment, and may be used by other transactions.
+	 *
+	 * This function must not be called from multiple concurrent
+	 * transactions in the same process. A transaction that uses
+	 * this function must finish (either commit or abort) before
+	 * any other transaction in the process may use this function.
 	 *
 	 * To use named databases (with name != NULL), #mdb_env_set_maxdbs()
-	 * must be called before opening the environment.
+	 * must be called before opening the environment.  Database names are
+	 * keys in the unnamed database, and may be read but not written.
+	 *
 	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
 	 * @param[in] name The name of the database to open. If only a single
 	 * 	database is needed in the environment, this value may be NULL.
@ -826,9 +1103,9 @@ int  mdb_txn_renew(MDB_txn *txn);
 	 *		keys may have multiple data items, stored in sorted order.) By default
 	 *		keys must be unique and may have only a single data item.
 	 *	<li>#MDB_INTEGERKEY
-	 *		Keys are binary integers in native byte order. Setting this option
-	 *		requires all keys to be the same size, typically sizeof(int)
-	 *		or sizeof(size_t).
+	 *		Keys are binary integers in native byte order, either unsigned int
+	 *		or size_t, and will be sorted as such.
+	 *		The keys must all be of the same size.
 	 *	<li>#MDB_DUPFIXED
 	 *		This flag may only be used in combination with #MDB_DUPSORT. This option
 	 *		tells the library that the data items for this database are all the same
@ -836,8 +1113,8 @@ int  mdb_txn_renew(MDB_txn *txn);
 	 *		all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE
 	 *		cursor operations may be used to retrieve multiple items at once.
 	 *	<li>#MDB_INTEGERDUP
-	 *		This option specifies that duplicate data items are also integers, and
-	 *		should be sorted as such.
+	 *		This option specifies that duplicate data items are binary integers,
+	 *		similar to #MDB_INTEGERKEY keys.
 	 *	<li>#MDB_REVERSEDUP
 	 *		This option specifies that duplicate data items should be compared as
 	 *		strings in reverse order.
@ -870,25 +1147,40 @@ int  mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *d
 	 */
 int  mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);

-	/** @brief Close a database handle.
+	/** @brief Retrieve the DB flags for a database handle.
+	 *
+	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+	 * @param[in] dbi A database handle returned by #mdb_dbi_open()
+	 * @param[out] flags Address where the flags will be returned.
+	 * @return A non-zero error value on failure and 0 on success.
+	 */
+int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags);
+
+	/** @brief Close a database handle. Normally unnecessary. Use with care:
 	 *
 	 * This call is not mutex protected. Handles should only be closed by
 	 * a single thread, and only if no other threads are going to reference
 	 * the database handle or one of its cursors any further. Do not close
 	 * a handle if an existing transaction has modified its database.
+	 * Doing so can cause misbehavior from database corruption to errors
+	 * like MDB_BAD_VALSIZE (since the DB name is gone).
+	 *
+	 * Closing a database handle is not necessary, but lets #mdb_dbi_open()
+	 * reuse the handle value.  Usually it's better to set a bigger
+	 * #mdb_env_set_maxdbs(), unless that value would be large.
+	 *
 	 * @param[in] env An environment handle returned by #mdb_env_create()
 	 * @param[in] dbi A database handle returned by #mdb_dbi_open()
 	 */
 void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);

-	/** @brief Delete a database and/or free all its pages.
+	/** @brief Empty or delete+close a database.
 	 *
-	 * If the \b del parameter is 1, the DB handle will be closed
-	 * and the DB will be deleted.
+	 * See #mdb_dbi_close() for restrictions about closing the DB handle.
 	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
 	 * @param[in] dbi A database handle returned by #mdb_dbi_open()
-	 * @param[in] del 1 to delete the DB from the environment,
-	 * 0 to just free its pages.
+	 * @param[in] del 0 to empty the DB, 1 to delete it from the
+	 * environment and close the DB handle.
 	 * @return A non-zero error value on failure and 0 on success.
 	 */
 int  mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
@ -1030,11 +1322,13 @@ int  mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
 	 *		reserved space, which the caller can fill in later - before
 	 *		the next update operation or the transaction ends. This saves
 	 *		an extra memcpy if the data is being generated later.
+	 *		LMDB does nothing else with this memory; the caller is expected
+	 *		to modify all of the space requested. This flag must not be
+	 *		specified if the database was opened with #MDB_DUPSORT.
 	 *	<li>#MDB_APPEND - append the given key/data pair to the end of the
-	 *		database. No key comparisons are performed. This option allows
-	 *		fast bulk loading when keys are already known to be in the
-	 *		correct order. Loading unsorted keys with this flag will cause
-	 *		data corruption.
+	 *		database. This option allows fast bulk loading when keys are
+	 *		already known to be in the correct order. Loading unsorted keys
+	 *		with this flag will cause a #MDB_KEYEXIST error.
 	 *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
 	 * </ul>
 	 * @return A non-zero error value on failure and 0 on success. Some possible
@ -1160,18 +1454,21 @@ int  mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
 	/** @brief Store by cursor.
 	 *
 	 * This function stores key/data pairs into the database.
-	 * If the function fails for any reason, the state of the cursor will be
-	 * unchanged. If the function succeeds and an item is inserted into the
-	 * database, the cursor is always positioned to refer to the newly inserted item.
+	 * The cursor is positioned at the new item, or on failure usually near it.
+	 * @note Earlier documentation incorrectly said errors would leave the
+	 * state of the cursor unchanged.
 	 * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
 	 * @param[in] key The key operated on.
 	 * @param[in] data The data operated on.
 	 * @param[in] flags Options for this operation. This parameter
 	 * must be set to 0 or one of the values described here.
 	 * <ul>
-	 *	<li>#MDB_CURRENT - overwrite the data of the key/data pair to which
-	 *		the cursor refers with the specified data item. The \b key
-	 *		parameter is ignored.
+	 *	<li>#MDB_CURRENT - replace the item at the current cursor position.
+	 *		The \b key parameter must still be provided, and must match it.
+	 *		If using sorted duplicates (#MDB_DUPSORT) the data item must still
+	 *		sort into the same place. This is intended to be used when the
+	 *		new data is the same size as the old. Otherwise it will simply
+	 *		perform a delete of the old record followed by an insert.
 	 *	<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
 	 *		already appear in the database. This flag may only be specified
 	 *		if the database was opened with #MDB_DUPSORT. The function will
@ -1183,21 +1480,33 @@ int  mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
 	 *		the database supports duplicates (#MDB_DUPSORT).
 	 *	<li>#MDB_RESERVE - reserve space for data of the given size, but
 	 *		don't copy the given data. Instead, return a pointer to the
-	 *		reserved space, which the caller can fill in later. This saves
-	 *		an extra memcpy if the data is being generated later.
+	 *		reserved space, which the caller can fill in later - before
+	 *		the next update operation or the transaction ends. This saves
+	 *		an extra memcpy if the data is being generated later. This flag
+	 *		must not be specified if the database was opened with #MDB_DUPSORT.
 	 *	<li>#MDB_APPEND - append the given key/data pair to the end of the
 	 *		database. No key comparisons are performed. This option allows
 	 *		fast bulk loading when keys are already known to be in the
 	 *		correct order. Loading unsorted keys with this flag will cause
-	 *		data corruption.
+	 *		a #MDB_KEYEXIST error.
 	 *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
+	 *	<li>#MDB_MULTIPLE - store multiple contiguous data elements in a
+	 *		single request. This flag may only be specified if the database
+	 *		was opened with #MDB_DUPFIXED. The \b data argument must be an
+	 *		array of two MDB_vals. The mv_size of the first MDB_val must be
+	 *		the size of a single data element. The mv_data of the first MDB_val
+	 *		must point to the beginning of the array of contiguous data elements.
+	 *		The mv_size of the second MDB_val must be the count of the number
+	 *		of data elements to store. On return this field will be set to
+	 *		the count of the number of elements actually written. The mv_data
+	 *		of the second MDB_val is unused.
 	 * </ul>
 	 * @return A non-zero error value on failure and 0 on success. Some possible
 	 * errors are:
 	 * <ul>
 	 *	<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
 	 *	<li>#MDB_TXN_FULL - the transaction has too many dirty pages.
-	 *	<li>EACCES - an attempt was made to modify a read-only database.
+	 *	<li>EACCES - an attempt was made to write in a read-only transaction.
 	 *	<li>EINVAL - an invalid parameter was specified.
 	 * </ul>
 	 */
@ -1217,7 +1526,7 @@ int  mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
 	 * @return A non-zero error value on failure and 0 on success. Some possible
 	 * errors are:
 	 * <ul>
-	 *	<li>EACCES - an attempt was made to modify a read-only database.
+	 *	<li>EACCES - an attempt was made to write in a read-only transaction.
 	 *	<li>EINVAL - an invalid parameter was specified.
 	 * </ul>
 	 */
@ -1235,7 +1544,7 @@ int  mdb_cursor_del(MDB_cursor *cursor, unsigned int flags);
 	 *	<li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
 	 * </ul>
 	 */
-int  mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
+int  mdb_cursor_count(MDB_cursor *cursor, mdb_size_t *countp);

 	/** @brief Compare two data items according to a particular database.
 	 *
@ -1260,11 +1569,42 @@ int  mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
 	 * @return < 0 if a < b, 0 if a == b, > 0 if a > b
 	 */
 int  mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
+
+	/** @brief A callback function used to print a message from the library.
+	 *
+	 * @param[in] msg The string to be printed.
+	 * @param[in] ctx An arbitrary context pointer for the callback.
+	 * @return < 0 on failure, >= 0 on success.
+	 */
+typedef int (MDB_msg_func)(const char *msg, void *ctx);
+
+	/** @brief Dump the entries in the reader lock table.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @param[in] func A #MDB_msg_func function
+	 * @param[in] ctx Anything the message function needs
+	 * @return < 0 on failure, >= 0 on success.
+	 */
+int	mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
+
+	/** @brief Check for stale entries in the reader lock table.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create()
+	 * @param[out] dead Number of stale slots that were cleared
+	 * @return 0 on success, non-zero on failure.
+	 */
+int	mdb_reader_check(MDB_env *env, int *dead);
 /**	@} */

 #ifdef __cplusplus
 }
 #endif
-#endif /* _LMDB_H_ */
+/** @page tools LMDB Command Line Tools
+	The following describes the command line tools that are available for LMDB.
+	\li \ref mdb_copy_1
+	\li \ref mdb_dump_1
+	\li \ref mdb_load_1
+	\li \ref mdb_stat_1
+*/

-/*  * http://gitorious.org/mdb/mdb/blobs/raw/b389341b4b2413804726276d01676a6a9d05346f/libraries/liblmdb/lmdb.h */
+#endif /* _LMDB_H_ */
--- a/c_src/lmdb_nif.c
+++ b/c_src/lmdb_nif.c
@ -2,7 +2,7 @@
 * This file is part of LMDB - Erlang Lightning MDB API
 *
 * Copyright (c) 2012 by Aleph Archives. All rights reserved.
-%% Copyright (c) 2013 by Basho Technologies, Inc. All rights reserved.
+ * Copyright (c) 2013 by Basho Technologies, Inc. All rights reserved.
 *
 * -------------------------------------------------------------------------
 * Redistribution and use in source and binary forms, with or without
@ -28,6 +28,7 @@

 #include <stdio.h>
 #include <string.h>
+#include <inttypes.h>
 #include <errno.h>
 #include <sys/param.h>
 #include <erl_nif.h>
@ -35,22 +36,14 @@

 #include "common.h"
 #include "async_nif.h"
-#include "stats.h"
 #include "lmdb.h"

-STAT_DECL(lmdb_get, 1000);
-STAT_DECL(lmdb_put, 1000);
-STAT_DECL(lmdb_del, 1000);
-STAT_DECL(lmdb_upd, 1000);
-
 static ErlNifResourceType *lmdb_RESOURCE;
 struct lmdb {
    MDB_env *env;
+    MDB_txn *txn;
+    MDB_cursor *cursor;
    MDB_dbi dbi;
-    STAT_DEF(lmdb_get);
-    STAT_DEF(lmdb_put);
-    STAT_DEF(lmdb_del);
-    STAT_DEF(lmdb_upd);
 };

 struct lmdb_priv_data {
@ -83,6 +76,9 @@ static ERL_NIF_TERM ATOM_MAP_RESIZED;
 static ERL_NIF_TERM ATOM_INCOMPATIBLE;
 static ERL_NIF_TERM ATOM_BAD_RSLOT;

+static ERL_NIF_TERM ATOM_TXN_STARTED;
+static ERL_NIF_TERM ATOM_TXN_NOT_STARTED;
+
 #define CHECK(expr, label)						\
    if (MDB_SUCCESS != (ret = (expr))) {				\
 	DPRINTF("CHECK(\"%s\") failed \"%s\" at %s:%d in %s()\n",	\
@ -106,7 +102,7 @@ static ERL_NIF_TERM ATOM_BAD_RSLOT;
 static ERL_NIF_TERM
 __strerror_term(ErlNifEnv* env, int err)
 {
-    ERL_NIF_TERM term;
+    ERL_NIF_TERM term = 0;

    if (err < MDB_LAST_ERRCODE && err > MDB_KEYEXIST) {
 	switch (err) {
@ -213,11 +209,6 @@ ASYNC_NIF_DECL(
      if ((handle = enif_alloc_resource(lmdb_RESOURCE, sizeof(struct lmdb))) == NULL)
 	  FAIL_ERR(ENOMEM, err3);

-      STAT_INIT(handle, lmdb_get);
-      STAT_INIT(handle, lmdb_put);
-      STAT_INIT(handle, lmdb_upd);
-      STAT_INIT(handle, lmdb_del);
-
      CHECK(mdb_env_create(&(handle->env)), err2);

      if (mdb_env_set_mapsize(handle->env, args->mapsize)) {
@ -230,6 +221,9 @@ ASYNC_NIF_DECL(
      CHECK(mdb_open(txn, NULL, 0, &(handle->dbi)), err1);
      CHECK(mdb_txn_commit(txn), err1);

+      handle->txn = NULL;
+      handle->cursor = NULL;
+
      ERL_NIF_TERM term = enif_make_resource(env, handle);
      enif_release_resource(handle);
      ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_OK, term));
@ -271,15 +265,7 @@ ASYNC_NIF_DECL(
  },
  { // work

-      STAT_PRINT(args->handle, lmdb_get, "lmdb");
-      STAT_PRINT(args->handle, lmdb_put, "lmdb");
-      STAT_PRINT(args->handle, lmdb_del, "lmdb");
-      STAT_PRINT(args->handle, lmdb_upd, "lmdb");
      mdb_env_close(args->handle->env);
-      STAT_RESET(args->handle, lmdb_get);
-      STAT_RESET(args->handle, lmdb_put);
-      STAT_RESET(args->handle, lmdb_del);
-      STAT_RESET(args->handle, lmdb_upd);
      args->handle->env = NULL;
      ASYNC_NIF_REPLY(ATOM_OK);
      return;
@ -315,7 +301,6 @@ ASYNC_NIF_DECL(
      }
      if (!args->handle->env)
 	  ASYNC_NIF_RETURN_BADARG();
-      STAT_TICK(args->handle, lmdb_put);
      enif_keep_resource((void*)args->handle);
      args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
      args->val = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[2]);
@ -343,7 +328,11 @@ ASYNC_NIF_DECL(
      mkey.mv_data  = key.data;
      mdata.mv_size = val.size;
      mdata.mv_data = val.data;
-      CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
+      if(args->handle->txn == NULL) {
+          CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
+      } else {
+          txn = args->handle->txn;
+      }

      ret = mdb_put(txn, args->handle->dbi, &mkey, &mdata, MDB_NOOVERWRITE);
      if (MDB_KEYEXIST == ret) {
@ -353,8 +342,8 @@ ASYNC_NIF_DECL(
      if (ret != 0)
 	  FAIL_ERR(ret, err1);

-      CHECK(mdb_txn_commit(txn), err1);
-      STAT_TOCK(args->handle, lmdb_put);
+      if(args->handle->txn == NULL)
+          CHECK(mdb_txn_commit(txn), err1);
      ASYNC_NIF_REPLY(ATOM_OK);
      return;

@ -369,7 +358,6 @@ ASYNC_NIF_DECL(
    enif_release_resource((void*)args->handle);
  });

-
 /**
 * Update an existing value indexed by key.
 *
@ -395,7 +383,6 @@ ASYNC_NIF_DECL(
      }
      if (!args->handle->env)
 	  ASYNC_NIF_RETURN_BADARG();
-      STAT_TICK(args->handle, lmdb_upd);
      enif_keep_resource((void*)args->handle);
      args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
      args->val = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[2]);
@ -424,10 +411,16 @@ ASYNC_NIF_DECL(
      mdata.mv_size = val.size;
      mdata.mv_data = val.data;

-      CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
+      if(args->handle->txn == NULL) {
+          CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err2);
+      } else {
+          txn = args->handle->txn;
+      }
+
      CHECK(mdb_put(txn, args->handle->dbi, &mkey, &mdata, 0), err1);
-      CHECK(mdb_txn_commit(txn), err1);
-      STAT_TOCK(args->handle, lmdb_upd);
+
+      if(args->handle->txn == NULL)
+          CHECK(mdb_txn_commit(txn), err1);
      ASYNC_NIF_REPLY(ATOM_OK);
      return;

@ -465,7 +458,6 @@ ASYNC_NIF_DECL(
      }
      if (!args->handle->env)
 	  ASYNC_NIF_RETURN_BADARG();
-      STAT_TICK(args->handle, lmdb_get);
      enif_keep_resource((void*)args->handle);
      args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
  },
@ -488,10 +480,15 @@ ASYNC_NIF_DECL(
      mkey.mv_size  = key.size;
      mkey.mv_data  = key.data;

-      CHECK(mdb_txn_begin(args->handle->env, NULL, 0, &txn), err);
+      if(args->handle->txn == NULL) {
+          CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err);
+      } else {
+          txn = args->handle->txn;
+      }

      ret = mdb_get(txn, args->handle->dbi, &mkey, &mdata);
-      mdb_txn_abort(txn);
+      if(args->handle->txn == NULL)
+          mdb_txn_abort(txn);
      if (MDB_NOTFOUND == ret) {
 	  ASYNC_NIF_REPLY(ATOM_NOT_FOUND);
 	  return;
@ -505,7 +502,6 @@ ASYNC_NIF_DECL(
 	  FAIL_ERR(ENOMEM, err);
      memcpy(bin, mdata.mv_data, mdata.mv_size);

-      STAT_TOCK(args->handle, lmdb_get);
      ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_OK, val));
      return;

@ -541,7 +537,6 @@ ASYNC_NIF_DECL(
      }
      if (!args->handle->env)
 	  ASYNC_NIF_RETURN_BADARG();
-      STAT_TICK(args->handle, lmdb_del);
      enif_keep_resource((void*)args->handle);
      args->key = enif_make_copy(ASYNC_NIF_WORK_ENV, argv[1]);
  },
@ -561,17 +556,22 @@ ASYNC_NIF_DECL(
      mkey.mv_size  = key.size;
      mkey.mv_data  = key.data;

-      CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err);
+      if(args->handle->txn == NULL) {
+          CHECK(mdb_txn_begin(args->handle->env, NULL, 0, & txn), err);
+      } else {
+          txn = args->handle->txn;
+      }
+
      ret = mdb_del(txn, args->handle->dbi, &mkey, NULL);

      if(MDB_NOTFOUND == ret) {
-	  mdb_txn_abort(txn);
+          if(args->handle->txn == NULL)
+              mdb_txn_abort(txn);
 	  ASYNC_NIF_REPLY(ATOM_NOT_FOUND);
 	  return;
      }
-
-      CHECK(mdb_txn_commit(txn), err);
-      STAT_TOCK(args->handle, lmdb_del);
+      if(args->handle->txn == NULL)
+          CHECK(mdb_txn_commit(txn), err);
      ASYNC_NIF_REPLY(ATOM_OK);
      return;

@ -630,7 +630,109 @@ ASYNC_NIF_DECL(
    enif_release_resource((void*)args->handle);
  });

+/**
+ * Start a transaction on the handle.
+ *
+ * Replies ATOM_OK once mdb_txn_begin() has stored a new transaction in
+ * handle->txn.  If handle->txn is already set, nothing is started and the
+ * reply is {ATOM_ERROR, txn_started}.
+ *
+ * argv[0]    reference to the LMDB handle resource
+ */
+ASYNC_NIF_DECL(
+  lmdb_txn_begin,
+  { // struct

+      struct lmdb *handle;
+  },
+  { // pre
+
+      if (!(argc == 1 &&
+        enif_get_resource(env, argv[0], lmdb_RESOURCE, (void**)&args->handle))) {
+      ASYNC_NIF_RETURN_BADARG();
+      }
+      if (!args->handle->env)
+      ASYNC_NIF_RETURN_BADARG();
+      // keep the resource alive until the post block releases it
+      enif_keep_resource((void*)args->handle);
+  },
+  { // work
+
+      // err and ret are never assigned directly here; presumably CHECK sets
+      // them before jumping to err2 -- TODO confirm against the macro
+      ERL_NIF_TERM err;
+      int ret;
+      if(args->handle->txn == NULL) {
+          // the new transaction is written straight into the shared handle
+          CHECK(mdb_txn_begin(args->handle->env, NULL, 0, &(args->handle->txn)), err2);
+          ASYNC_NIF_REPLY(ATOM_OK);
+      } else
+          ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_ERROR, ATOM_TXN_STARTED));
+      return;
+
+  err2:
+      ASYNC_NIF_REPLY(err);
+      return;
+  },
+  { // post
+
+    enif_release_resource((void*)args->handle);
+  });
+
+/**
+ * Commit the transaction currently stored on the handle.
+ *
+ * Replies ATOM_OK when the commit succeeds, {ATOM_ERROR, txn_not_started}
+ * when handle->txn is not set, or the error term from the err2 path when
+ * mdb_txn_commit() fails.  In every case where a transaction was present,
+ * handle->txn is NULL afterwards.
+ *
+ * argv[0]    reference to the LMDB handle resource
+ */
+ASYNC_NIF_DECL(
+  lmdb_txn_commit,
+  { // struct
+
+      struct lmdb *handle;
+  },
+  { // pre
+
+      if (!(argc == 1 &&
+        enif_get_resource(env, argv[0], lmdb_RESOURCE, (void**)&args->handle))) {
+      ASYNC_NIF_RETURN_BADARG();
+      }
+      if (!args->handle->env)
+      ASYNC_NIF_RETURN_BADARG();
+      // keep the resource alive until the post block releases it
+      enif_keep_resource((void*)args->handle);
+  },
+  { // work
+
+      ERL_NIF_TERM err;
+      int ret;
+      MDB_txn *txn = args->handle->txn;
+      if(txn != NULL) {
+          // Detach before committing: mdb_txn_commit() frees the transaction
+          // whether or not it succeeds, so the handle must never be left
+          // pointing at it when CHECK bails out to err2.
+          args->handle->txn = NULL;
+          CHECK(mdb_txn_commit(txn), err2);
+          ASYNC_NIF_REPLY(ATOM_OK);
+      } else
+          ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_ERROR, ATOM_TXN_NOT_STARTED));
+      return;
+
+  err2:
+      ASYNC_NIF_REPLY(err);
+      return;
+  },
+  { // post
+
+    enif_release_resource((void*)args->handle);
+  });
+
+/**
+ * Abort the transaction currently stored on the handle.
+ *
+ * Replies ATOM_OK after mdb_txn_abort() has been called and handle->txn
+ * cleared, or {ATOM_ERROR, txn_not_started} when handle->txn is not set.
+ *
+ * argv[0]    reference to the LMDB handle resource
+ */
+ASYNC_NIF_DECL(
+  lmdb_txn_abort,
+  { // struct
+
+      struct lmdb *handle;
+  },
+  { // pre
+
+      if (!(argc == 1 &&
+        enif_get_resource(env, argv[0], lmdb_RESOURCE, (void**)&args->handle))) {
+      ASYNC_NIF_RETURN_BADARG();
+      }
+      if (!args->handle->env)
+      ASYNC_NIF_RETURN_BADARG();
+      // keep the resource alive until the post block releases it
+      enif_keep_resource((void*)args->handle);
+  },
+  { // work
+
+      if(args->handle->txn != NULL) {
+          mdb_txn_abort(args->handle->txn);
+          // forget the pointer so later calls see "no transaction"
+          args->handle->txn = NULL;
+          ASYNC_NIF_REPLY(ATOM_OK);
+      } else
+          ASYNC_NIF_REPLY(enif_make_tuple(env, 2, ATOM_ERROR, ATOM_TXN_NOT_STARTED));
+      return;
+  },
+  { // post
+
+    enif_release_resource((void*)args->handle);
+  });

 static int lmdb_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
 {
@ -645,7 +747,7 @@ static int lmdb_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)

    /* Note: !!! the first element of our priv_data struct *must* be the
       pointer to the async_nif's private data which we set here. */
-    ASYNC_NIF_LOAD(lmdb, priv->async_nif_priv);
+    ASYNC_NIF_LOAD(lmdb, env, priv->async_nif_priv);
    if (!priv)
        return ENOMEM;
    *priv_data = priv;
@ -671,6 +773,9 @@ static int lmdb_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
    ATOM_INCOMPATIBLE = enif_make_atom(env, "incompatible");
    ATOM_BAD_RSLOT = enif_make_atom(env, "bad_rslot");

+    ATOM_TXN_STARTED = enif_make_atom(env, "txn_started");
+    ATOM_TXN_NOT_STARTED = enif_make_atom(env, "txn_not_started");
+
    lmdb_RESOURCE = enif_open_resource_type(env, NULL, "lmdb_resource",
 					    NULL, flags, NULL);
    return (0);
@ -711,7 +816,15 @@ static ErlNifFunc nif_funcs [] = {
    {"get",         3, lmdb_get},
    {"del",         3, lmdb_del},
    {"update",      4, lmdb_update},
-    {"drop",        2, lmdb_drop}
+    {"drop",        2, lmdb_drop},
+
+    {"txn_begin",   2, lmdb_txn_begin},
+    {"txn_commit",  2, lmdb_txn_commit},
+    {"txn_abort",   2, lmdb_txn_abort}/*,
+
+    {"cursor_open",  2, lmdb_cursor_open},
+    {"cursor_close", 2, lmdb_cursor_close} */
+
 };

 /* driver entry point */
--- a/c_src/midl.c
+++ b/c_src/midl.c
@ -3,7 +3,7 @@
 /* $OpenLDAP$ */
 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
- * Copyright 2000-2013 The OpenLDAP Foundation.
+ * Copyright 2000-2016 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@ -20,10 +20,9 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <sys/types.h>
-#include <assert.h>
 #include "midl.h"

-/** @defgroup internal	MDB Internals
+/** @defgroup internal	LMDB Internals
 *	@{
 */
 /** @defgroup idls	ID List Management
@ -31,8 +30,7 @@
 */
 #define CMP(x,y)	 ( (x) < (y) ? -1 : (x) > (y) )

-#if 0	/* superseded by append/sort */
-static unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
+unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
 {
 	/*
 	 * binary search of id in ids
@ -67,21 +65,11 @@ static unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
 	return cursor;
 }

+#if 0	/* superseded by append/sort */
 int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 {
 	unsigned x, i;

-	if (MDB_IDL_IS_RANGE( ids )) {
-		/* if already in range, treat as a dup */
-		if (id >= MDB_IDL_RANGE_FIRST(ids) && id <= MDB_IDL_RANGE_LAST(ids))
-			return -1;
-		if (id < MDB_IDL_RANGE_FIRST(ids))
-			ids[1] = id;
-		else if (id > MDB_IDL_RANGE_LAST(ids))
-			ids[2] = id;
-		return 0;
-	}
-
 	x = mdb_midl_search( ids, id );
 	assert( x > 0 );

@ -97,15 +85,9 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 	}

 	if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
-		if( id < ids[1] ) {
-			ids[1] = id;
-			ids[2] = ids[ids[0]-1];
-		} else if ( ids[ids[0]-1] < id ) {
-			ids[2] = id;
-		} else {
-			ids[2] = ids[ids[0]-1];
-		}
-		ids[0] = MDB_NOID;
+		/* no room */
+		--ids[0];
+		return -2;

 	} else {
 		/* insert id */
@ -121,8 +103,10 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
 MDB_IDL mdb_midl_alloc(int num)
 {
 	MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID));
-	if (ids)
+	if (ids) {
 		*ids++ = num;
+		*ids = 0;
+	}
 	return ids;
 }

@ -132,19 +116,18 @@ void mdb_midl_free(MDB_IDL ids)
 		free(ids-1);
 }

-int mdb_midl_shrink( MDB_IDL *idp )
+void mdb_midl_shrink( MDB_IDL *idp )
 {
 	MDB_IDL ids = *idp;
-	if (*(--ids) > MDB_IDL_UM_MAX) {
-		ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID));
+	if (*(--ids) > MDB_IDL_UM_MAX &&
+		(ids = realloc(ids, (MDB_IDL_UM_MAX+2) * sizeof(MDB_ID))))
+	{
 		*ids++ = MDB_IDL_UM_MAX;
 		*idp = ids;
-		return 1;
 	}
-	return 0;
 }

-int mdb_midl_grow( MDB_IDL *idp, int num )
+static int mdb_midl_grow( MDB_IDL *idp, int num )
 {
 	MDB_IDL idn = *idp-1;
 	/* grow it */
@ -156,6 +139,20 @@ int mdb_midl_grow( MDB_IDL *idp, int num )
 	return 0;
 }

+/* Make sure *idp can hold num more IDs than it currently contains,
+ * reallocating and updating *idp when the allocated length is too small.
+ * Returns 0 on success, or ENOMEM if realloc fails (in which case *idp is
+ * left as it was).
+ */
+int mdb_midl_need( MDB_IDL *idp, unsigned num )
+{
+	MDB_IDL ids = *idp;
+	num += ids[0];	/* entries wanted in total: current count + requested */
+	if (num > ids[-1]) {	/* ids[-1] holds the allocated length */
+		/* add a quarter as headroom plus slack for the two bookkeeping
+		 * slots, then clear the low 8 bits (multiple of 256) */
+		num = (num + num/4 + (256 + 2)) & -256;
+		/* the allocation starts one element before the IDL pointer */
+		if (!(ids = realloc(ids-1, num * sizeof(MDB_ID))))
+			return ENOMEM;
+		/* record the new length, excluding the length and count slots */
+		*ids++ = num - 2;
+		*idp = ids;
+	}
+	return 0;
+}
+
 int mdb_midl_append( MDB_IDL *idp, MDB_ID id )
 {
 	MDB_IDL ids = *idp;
@ -184,10 +181,40 @@ int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app )
 	return 0;
 }

+/* Append the n consecutive IDs id, id+1, ..., id+n-1 to *idp, growing the
+ * list first when it does not have room.
+ * Returns 0 on success, ENOMEM if growing fails.
+ */
+int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n )
+{
+	MDB_ID *ids = *idp, len = ids[0];
+	/* Too big? */
+	if (len + n > ids[-1]) {
+		/* n | MDB_IDL_UM_MAX is never smaller than either operand */
+		if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
+			return ENOMEM;
+		ids = *idp;	/* the list may have moved */
+	}
+	ids[0] = len + n;
+	ids += len;
+	/* fill from the last new slot backwards, so the lowest ID lands in
+	 * the final position and the highest directly after the old entries */
+	while (n)
+		ids[n--] = id++;
+	return 0;
+}
+
+/* Merge the entries of merge into idl in place, writing from the end of
+ * the combined list towards the front.  No allocation happens here, so idl
+ * must already have room for idl[0] + merge[0] entries.
+ * NOTE(review): the scan looks like it relies on both lists being sorted
+ * in descending order -- confirm against callers.
+ */
+void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge )
+{
+	/* i and j index the last entries of merge and idl; k is the write
+	 * position in the combined list; total is the final count */
+	MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i+j, total = k;
+	/* largest MDB_ID value: "old_id < merge_id" is false once j reaches 0 */
+	idl[0] = (MDB_ID)-1;		/* delimiter for idl scan below */
+	old_id = idl[j];
+	while (i) {
+		merge_id = merge[i--];
+		/* first move every remaining idl entry smaller than merge_id */
+		for (; old_id < merge_id; old_id = idl[--j])
+			idl[k--] = old_id;
+		idl[k--] = merge_id;
+	}
+	/* replace the temporary delimiter with the real count */
+	idl[0] = total;
+}
+
 /* Quicksort + Insertion sort for small arrays */

 #define SMALL	8
-#define	SWAP(a,b)	{ itmp=(a); (a)=(b); (b)=itmp; }
+#define	MIDL_SWAP(a,b)	{ itmp=(a); (a)=(b); (b)=itmp; }

 void
 mdb_midl_sort( MDB_IDL ids )
@ -215,15 +242,15 @@ mdb_midl_sort( MDB_IDL ids )
 			l = istack[jstack--];
 		} else {
 			k = (l + ir) >> 1;	/* Choose median of left, center, right */
-			SWAP(ids[k], ids[l+1]);
+			MIDL_SWAP(ids[k], ids[l+1]);
 			if (ids[l] < ids[ir]) {
-				SWAP(ids[l], ids[ir]);
+				MIDL_SWAP(ids[l], ids[ir]);
 			}
 			if (ids[l+1] < ids[ir]) {
-				SWAP(ids[l+1], ids[ir]);
+				MIDL_SWAP(ids[l+1], ids[ir]);
 			}
 			if (ids[l] < ids[l+1]) {
-				SWAP(ids[l], ids[l+1]);
+				MIDL_SWAP(ids[l], ids[l+1]);
 			}
 			i = l+1;
 			j = ir;
@ -232,7 +259,7 @@ mdb_midl_sort( MDB_IDL ids )
 				do i++; while(ids[i] > a);
 				do j--; while(ids[j] < a);
 				if (j < i) break;
-				SWAP(ids[i],ids[j]);
+				MIDL_SWAP(ids[i],ids[j]);
 			}
 			ids[l+1] = ids[j];
 			ids[j] = a;
@ -290,7 +317,6 @@ int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id )
 	unsigned x, i;

 	x = mdb_mid2l_search( ids, id->mid );
-	assert( x > 0 );

 	if( x < 1 ) {
 		/* internal error */
@ -328,7 +354,67 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id )
 	return 0;
 }

-/** @} */
-/** @} */
+#ifdef MDB_VL32
+/* Search an MDB_ID3L for id, comparing against each entry's mid field.
+ * Entries are 1-based; ids[0].mid holds the number of entries.
+ */
+unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id )
+{
+	/*
+	 * binary search of id in ids
+	 * if found, returns position of id
+	 * if not found, returns first position greater than id
+	 */
+	unsigned base = 0;
+	unsigned cursor = 1;
+	int val = 0;
+	unsigned n = (unsigned)ids[0].mid;

-/* http://gitorious.org/mdb/mdb/blobs/raw/mdb.master/libraries/liblmdb/midl.c */
+	while( 0 < n ) {
+		unsigned pivot = n >> 1;
+		cursor = base + pivot + 1;
+		val = CMP( id, ids[cursor].mid );
+
+		if( val < 0 ) {
+			/* id sorts before the probe: keep the lower half */
+			n = pivot;
+
+		} else if ( val > 0 ) {
+			/* id sorts after the probe: continue above it */
+			base = cursor;
+			n -= pivot + 1;
+
+		} else {
+			return cursor;
+		}
+	}
+
+	/* last probe was smaller than id, so the slot after it is the answer */
+	if( val > 0 ) {
+		++cursor;
+	}
+	return cursor;
+}
+
+/* Insert *id into ids at the position reported by mdb_mid3l_search().
+ * Returns 0 on success, -1 if an entry with the same mid is already
+ * present, -2 if the search returned a position below 1.
+ * NOTE: there is no capacity check here; the caller has to make sure the
+ * array has room for one more entry.
+ */
+int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id )
+{
+	unsigned x, i;
+
+	x = mdb_mid3l_search( ids, id->mid );
+
+	if( x < 1 ) {
+		/* internal error */
+		return -2;
+	}
+
+	if ( x <= ids[0].mid && ids[x].mid == id->mid ) {
+		/* duplicate */
+		return -1;
+	}
+
+	/* insert id */
+	ids[0].mid++;	/* ids[0].mid is the entry count */
+	/* shift entries x..end up by one slot to open a gap at x */
+	for (i=(unsigned)ids[0].mid; i>x; i--)
+		ids[i] = ids[i-1];
+	ids[x] = *id;
+
+	return 0;
+}
+#endif /* MDB_VL32 */
+
+/** @} */
+/** @} */
--- a/c_src/midl.h
+++ b/c_src/midl.h
@ -1,5 +1,5 @@
 /**	@file midl.h
- *	@brief mdb ID List header file.
+ *	@brief LMDB ID List header file.
 *
 *	This file was originally part of back-bdb but has been
 *	modified for use in libmdb. Most of the macros defined
@ -11,7 +11,7 @@
 /* $OpenLDAP$ */
 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
- * Copyright 2000-2013 The OpenLDAP Foundation.
+ * Copyright 2000-2016 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@ -27,22 +27,27 @@
 #define _MDB_MIDL_H_

 #include <stddef.h>
+#include <inttypes.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

-/** @defgroup internal	MDB Internals
+/** @defgroup internal	LMDB Internals
 *	@{
 */

 /** @defgroup idls	ID List Management
 *	@{
 */
-	/** A generic ID number. These were entryIDs in back-bdb.
+	/** A generic unsigned ID number. These were entryIDs in back-bdb.
 	 *	Preferably it should have the same size as a pointer.
 	 */
+#ifdef MDB_VL32
+typedef uint64_t MDB_ID;
+#else
 typedef size_t MDB_ID;
+#endif

 	/** An IDL is an ID List, a sorted array of IDs. The first
 	 * element of the array is a counter for how many actual
@ -52,67 +57,41 @@ typedef size_t MDB_ID;
 	 */
 typedef MDB_ID *MDB_IDL;

-#define	MDB_NOID	(~(MDB_ID)0)
-
 /* IDL sizes - likely should be even bigger
 *   limiting factors: sizeof(ID), thread stack size
 */
+#ifdef MDB_VL32
+#define	MDB_IDL_LOGN	10	/* DB_SIZE is 2^10, UM_SIZE is 2^11 */
+#else
 #define	MDB_IDL_LOGN	16	/* DB_SIZE is 2^16, UM_SIZE is 2^17 */
+#endif
 #define MDB_IDL_DB_SIZE		(1<<MDB_IDL_LOGN)
 #define MDB_IDL_UM_SIZE		(1<<(MDB_IDL_LOGN+1))
-#define MDB_IDL_UM_SIZEOF	(MDB_IDL_UM_SIZE * sizeof(MDB_ID))

 #define MDB_IDL_DB_MAX		(MDB_IDL_DB_SIZE-1)
-
 #define MDB_IDL_UM_MAX		(MDB_IDL_UM_SIZE-1)

-#define MDB_IDL_IS_RANGE(ids)	((ids)[0] == MDB_NOID)
-#define MDB_IDL_RANGE_SIZE		(3)
-#define MDB_IDL_RANGE_SIZEOF	(MDB_IDL_RANGE_SIZE * sizeof(MDB_ID))
-#define MDB_IDL_SIZEOF(ids)		((MDB_IDL_IS_RANGE(ids) \
-	? MDB_IDL_RANGE_SIZE : ((ids)[0]+1)) * sizeof(MDB_ID))
-
-#define MDB_IDL_RANGE_FIRST(ids)	((ids)[1])
-#define MDB_IDL_RANGE_LAST(ids)		((ids)[2])
-
-#define MDB_IDL_RANGE( ids, f, l ) \
-	do { \
-		(ids)[0] = MDB_NOID; \
-		(ids)[1] = (f);  \
-		(ids)[2] = (l);  \
-	} while(0)
-
-#define MDB_IDL_ZERO(ids) \
-	do { \
-		(ids)[0] = 0; \
-		(ids)[1] = 0; \
-		(ids)[2] = 0; \
-	} while(0)
-
+#define MDB_IDL_SIZEOF(ids)		(((ids)[0]+1) * sizeof(MDB_ID))
 #define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 )
-#define MDB_IDL_IS_ALL( range, ids ) ( (ids)[0] == MDB_NOID \
-	&& (ids)[1] <= (range)[1] && (range)[2] <= (ids)[2] )
-
 #define MDB_IDL_CPY( dst, src ) (memcpy( dst, src, MDB_IDL_SIZEOF( src ) ))
-
-#define MDB_IDL_ID( bdb, ids, id ) MDB_IDL_RANGE( ids, id, ((bdb)->bi_lastid) )
-#define MDB_IDL_ALL( bdb, ids ) MDB_IDL_RANGE( ids, 1, ((bdb)->bi_lastid) )
-
 #define MDB_IDL_FIRST( ids )	( (ids)[1] )
-#define MDB_IDL_LAST( ids )		( MDB_IDL_IS_RANGE(ids) \
-	? (ids)[2] : (ids)[(ids)[0]] )
+#define MDB_IDL_LAST( ids )		( (ids)[(ids)[0]] )

-#define MDB_IDL_N( ids )		( MDB_IDL_IS_RANGE(ids) \
-	? ((ids)[2]-(ids)[1])+1 : (ids)[0] )
+	/** Current max length of an #mdb_midl_alloc()ed IDL */
+#define MDB_IDL_ALLOCLEN( ids )	( (ids)[-1] )

-#if 0	/* superseded by append/sort */
-	/** Insert an ID into an IDL.
-	 * @param[in,out] ids	The IDL to insert into.
-	 * @param[in] id	The ID to insert.
-	 * @return	0 on success, -1 if the ID was already present in the IDL.
+	/** Append ID to IDL. The IDL must be big enough. */
+#define mdb_midl_xappend(idl, id) do { \
+		MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
+		xidl[xlen] = (id); \
+	} while (0)
+
+	/** Search for an ID in an IDL.
+	 * @param[in] ids	The IDL to search.
+	 * @param[in] id	The ID to search for.
+	 * @return	The index of the first ID greater than or equal to \b id.
 	 */
-int mdb_midl_insert( MDB_IDL ids, MDB_ID id );
-#endif
+unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id );

 	/** Allocate an IDL.
 	 * Allocates memory for an IDL of the given size.
@ -128,32 +107,44 @@ void mdb_midl_free(MDB_IDL ids);
 	/** Shrink an IDL.
 	 * Return the IDL to the default size if it has grown larger.
 	 * @param[in,out] idp	Address of the IDL to shrink.
-	 * @return	0 on no change, non-zero if shrunk.
 	 */
-int mdb_midl_shrink(MDB_IDL *idp);
+void mdb_midl_shrink(MDB_IDL *idp);

-	/** Grow an IDL.
-	 * Add room for num additional elements.
-	 * @param[in,out] idp	Address of the IDL to grow.
-	 * @param[in] num	Number of elements to add.
-	 * @return	0 on success, -1 on failure.
+	/** Make room for num additional elements in an IDL.
+	 * @param[in,out] idp	Address of the IDL.
+	 * @param[in] num	Number of elements to make room for.
+	 * @return	0 on success, ENOMEM on failure.
 	 */
-int mdb_midl_grow(MDB_IDL *idp, int num);
+int mdb_midl_need(MDB_IDL *idp, unsigned num);

 	/** Append an ID onto an IDL.
 	 * @param[in,out] idp	Address of the IDL to append to.
 	 * @param[in] id	The ID to append.
-	 * @return	0 on success, -1 if the IDL is too large.
+	 * @return	0 on success, ENOMEM if the IDL is too large.
 	 */
 int mdb_midl_append( MDB_IDL *idp, MDB_ID id );

 	/** Append an IDL onto an IDL.
 	 * @param[in,out] idp	Address of the IDL to append to.
 	 * @param[in] app	The IDL to append.
-	 * @return	0 on success, -1 if the IDL is too large.
+	 * @return	0 on success, ENOMEM if the IDL is too large.
 	 */
 int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app );

+	/** Append an ID range onto an IDL.
+	 * @param[in,out] idp	Address of the IDL to append to.
+	 * @param[in] id	The lowest ID to append.
+	 * @param[in] n		Number of IDs to append.
+	 * @return	0 on success, ENOMEM if the IDL is too large.
+	 */
+int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n );
+
+	/** Merge an IDL onto an IDL. The destination IDL must be big enough.
+	 * @param[in] idl	The IDL to merge into.
+	 * @param[in] merge	The IDL to merge.
+	 */
+void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge );
+
 	/** Sort an IDL.
 	 * @param[in,out] ids	The IDL to sort.
 	 */
@ -195,11 +186,23 @@ int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id );
 	 */
 int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id );

+#ifdef MDB_VL32
+typedef struct MDB_ID3 {
+	MDB_ID mid;		/**< The ID */
+	void *mptr;		/**< The pointer */
+	unsigned int mcnt;		/**< Number of pages */
+	unsigned int mref;		/**< Refcounter */
+} MDB_ID3;
+
+typedef MDB_ID3 *MDB_ID3L;
+
+unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id );
+int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id );
+
+#endif /* MDB_VL32 */
 /** @} */
 /** @} */
 #ifdef __cplusplus
 }
 #endif
 #endif	/* _MDB_MIDL_H_ */
-
-/* http://gitorious.org/mdb/mdb/blobs/raw/mdb.master/libraries/liblmdb/midl.h */
--- a/c_src/queue.h
+++ b/c_src/queue.h
@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)queue.h	8.5 (Berkeley) 8/20/94
+ * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $
+ */
+
+#ifndef	_DB_QUEUE_H_
+#define	_DB_QUEUE_H_
+
+#ifndef __offsetof
+#define __offsetof(st, m) \
+          ((size_t) ( (char *)&((st *)0)->m - (char *)0 ))
+#endif
+
+#ifndef __containerof
+#define __containerof(ptr, type, member) ({ \
+          const typeof( ((type *)0)->member ) *__mptr = (ptr);              \
+                (type *)( (char *)__mptr - __offsetof(type,member) );})
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction.  Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ *			SLIST	LIST	STAILQ	TAILQ
+ * _HEAD		+	+	+	+
+ * _HEAD_INITIALIZER	+	+	+	+
+ * _ENTRY		+	+	+	+
+ * _INIT		+	+	+	+
+ * _EMPTY		+	+	+	+
+ * _FIRST		+	+	+	+
+ * _NEXT		+	+	+	+
+ * _PREV		-	-	-	+
+ * _LAST		-	-	+	+
+ * _FOREACH		+	+	+	+
+ * _FOREACH_REVERSE	-	-	-	+
+ * _INSERT_HEAD		+	+	+	+
+ * _INSERT_BEFORE	-	+	-	+
+ * _INSERT_AFTER	+	+	+	+
+ * _INSERT_TAIL		-	-	+	+
+ * _CONCAT		-	-	+	+
+ * _REMOVE_HEAD		+	-	+	-
+ * _REMOVE		+	+	+	+
+ *
+ */
+
+/*
+ * XXX
+ * We #undef all of the macros because there are incompatible versions of this
+ * file and these macros on various systems.  What makes the problem worse is
+ * they are included and/or defined by system include files which we may have
+ * already loaded into Berkeley DB before getting here.  For example, FreeBSD's
+ * <rpc/rpc.h> includes its system <sys/queue.h>, and VxWorks UnixLib.h defines
+ * several of the LIST_XXX macros.  Visual C.NET 7.0 also defines some of these
+ * same macros in Vc7\PlatformSDK\Include\WinNT.h.  Make sure we use ours.
+ */
+#undef LIST_EMPTY
+#undef LIST_ENTRY
+#undef LIST_FIRST
+#undef LIST_FOREACH
+#undef LIST_HEAD
+#undef LIST_HEAD_INITIALIZER
+#undef LIST_INIT
+#undef LIST_INSERT_AFTER
+#undef LIST_INSERT_BEFORE
+#undef LIST_INSERT_HEAD
+#undef LIST_NEXT
+#undef LIST_REMOVE
+#undef QMD_TRACE_ELEM
+#undef QMD_TRACE_HEAD
+#undef QUEUE_MACRO_DEBUG
+#undef SLIST_EMPTY
+#undef SLIST_ENTRY
+#undef SLIST_FIRST
+#undef SLIST_FOREACH
+#undef SLIST_FOREACH_PREVPTR
+#undef SLIST_HEAD
+#undef SLIST_HEAD_INITIALIZER
+#undef SLIST_INIT
+#undef SLIST_INSERT_AFTER
+#undef SLIST_INSERT_HEAD
+#undef SLIST_NEXT
+#undef SLIST_REMOVE
+#undef SLIST_REMOVE_HEAD
+#undef STAILQ_CONCAT
+#undef STAILQ_EMPTY
+#undef STAILQ_ENTRY
+#undef STAILQ_FIRST
+#undef STAILQ_FOREACH
+#undef STAILQ_HEAD
+#undef STAILQ_HEAD_INITIALIZER
+#undef STAILQ_INIT
+#undef STAILQ_INSERT_AFTER
+#undef STAILQ_INSERT_HEAD
+#undef STAILQ_INSERT_TAIL
+#undef STAILQ_LAST
+#undef STAILQ_NEXT
+#undef STAILQ_REMOVE
+#undef STAILQ_REMOVE_HEAD
+#undef STAILQ_REMOVE_HEAD_UNTIL
+#undef TAILQ_CONCAT
+#undef TAILQ_EMPTY
+#undef TAILQ_ENTRY
+#undef TAILQ_FIRST
+#undef TAILQ_FOREACH
+#undef TAILQ_FOREACH_REVERSE
+#undef TAILQ_HEAD
+#undef TAILQ_HEAD_INITIALIZER
+#undef TAILQ_INIT
+#undef TAILQ_INSERT_AFTER
+#undef TAILQ_INSERT_BEFORE
+#undef TAILQ_INSERT_HEAD
+#undef TAILQ_INSERT_TAIL
+#undef TAILQ_LAST
+#undef TAILQ_NEXT
+#undef TAILQ_PREV
+#undef TAILQ_REMOVE
+#undef TRACEBUF
+#undef TRASHIT
+
+#define	QUEUE_MACRO_DEBUG 0
+#if QUEUE_MACRO_DEBUG
+/* Store the last 2 places the queue element or head was altered */
+struct qm_trace {
+	char * lastfile;
+	int lastline;
+	char * prevfile;
+	int prevline;
+};
+
+#define	TRACEBUF	struct qm_trace trace;
+#define	TRASHIT(x)	do {(x) = (void *)-1;} while (0)
+
+#define	QMD_TRACE_HEAD(head) do {					\
+	(head)->trace.prevline = (head)->trace.lastline;		\
+	(head)->trace.prevfile = (head)->trace.lastfile;		\
+	(head)->trace.lastline = __LINE__;				\
+	(head)->trace.lastfile = __FILE__;				\
+} while (0)
+
+#define	QMD_TRACE_ELEM(elem) do {					\
+	(elem)->trace.prevline = (elem)->trace.lastline;		\
+	(elem)->trace.prevfile = (elem)->trace.lastfile;		\
+	(elem)->trace.lastline = __LINE__;				\
+	(elem)->trace.lastfile = __FILE__;				\
+} while (0)
+
+#else
+#define	QMD_TRACE_ELEM(elem)
+#define	QMD_TRACE_HEAD(head)
+#define	TRACEBUF
+#define	TRASHIT(x)
+#endif	/* QUEUE_MACRO_DEBUG */
+
+/*
+ * Singly-linked List declarations.
+ */
+#define	SLIST_HEAD(name, type)						\
+struct name {								\
+	struct type *slh_first;	/* first element */			\
+}
+
+#define	SLIST_HEAD_INITIALIZER(head)					\
+	{ NULL }
+
+#define	SLIST_ENTRY(type)						\
+struct {								\
+	struct type *sle_next;	/* next element */			\
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define	SLIST_EMPTY(head)	((head)->slh_first == NULL)
+
+#define	SLIST_FIRST(head)	((head)->slh_first)
+
+#define	SLIST_FOREACH(var, head, field)					\
+	for ((var) = SLIST_FIRST((head));				\
+	    (var);							\
+	    (var) = SLIST_NEXT((var), field))
+
+#define	SLIST_FOREACH_PREVPTR(var, varp, head, field)			\
+	for ((varp) = &SLIST_FIRST((head));				\
+	    ((var) = *(varp)) != NULL;					\
+	    (varp) = &SLIST_NEXT((var), field))
+
+#define	SLIST_INIT(head) do {						\
+	SLIST_FIRST((head)) = NULL;					\
+} while (0)
+
+#define	SLIST_INSERT_AFTER(slistelm, elm, field) do {			\
+	SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field);	\
+	SLIST_NEXT((slistelm), field) = (elm);				\
+} while (0)
+
+#define	SLIST_INSERT_HEAD(head, elm, field) do {			\
+	SLIST_NEXT((elm), field) = SLIST_FIRST((head));			\
+	SLIST_FIRST((head)) = (elm);					\
+} while (0)
+
+#define	SLIST_NEXT(elm, field)	((elm)->field.sle_next)
+
+#define	SLIST_REMOVE(head, elm, type, field) do {			\
+	if (SLIST_FIRST((head)) == (elm)) {				\
+		SLIST_REMOVE_HEAD((head), field);			\
+	}								\
+	else {								\
+		struct type *curelm = SLIST_FIRST((head));		\
+		while (SLIST_NEXT(curelm, field) != (elm))		\
+			curelm = SLIST_NEXT(curelm, field);		\
+		SLIST_NEXT(curelm, field) =				\
+		    SLIST_NEXT(SLIST_NEXT(curelm, field), field);	\
+	}								\
+} while (0)
+
+#define	SLIST_REMOVE_HEAD(head, field) do {				\
+	SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field);	\
+} while (0)
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define	STAILQ_HEAD(name, type)						\
+struct name {								\
+	struct type *stqh_first;/* first element */			\
+	struct type **stqh_last;/* addr of last next element */		\
+}
+
+#define	STAILQ_HEAD_INITIALIZER(head)					\
+	{ NULL, &(head).stqh_first }
+
+#define	STAILQ_ENTRY(type)						\
+struct {								\
+	struct type *stqe_next;	/* next element */			\
+}
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define	STAILQ_CONCAT(head1, head2) do {				\
+	if (!STAILQ_EMPTY((head2))) {					\
+		*(head1)->stqh_last = (head2)->stqh_first;		\
+		(head1)->stqh_last = (head2)->stqh_last;		\
+		STAILQ_INIT((head2));					\
+	}								\
+} while (0)
+
+#define	STAILQ_EMPTY(head)	((head)->stqh_first == NULL)
+
+#define	STAILQ_FIRST(head)	((head)->stqh_first)
+
+#define	STAILQ_FOREACH(var, head, field)				\
+	for ((var) = STAILQ_FIRST((head));				\
+	   (var);							\
+	   (var) = STAILQ_NEXT((var), field))
+
+#define	STAILQ_INIT(head) do {						\
+	STAILQ_FIRST((head)) = NULL;					\
+	(head)->stqh_last = &STAILQ_FIRST((head));			\
+} while (0)
+
+#define	STAILQ_INSERT_AFTER(head, tqelm, elm, field) do {		\
+	if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+	STAILQ_NEXT((tqelm), field) = (elm);				\
+} while (0)
+
+#define	STAILQ_INSERT_HEAD(head, elm, field) do {			\
+	if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL)	\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+	STAILQ_FIRST((head)) = (elm);					\
+} while (0)
+
+#define	STAILQ_INSERT_TAIL(head, elm, field) do {			\
+	STAILQ_NEXT((elm), field) = NULL;				\
+	*(head)->stqh_last = (elm);					\
+	(head)->stqh_last = &STAILQ_NEXT((elm), field);			\
+} while (0)
+
+#define	STAILQ_LAST(head, type, field)					\
+	(STAILQ_EMPTY((head)) ?						\
+		NULL :							\
+		((struct type *)					\
+		((char *)((head)->stqh_last) - __offsetof(struct type, field))))
+
+#define	STAILQ_NEXT(elm, field)	((elm)->field.stqe_next)
+
+#define	STAILQ_REMOVE(head, elm, type, field) do {			\
+	if (STAILQ_FIRST((head)) == (elm)) {				\
+		STAILQ_REMOVE_HEAD((head), field);			\
+	}								\
+	else {								\
+		struct type *curelm = STAILQ_FIRST((head));		\
+		while (STAILQ_NEXT(curelm, field) != (elm))		\
+			curelm = STAILQ_NEXT(curelm, field);		\
+		if ((STAILQ_NEXT(curelm, field) =			\
+		     STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\
+			(head)->stqh_last = &STAILQ_NEXT((curelm), field);\
+	}								\
+} while (0)
+
+#define	STAILQ_REMOVE_HEAD(head, field) do {				\
+	if ((STAILQ_FIRST((head)) =					\
+	     STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL)		\
+		(head)->stqh_last = &STAILQ_FIRST((head));		\
+} while (0)
+
+#define	STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do {			\
+	if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL)	\
+		(head)->stqh_last = &STAILQ_FIRST((head));		\
+} while (0)
+
+/*
+ * List declarations.
+ */
+/* Declare `struct name`, a list head holding a pointer to the first element. */
+#define	LIST_HEAD(name, type)						\
+struct name {								\
+	struct type *lh_first;	/* first element */			\
+}
+
+/* Static initializer for an empty list head (the argument is unused). */
+#define	LIST_HEAD_INITIALIZER(head)					\
+	{ NULL }
+
+/*
+ * Anonymous link structure to embed in each element: a forward pointer plus
+ * the address of whichever pointer currently points at this element.
+ */
+#define	LIST_ENTRY(type)						\
+struct {								\
+	struct type *le_next;	/* next element */			\
+	struct type **le_prev;	/* address of previous next element */	\
+}
+
+/*
+ * List functions.
+ */
+
+/* True when the list has no elements. */
+#define	LIST_EMPTY(head)	((head)->lh_first == NULL)
+
+/* First element of the list (NULL when empty); also used as an lvalue. */
+#define	LIST_FIRST(head)	((head)->lh_first)
+
+/*
+ * Loop header that walks var over the list from the first element until
+ * NULL.  var's next link is read after each iteration of the body, so the
+ * body must leave var valid and linked.
+ */
+#define	LIST_FOREACH(var, head, field)					\
+	for ((var) = LIST_FIRST((head));				\
+	    (var);							\
+	    (var) = LIST_NEXT((var), field))
+
+/* Reset the head to the empty state. */
+#define	LIST_INIT(head) do {						\
+	LIST_FIRST((head)) = NULL;					\
+} while (0)
+
+/*
+ * Insert elm directly after listelm.  If listelm had a successor, that
+ * successor's le_prev is repointed at elm's next field before listelm's next
+ * link is switched over to elm.
+ */
+#define	LIST_INSERT_AFTER(listelm, elm, field) do {			\
+	if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\
+		LIST_NEXT((listelm), field)->field.le_prev =		\
+		    &LIST_NEXT((elm), field);				\
+	LIST_NEXT((listelm), field) = (elm);				\
+	(elm)->field.le_prev = &LIST_NEXT((listelm), field);		\
+} while (0)
+
+/*
+ * Insert elm directly before listelm.  elm takes over listelm's le_prev, the
+ * pointer that used to reference listelm is rewritten to reference elm, and
+ * listelm's le_prev then points at elm's next field.
+ */
+#define	LIST_INSERT_BEFORE(listelm, elm, field) do {			\
+	(elm)->field.le_prev = (listelm)->field.le_prev;		\
+	LIST_NEXT((elm), field) = (listelm);				\
+	*(listelm)->field.le_prev = (elm);				\
+	(listelm)->field.le_prev = &LIST_NEXT((elm), field);		\
+} while (0)
+
+/*
+ * Insert elm at the front of the list.  The previous first element, if any,
+ * gets its le_prev repointed at elm's next field; elm's le_prev points at
+ * the head's first pointer.
+ */
+#define	LIST_INSERT_HEAD(head, elm, field) do {				\
+	if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL)	\
+		LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
+	LIST_FIRST((head)) = (elm);					\
+	(elm)->field.le_prev = &LIST_FIRST((head));			\
+} while (0)
+
+/* Successor link of elm; usable as an lvalue by the other LIST macros. */
+#define	LIST_NEXT(elm, field)	((elm)->field.le_next)
+
+/*
+ * Unlink elm without needing the list head: the successor (if any) inherits
+ * elm's le_prev, and the pointer that referenced elm is rewritten to
+ * reference elm's successor.  elm's own links are left untouched.
+ */
+#define	LIST_REMOVE(elm, field) do {					\
+	if (LIST_NEXT((elm), field) != NULL)				\
+		LIST_NEXT((elm), field)->field.le_prev =		\
+		    (elm)->field.le_prev;				\
+	*(elm)->field.le_prev = LIST_NEXT((elm), field);		\
+} while (0)
+
+/*
+ * Tail queue declarations.
+ */
+/*
+ * Declare `struct name`, a tail queue head: a pointer to the first element
+ * and the address of the last next link.  TRACEBUF is defined outside this
+ * excerpt.
+ */
+#define	TAILQ_HEAD(name, type)						\
+struct name {								\
+	struct type *tqh_first;	/* first element */			\
+	struct type **tqh_last;	/* addr of last next element */		\
+	TRACEBUF							\
+}
+
+/* Static initializer for an empty queue: tqh_last points at tqh_first. */
+#define	TAILQ_HEAD_INITIALIZER(head)					\
+	{ NULL, &(head).tqh_first }
+
+/*
+ * Anonymous link structure to embed in each element.  Its first two members
+ * have the same types and order as those of TAILQ_HEAD, which TAILQ_LAST and
+ * TAILQ_PREV rely on.
+ */
+#define	TAILQ_ENTRY(type)						\
+struct {								\
+	struct type *tqe_next;	/* next element */			\
+	struct type **tqe_prev;	/* address of previous next element */	\
+	TRACEBUF							\
+}
+
+/*
+ * Tail queue functions.
+ */
+/*
+ * Append every element of head2 to the end of head1 and reinitialize head2
+ * to empty.  Does nothing when head2 is already empty.  Both heads are
+ * traced afterwards (the first trace call previously named `head`, which is
+ * not a parameter of this macro).
+ */
+#define	TAILQ_CONCAT(head1, head2, field) do {				\
+	if (!TAILQ_EMPTY(head2)) {					\
+		*(head1)->tqh_last = (head2)->tqh_first;		\
+		(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last;	\
+		(head1)->tqh_last = (head2)->tqh_last;			\
+		TAILQ_INIT((head2));					\
+		QMD_TRACE_HEAD(head1);					\
+		QMD_TRACE_HEAD(head2);					\
+	}								\
+} while (0)
+
+/* True when the tail queue has no elements. */
+#define	TAILQ_EMPTY(head)	((head)->tqh_first == NULL)
+
+/* First element of the queue (NULL when empty); also used as an lvalue. */
+#define	TAILQ_FIRST(head)	((head)->tqh_first)
+
+/*
+ * Loop header that walks var forward over the queue until NULL.  var's next
+ * link is read after each iteration of the body.
+ */
+#define	TAILQ_FOREACH(var, head, field)					\
+	for ((var) = TAILQ_FIRST((head));				\
+	    (var);							\
+	    (var) = TAILQ_NEXT((var), field))
+
+/*
+ * Loop header that walks var backward from the last element until NULL.
+ * headname is the struct tag given to TAILQ_HEAD; TAILQ_LAST and TAILQ_PREV
+ * need it for their casts.
+ */
+#define	TAILQ_FOREACH_REVERSE(var, head, headname, field)		\
+	for ((var) = TAILQ_LAST((head), headname);			\
+	    (var);							\
+	    (var) = TAILQ_PREV((var), headname, field))
+
+/*
+ * Reset the head to the empty state: no first element, and tqh_last pointing
+ * back at the first pointer.
+ */
+#define	TAILQ_INIT(head) do {						\
+	TAILQ_FIRST((head)) = NULL;					\
+	(head)->tqh_last = &TAILQ_FIRST((head));			\
+	QMD_TRACE_HEAD(head);						\
+} while (0)
+
+/*
+ * Insert elm directly after listelm.  If listelm had a successor, that
+ * successor's tqe_prev is repointed at elm's next field; otherwise elm
+ * becomes the tail and tqh_last is updated.  listelm is parenthesized in the
+ * trace call so that expression arguments expand correctly.
+ */
+#define	TAILQ_INSERT_AFTER(head, listelm, elm, field) do {		\
+	if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
+		TAILQ_NEXT((elm), field)->field.tqe_prev =		\
+		    &TAILQ_NEXT((elm), field);				\
+	else {								\
+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\
+		QMD_TRACE_HEAD(head);					\
+	}								\
+	TAILQ_NEXT((listelm), field) = (elm);				\
+	(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field);		\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+	QMD_TRACE_ELEM(&(listelm)->field);				\
+} while (0)
+
+/*
+ * Insert elm directly before listelm.  elm takes over listelm's tqe_prev,
+ * the pointer that used to reference listelm is rewritten to reference elm,
+ * and listelm's tqe_prev then points at elm's next field.  listelm is
+ * parenthesized in the trace call so that expression arguments expand
+ * correctly.
+ */
+#define	TAILQ_INSERT_BEFORE(listelm, elm, field) do {			\
+	(elm)->field.tqe_prev = (listelm)->field.tqe_prev;		\
+	TAILQ_NEXT((elm), field) = (listelm);				\
+	*(listelm)->field.tqe_prev = (elm);				\
+	(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field);		\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+	QMD_TRACE_ELEM(&(listelm)->field);				\
+} while (0)
+
+/*
+ * Insert elm at the front of the queue.  The previous first element, if any,
+ * gets its tqe_prev repointed at elm's next field; if the queue was empty,
+ * tqh_last is pointed at elm's next field instead.
+ */
+#define	TAILQ_INSERT_HEAD(head, elm, field) do {			\
+	if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL)	\
+		TAILQ_FIRST((head))->field.tqe_prev =			\
+		    &TAILQ_NEXT((elm), field);				\
+	else								\
+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\
+	TAILQ_FIRST((head)) = (elm);					\
+	(elm)->field.tqe_prev = &TAILQ_FIRST((head));			\
+	QMD_TRACE_HEAD(head);						\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+/*
+ * Append elm: terminate it with NULL, record the current tqh_last as its
+ * tqe_prev, store elm through that slot, then advance tqh_last to elm's next
+ * field.
+ */
+#define	TAILQ_INSERT_TAIL(head, elm, field) do {			\
+	TAILQ_NEXT((elm), field) = NULL;				\
+	(elm)->field.tqe_prev = (head)->tqh_last;			\
+	*(head)->tqh_last = (elm);					\
+	(head)->tqh_last = &TAILQ_NEXT((elm), field);			\
+	QMD_TRACE_HEAD(head);						\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+/*
+ * Last element of the queue, or NULL when it is empty.  The address held in
+ * tqh_last (the last element's entry, or the head itself when empty) is
+ * reinterpreted as a `struct headname`; its second pointer is then followed
+ * and dereferenced.  This depends on TAILQ_HEAD and TAILQ_ENTRY sharing the
+ * same leading member layout.
+ */
+#define	TAILQ_LAST(head, headname)					\
+	(*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+/* Successor link of elm; usable as an lvalue by the other TAILQ macros. */
+#define	TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+/*
+ * Predecessor of elm, or NULL when elm is the first element.  Uses the same
+ * cast as TAILQ_LAST: the address in elm's tqe_prev is treated as a
+ * `struct headname` and its second pointer is followed and dereferenced.
+ */
+#define	TAILQ_PREV(elm, headname, field)				\
+	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+/*
+ * Unlink elm from the queue.  The successor (if any) inherits elm's
+ * tqe_prev; otherwise elm was the tail and tqh_last moves back to elm's
+ * tqe_prev.  The pointer that referenced elm is then rewritten to reference
+ * elm's successor.  TRASHIT is applied to elm's links afterwards; its
+ * definition is outside this excerpt.
+ */
+#define	TAILQ_REMOVE(head, elm, field) do {				\
+	if ((TAILQ_NEXT((elm), field)) != NULL)				\
+		TAILQ_NEXT((elm), field)->field.tqe_prev =		\
+		    (elm)->field.tqe_prev;				\
+	else {								\
+		(head)->tqh_last = (elm)->field.tqe_prev;		\
+		QMD_TRACE_HEAD(head);					\
+	}								\
+	*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field);		\
+	TRASHIT((elm)->field.tqe_next);					\
+	TRASHIT((elm)->field.tqe_prev);					\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+/*
+ * Circular queue definitions.
+ */
+/*
+ * Declare `struct name`, a circular queue head with pointers to the first
+ * and last elements.  The CIRCLEQ macros use the head's own address, cast
+ * through void *, as the end-of-queue marker in these pointers.
+ */
+#define	CIRCLEQ_HEAD(name, type)					\
+struct name {								\
+	struct type *cqh_first;		/* first element */		\
+	struct type *cqh_last;		/* last element */		\
+}
+
+/*
+ * Static initializer for an empty circular queue: both pointers refer back
+ * to the head itself.  The argument is parenthesized, matching
+ * TAILQ_HEAD_INITIALIZER.
+ */
+#define	CIRCLEQ_HEAD_INITIALIZER(head)					\
+	{ (void *)&(head), (void *)&(head) }
+
+/* Anonymous link structure to embed in each circular queue element. */
+#define	CIRCLEQ_ENTRY(type)						\
+struct {								\
+	struct type *cqe_next;		/* next element */		\
+	struct type *cqe_prev;		/* previous element */		\
+}
+
+/*
+ * Circular queue functions.
+ */
+/* Reset the head to the empty state: first and last both refer to the head. */
+#define	CIRCLEQ_INIT(head) do {						\
+	(head)->cqh_first = (void *)(head);				\
+	(head)->cqh_last = (void *)(head);				\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Insert elm directly after listelm.  If listelm was the last element (its
+ * next link is the head), the head's last pointer is moved to elm; otherwise
+ * the old successor's prev link is pointed at elm.
+ */
+#define	CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do {		\
+	(elm)->field.cqe_next = (listelm)->field.cqe_next;		\
+	(elm)->field.cqe_prev = (listelm);				\
+	if ((listelm)->field.cqe_next == (void *)(head))		\
+		(head)->cqh_last = (elm);				\
+	else								\
+		(listelm)->field.cqe_next->field.cqe_prev = (elm);	\
+	(listelm)->field.cqe_next = (elm);				\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Insert elm directly before listelm.  If listelm was the first element (its
+ * prev link is the head), the head's first pointer is moved to elm;
+ * otherwise the old predecessor's next link is pointed at elm.
+ */
+#define	CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do {		\
+	(elm)->field.cqe_next = (listelm);				\
+	(elm)->field.cqe_prev = (listelm)->field.cqe_prev;		\
+	if ((listelm)->field.cqe_prev == (void *)(head))		\
+		(head)->cqh_first = (elm);				\
+	else								\
+		(listelm)->field.cqe_prev->field.cqe_next = (elm);	\
+	(listelm)->field.cqe_prev = (elm);				\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Insert elm at the front.  elm's prev link is the head marker.  When the
+ * queue was empty (last refers to the head) elm also becomes the last
+ * element; otherwise the old first element's prev link is pointed at elm.
+ */
+#define	CIRCLEQ_INSERT_HEAD(head, elm, field) do {			\
+	(elm)->field.cqe_next = (head)->cqh_first;			\
+	(elm)->field.cqe_prev = (void *)(head);				\
+	if ((head)->cqh_last == (void *)(head))				\
+		(head)->cqh_last = (elm);				\
+	else								\
+		(head)->cqh_first->field.cqe_prev = (elm);		\
+	(head)->cqh_first = (elm);					\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Insert elm at the back.  elm's next link is the head marker.  When the
+ * queue was empty (first refers to the head) elm also becomes the first
+ * element; otherwise the old last element's next link is pointed at elm.
+ */
+#define	CIRCLEQ_INSERT_TAIL(head, elm, field) do {			\
+	(elm)->field.cqe_next = (void *)(head);				\
+	(elm)->field.cqe_prev = (head)->cqh_last;			\
+	if ((head)->cqh_first == (void *)(head))			\
+		(head)->cqh_first = (elm);				\
+	else								\
+		(head)->cqh_last->field.cqe_next = (elm);		\
+	(head)->cqh_last = (elm);					\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Unlink elm.  On each side, if the neighbour is the head marker the
+ * corresponding head pointer (last or first) is updated; otherwise the
+ * neighbouring element's link is updated.  elm's own links are left
+ * untouched.
+ */
+#define	CIRCLEQ_REMOVE(head, elm, field) do {				\
+	if ((elm)->field.cqe_next == (void *)(head))			\
+		(head)->cqh_last = (elm)->field.cqe_prev;		\
+	else								\
+		(elm)->field.cqe_next->field.cqe_prev =			\
+		    (elm)->field.cqe_prev;				\
+	if ((elm)->field.cqe_prev == (void *)(head))			\
+		(head)->cqh_first = (elm)->field.cqe_next;		\
+	else								\
+		(elm)->field.cqe_prev->field.cqe_next =			\
+		    (elm)->field.cqe_next;				\
+} while (/*CONSTCOND*/0)
+
+/*
+ * Loop header that walks var forward from the first element and stops when
+ * var equals the head marker (not NULL).
+ */
+#define	CIRCLEQ_FOREACH(var, head, field)				\
+	for ((var) = ((head)->cqh_first);				\
+		(var) != (const void *)(head);				\
+		(var) = ((var)->field.cqe_next))
+
+/*
+ * Loop header that walks var backward from the last element and stops when
+ * var equals the head marker (not NULL).
+ */
+#define	CIRCLEQ_FOREACH_REVERSE(var, head, field)			\
+	for ((var) = ((head)->cqh_last);				\
+		(var) != (const void *)(head);				\
+		(var) = ((var)->field.cqe_prev))
+
+/*
+ * Circular queue access methods.
+ */
+/* True when the first pointer refers back to the head, i.e. no elements. */
+#define	CIRCLEQ_EMPTY(head)		((head)->cqh_first == (void *)(head))
+/* Raw first/last pointers; these equal the head marker when the queue is empty. */
+#define	CIRCLEQ_FIRST(head)		((head)->cqh_first)
+#define	CIRCLEQ_LAST(head)		((head)->cqh_last)
+/* Raw neighbour links; these equal the head marker at either end of the queue. */
+#define	CIRCLEQ_NEXT(elm, field)	((elm)->field.cqe_next)
+#define	CIRCLEQ_PREV(elm, field)	((elm)->field.cqe_prev)
+
+/*
+ * Successor of elm with wrap-around: when elm is the last element (its next
+ * link is the head marker) this evaluates to the first element instead.
+ * elm is parenthesized in both branches so that arguments such as `&x` or
+ * `*pp` expand correctly.
+ */
+#define CIRCLEQ_LOOP_NEXT(head, elm, field)				\
+	(((elm)->field.cqe_next == (void *)(head))			\
+	    ? ((head)->cqh_first)					\
+	    : ((elm)->field.cqe_next))
+/*
+ * Predecessor of elm with wrap-around: when elm is the first element (its
+ * prev link is the head marker) this evaluates to the last element instead.
+ * elm is parenthesized in both branches so that arguments such as `&x` or
+ * `*pp` expand correctly.
+ */
+#define CIRCLEQ_LOOP_PREV(head, elm, field)				\
+	(((elm)->field.cqe_prev == (void *)(head))			\
+	    ? ((head)->cqh_last)					\
+	    : ((elm)->field.cqe_prev))
+
+
+#if defined(__cplusplus)
+}
+#endif
+#endif	/* !_DB_QUEUE_H_ */
--- a/c_src/stats.h
+++ b/c_src/stats.h
@ -1,217 +0,0 @@
-/*
- * stats: measure all the things
- *
- * Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
- * Author: Gregory Burd <greg@basho.com> <greg@burd.me>
- *
- * This file is provided to you under the Apache License,
- * Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License.  You may obtain
- * a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef __STATS_H__
-#define __STATS_H__
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include "duration.h"
-
-/**
- * Calculate the log2 of 64bit unsigned integers.
- */
-#ifdef __GCC__
-#define LOG2(X) ((unsigned) ((8 * (sizeof(uint64_t) - 1))  - __builtin_clzll((X))))
-#else
-static unsigned int __log2_64(uint64_t x) {
-     static const int tab64[64] = {
-          63,  0, 58,  1, 59, 47, 53,  2,
-          60, 39, 48, 27, 54, 33, 42,  3,
-          61, 51, 37, 40, 49, 18, 28, 20,
-          55, 30, 34, 11, 43, 14, 22,  4,
-          62, 57, 46, 52, 38, 26, 32, 41,
-          50, 36, 17, 19, 29, 10, 13, 21,
-          56, 45, 25, 31, 35, 16,  9, 12,
-          44, 24, 15,  8, 23,  7,  6,  5};
-     if (x == 0) return 0;
-     uint64_t v = x;
-     v |= v >> 1;
-     v |= v >> 2;
-     v |= v >> 4;
-     v |= v >> 8;
-     v |= v >> 16;
-     v |= v >> 32;
-     return tab64[((uint64_t)((v - (v >> 1)) * 0x07EDD5E59A4E28C2)) >> 58];
-}
-#define LOG2(X) __log2_64(X)
-#endif
-
-#define STAT_DEF(name) struct name ## _stat name ## _stat;
-
-#define STAT_DECL(name, nsamples)                                       \
-     struct name ## _stat {                                             \
-         duration_t d;                                                  \
-         uint64_t histogram[64];                                        \
-         uint32_t h, n;                                                 \
-         uint64_t samples[nsamples];                                    \
-         uint64_t min, max;                                             \
-         double mean;                                                   \
-     };                                                                 \
-     static inline double name ## _stat_mean(struct name ## _stat *s) { \
-         uint32_t t = s->h;                                             \
-         uint32_t h = (s->h + 1) % nsamples;                            \
-         double mean = 0;                                               \
-         while (h != t) {                                               \
-             mean += s->samples[h];                                     \
-             h = (h + 1) % nsamples;                                    \
-         }                                                              \
-         if (mean > 0)                                                  \
-             mean /= (double)(s->n < nsamples ? s->n : nsamples);       \
-         return mean;                                                   \
-     }                                                                  \
-     static inline double name ## _stat_mean_lg2(struct name ## _stat *s) { \
-         uint32_t i;                                                    \
-         double mean = 0;                                               \
-         for (i = 0; i < 64; i++)                                       \
-             mean += (s->histogram[i] * i);                             \
-         if (mean > 0)                                                  \
-             mean /= (double)s->n;                                      \
-         return mean;                                                   \
-     }                                                                  \
-     static inline uint64_t name ## _stat_tick(struct name ## _stat *s) \
-     {                                                                  \
-         uint64_t t = ts(s->d.unit);                                    \
-         s->d.then = t;                                                 \
-         return t;                                                      \
-     }                                                                  \
-     static inline void name ## _stat_reset(struct name ## _stat *s)    \
-     {                                                                  \
-         s->min = ~0;                                                   \
-         s->max = 0;                                                    \
-         s->h = 0;                                                      \
-         memset(&s->histogram, 0, sizeof(uint64_t) * 64);               \
-         memset(&s->samples, 0, sizeof(uint64_t) * nsamples);           \
-     }                                                                  \
-     static inline uint64_t name ## _stat_tock(struct name ## _stat *s) \
-     {                                                                  \
-         uint64_t now = ts(s->d.unit);                                  \
-         uint64_t elapsed = now - s->d.then;                            \
-         uint32_t i = s->h;                                             \
-         if (s->n == nsamples) {                                        \
-             s->mean = (s->mean + name ## _stat_mean(s)) / 2.0;         \
-             if (s->n >= 4294967295)                                    \
-                 name ## _stat_reset(s);                                \
-         }                                                              \
-         s->h = (s->h + 1) % nsamples;                                  \
-         s->samples[i] = elapsed;                                       \
-         if (elapsed < s->min)                                          \
-             s->min = elapsed;                                          \
-         if (elapsed > s->max)                                          \
-             s->max = elapsed;                                          \
-         s->histogram[LOG2(elapsed)]++;                                 \
-         s->n++;                                                        \
-         s->d.then = ts(s->d.unit);                                     \
-         return elapsed;                                                \
-     }                                                                  \
-     static void name ## _stat_print_histogram(struct name ## _stat *s, const char *mod) \
-     {                                                                  \
-         uint8_t logs[64];                                              \
-         uint8_t i, j, max_log = 0;                                     \
-         double m = 0.0;						\
-                                                                        \
-	 if (s->n < nsamples)						\
-	     return;							\
-									\
-         fprintf(stderr, "\n%s:async_nif request latency histogram:\n", mod); \
-	 m = (s->mean + name ## _stat_mean(s) / 2.0);			\
-         for (i = 0; i < 64; i++) {                                     \
-             logs[i] = LOG2(s->histogram[i]);                           \
-             if (logs[i] > max_log)                                     \
-                 max_log = logs[i];                                     \
-         }                                                              \
-         for (i = max_log; i > 0; i--) {                                \
-             if (!(i % 10))                                             \
-                 fprintf(stderr, "2^%2d ", i);                          \
-             else                                                       \
-                 fprintf(stderr, "     ");                              \
-             for(j = 0; j < 64; j++)                                    \
-                 fprintf(stderr, logs[j] >= i ?  "•" : " ");            \
-             fprintf(stderr, "\n");                                     \
-         }                                                              \
-         if (max_log == 100) {                                          \
-             fprintf(stderr, "[empty]\n");                              \
-         } else {                                                       \
-             fprintf(stderr, "     ns        μs        ms        s         ks\n"); \
-             fprintf(stderr, "min: ");                                  \
-             if (s->min < 1000)                                         \
-                 fprintf(stderr, "%lu (ns)", s->min);                   \
-             else if (s->min < 1000000)                                 \
-                 fprintf(stderr, "%.2f (μs)", s->min / 1000.0);         \
-             else if (s->min < 1000000000)                              \
-                 fprintf(stderr, "%.2f (ms)", s->min / 1000000.0);      \
-             else if (s->min < 1000000000000)                           \
-                 fprintf(stderr, "%.2f (s)", s->min / 1000000000.0);    \
-             fprintf(stderr, "  max: ");                                \
-             if (s->max < 1000)                                         \
-                 fprintf(stderr, "%lu (ns)", s->max);                   \
-             else if (s->max < 1000000)                                 \
-                 fprintf(stderr, "%.2f (μs)", s->max / 1000.0);         \
-             else if (s->max < 1000000000)                              \
-                 fprintf(stderr, "%.2f (ms)", s->max / 1000000.0);      \
-             else if (s->max < 1000000000000)                           \
-                 fprintf(stderr, "%.2f (s)", s->max / 1000000000.0);    \
-             fprintf(stderr, "  mean: ");                               \
-             if (m < 1000)                                              \
-                 fprintf(stderr, "%.2f (ns)", m);                       \
-             else if (m < 1000000)                                      \
-                 fprintf(stderr, "%.2f (μs)", m / 1000.0);              \
-             else if (m < 1000000000)                                   \
-                 fprintf(stderr, "%.2f (ms)", m / 1000000.0);           \
-             else if (m < 1000000000000)                                \
-                 fprintf(stderr, "%.2f (s)", m / 1000000000.0);         \
-             fprintf(stderr, "\n");                                     \
-         }                                                              \
-         fflush(stderr);                                                \
-     }
-
-
-#define STAT_INIT(var, name)                                            \
-     var->name ## _stat.min = ~0;                                       \
-     var->name ## _stat.max = 0;                                        \
-     var->name ## _stat.mean = 0.0;                                     \
-     var->name ## _stat.h = 0;                                          \
-     var->name ## _stat.d.then = 0;                                     \
-     var->name ## _stat.d.unit = ns;
-
-#define STAT_TICK(var, name) name ## _stat_tick(&var->name ## _stat)
-
-#define STAT_TOCK(var, name) name ## _stat_tock(&var->name ## _stat)
-
-#define STAT_RESET(var, name) name ## _stat_reset(&var->name ## _stat)
-
-#define STAT_MEAN_LOG2_SAMPLE(var, name)                                \
-    name ## _stat_mean_lg2(&var->name ## _stat)
-
-#define STAT_MEAN_SAMPLE(var, name)                                     \
-    name ## _stat_mean(&var->name ## _stat)
-
-#define STAT_PRINT(var, name, mod)                                      \
-    name ## _stat_print_histogram(&var->name ## _stat, mod)
-
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif // __STATS_H__
--- a/rebar.config
+++ b/rebar.config
@ -1,7 +1,7 @@
 %% -*- erlang -*-
 %% ex: ft=erlang ts=4 sw=4 et

-{require_otp_vsn, "R1[567]"}.
+{require_otp_vsn, "R1[56]|1[78]"}.

 {cover_enabled, true}.

@ -29,14 +29,11 @@
 {eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.

 {port_specs, [
-              {"unix",   "priv/lmdb.so",  ["c_src/*.c"]},
-              {"linux",  "priv/lmdb.so",  ["c_src/*.c"]},
-              {"darwin", "priv/lmdb.so",  ["c_src/*.c"]},
-              {"win32",  "priv/lmdb.dll", ["c_src/*.c"]}
+	      {"priv/lmdb.so", ["c_src/*.c"]}
             ]}.

 {port_env, [
-    {"DRV_CFLAGS",  "$DRV_CFLAGS -O3 -fPIC -march=native -mtune=native -Wall -Wextra -Werror"}
+    {"DRV_CFLAGS",  "$DRV_CFLAGS -O3 -fPIC -march=native -mtune=native -Wall -Wextra"}
 ]}.

 % for debugging use
--- a/src/async_nif.hrl
+++ b/src/async_nif.hrl
@ -1,43 +1,54 @@
-%% ---------------------------------------------------------------------------
+%% -------------------------------------------------------------------
 %%
 %% async_nif: An async thread-pool layer for Erlang's NIF API
 %%
-%% Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved.
+%% Copyright (c) 2012 Basho Technologies, Inc. All Rights Reserved.
 %% Author: Gregory Burd <greg@basho.com> <greg@burd.me>
 %%
-%% This file is provided to you under the Apache License, Version 2.0 (the
-%% "License"); you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at:
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License.  You may obtain
+%% a copy of the License at
 %%
 %%   http://www.apache.org/licenses/LICENSE-2.0
 %%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
-%% License for the specific language governing permissions and limitations
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied.  See the License for the
+%% specific language governing permissions and limitations
 %% under the License.
 %%
-%% ---------------------------------------------------------------------------
+%% -------------------------------------------------------------------

-spec async_nif_enqueue(reference(), function(), [term()]) -> term() | {error, term()}.
-async_nif_enqueue(R, F, A) ->
-    case erlang:apply(F, [R|A]) of
-        {ok, enqueued} ->
-            receive
-                {R, {error, eagain}} ->
-                    %% Work unit was not queued, try again.
-                    async_nif_enqueue(R, F, A);
-                {R, {error, shutdown}=Error} ->
-                    %% Work unit was queued, but not executed.
-                    Error;
-                {R, {error, _Reason}=Error} ->
-                    %% Work unit returned an error.
-                    Error;
-                {R, Reply} ->
-                    Reply
-            end;
-        Other ->
-            Other
-    end.
-
-define(ASYNC_NIF_CALL(Fun, Args), async_nif_enqueue(erlang:make_ref(), Fun, Args)).
+%% ASYNC_NIF_CALL(Fun, Args): apply Fun to Args with a fresh reference
+%% prepended, then handle the immediate result:
+%%   {ok, {enqueued, PctBusy}} - bump reductions by trunc(2000 * PctBusy)
+%%       when PctBusy is in (0.25, 1.0], then block in `receive' (there is
+%%       no `after' clause) for a message tagged with the reference and
+%%       return its payload;
+%%   {error, eagain} - retry with a new reference; the third such failure
+%%       returns the atom not_found;
+%%   anything else - returned unchanged.
+%% NOTE(review): the expansion binds F in the calling clause, so expanding
+%% the macro twice in one clause looks like it would fail to match --
+%% confirm before reusing it that way.
+-define(ASYNC_NIF_CALL(Fun, Args),
+	F = fun(F, T) ->
+		    R = erlang:make_ref(),
+		    case erlang:apply(Fun, [R|Args]) of
+			{ok, {enqueued, PctBusy}} ->
+			    if
+				PctBusy > 0.25 andalso PctBusy =< 1.0 ->
+				    erlang:bump_reductions(erlang:trunc(2000 * PctBusy));
+			       true ->
+				    ok
+			    end,
+			    receive
+				{R, {error, shutdown}=Error} ->
+				    %% Work unit was queued, but not executed.
+				    Error;
+				{R, {error, _Reason}=Error} ->
+				    %% Work unit returned an error.
+				    Error;
+				{R, Reply} ->
+				    Reply
+			    end;
+			{error, eagain} ->
+			    case T of
+				3 -> not_found;
+				_ -> F(F, T + 1)
+			    end;
+			Other ->
+			    Other
+		    end
+	    end,
+	F(F, 1)).
--- a/src/lmdb.erl
+++ b/src/lmdb.erl
@ -32,7 +32,7 @@
 %% EXPORTS
 %%====================================================================
 -export([
-         %open/1,
+         open/1,
         open/2,
         open/3,

@ -40,8 +40,11 @@

         put/3,
         get/2,
+         txn_begin/1,
+         txn_commit/1,
+         txn_abort/1,
         del/2,
-	 update/3, upd/3,
+	     update/3, upd/3,

         drop/1
        ]).
@ -80,8 +83,8 @@
 %% @doc Create a new MDB database
 %% @end
 %%--------------------------------------------------------------------
-%open(DirName) ->
-%    open(DirName, ?MDB_MAP_SIZE).
+open(DirName) ->
+    open(DirName, ?MDB_MAP_SIZE).
 open(DirName, MapSize)
  when is_integer(MapSize)
       andalso MapSize > 0 ->
@ -116,6 +119,24 @@ get(Handle, Key)
 get(_AsyncRef, _Handle, _Key) ->
    ?NOT_LOADED.

+%% txn_begin/1 dispatches to txn_begin/2 through ?ASYNC_NIF_CALL, which
+%% supplies the leading reference argument.  Presumably this starts an LMDB
+%% transaction on Handle; the native side is not visible here.
+txn_begin(Handle) ->
+    ?ASYNC_NIF_CALL(fun txn_begin/2, [Handle]).
+
+%% Erlang-side fallback body; evaluates ?NOT_LOADED (defined elsewhere).
+txn_begin(_AsyncRef, _Handle) ->
+    ?NOT_LOADED.
+
+%% txn_commit/1 dispatches to txn_commit/2 through ?ASYNC_NIF_CALL, which
+%% supplies the leading reference argument.  Presumably this commits the
+%% transaction open on Handle; the native side is not visible here.
+txn_commit(Handle) ->
+    ?ASYNC_NIF_CALL(fun txn_commit/2, [Handle]).
+
+%% Erlang-side fallback body; evaluates ?NOT_LOADED (defined elsewhere).
+txn_commit(_AsyncRef, _Handle) ->
+    ?NOT_LOADED.
+
+%% txn_abort/1 dispatches to txn_abort/2 through ?ASYNC_NIF_CALL, which
+%% supplies the leading reference argument.  Presumably this aborts the
+%% transaction open on Handle; the native side is not visible here.
+txn_abort(Handle) ->
+    ?ASYNC_NIF_CALL(fun txn_abort/2, [Handle]).
+
+%% Erlang-side fallback body; evaluates ?NOT_LOADED (defined elsewhere).
+txn_abort(_AsyncRef, _Handle) ->
+    ?NOT_LOADED.
+
 del(Handle, Key)
  when is_binary(Key) ->
    ?ASYNC_NIF_CALL(fun del/3, [Handle, Key]).
@ -177,9 +198,9 @@ open_test_db() ->
    ?cmd("rm -rf " ++ DataDir),
    ?assertMatch(ok, filelib:ensure_dir(filename:join([DataDir, "x"]))),
    {ok, Handle} = ?MODULE:open(DataDir, 2147483648),
-    [?MODULE:upd(Handle, crypto:sha(<<X>>),
+    [?MODULE:upd(Handle, crypto:hash(sha, <<X>>),
 		 crypto:rand_bytes(crypto:rand_uniform(128, 4096))) ||
-	X <- lists:seq(1, 100)],
+	X <- lists:seq(1, 10)],
    Handle.

 basics_test_() ->
--- a/tools/lmdb.config
+++ b/tools/lmdb.config
@ -24,13 +24,14 @@
 %% adding a "_" to the name and take the "_" out of the other's name).

 {mode, max}.
-{duration, 10}.
-{concurrent, 8}.
+{duration, 480}.
+{concurrent, 32}.
 {driver, basho_bench_driver_lmdb}.
-{key_generator, {int_to_bin_littleendian,{uniform_int, 5000000}}}.
-{value_generator, {fixed_bin, 1024}}.
-%{operations, [{get, 9}, {put, 9}, {delete, 2}]}.
-{operations, [{put, 1}]}.
+{key_generator, {int_to_bin_littleendian,{uniform_int, 5000000000}}}.
+{value_generator, {highly_compressible_bin, 2048}}.
+%{value_generator, {fixed_bin, 1024}}.
+{operations, [{get, 25}, {put, 70}, {delete, 5}]}.
+%{operations, [{put, 1}]}.
 {code_paths, ["../lmdb"]}.
 {lmdb_dir, "/home/gburd/ws/basho_bench/data"}.

--- a/tools/update-mdb
+++ b/tools/update-mdb
@ -0,0 +1,6 @@
+#!/bin/bash
+# Download the LMDB source files from the upstream mdb.master branch into the
+# current directory, overwriting any existing copies.
+
+# Stop at the first failed download instead of continuing with a partial set.
+set -euo pipefail
+
+for file in lmdb.h mdb.c midl.h midl.c; do
+  # -f: fail on HTTP errors rather than saving the error page over the source.
+  curl -fLO "https://raw.githubusercontent.com/LMDB/lmdb/mdb.master/libraries/liblmdb/${file}"
+done
+
Author	SHA1	Message	Date
Greg Burd	982a7b6be3	Remove deprecated crypto:sha/1 in favor of crypto:hash/2.	2016-03-06 20:19:34 -05:00
Greg Burd	e0ea7f2504	Quell compiler warnings.	2016-03-06 20:11:34 -05:00
Greg Burd	5af4f8e7a1	Update to LMDB@3f62b727ccf3424daca1cdc24bbf98c869f44699	2016-03-06 20:08:11 -05:00
Greg Burd	6a0608b01f	Return code from pwrite() shouldn't be ignored.	2016-03-06 19:57:26 -05:00
Dima Aleksandrov	8515231647	add transactions	2015-04-01 01:08:13 +02:00
Gregory Burd	0c98a25ade	Update async_nif to latest version.	2013-09-17 13:23:31 -04:00
Gregory Burd	85295c7890	Update to latest async_nif code.	2013-07-04 20:52:20 -04:00
Gregory Burd	af7dbbcc9a	Fix a few mistakes.	2013-05-19 13:33:19 -04:00
Gregory Burd	ba1287ade2	A bit cleaner rebar.config.	2013-05-19 13:30:04 -04:00
Gregory Burd	94d6ee017a	Merge pull request #2 from basho-labs/gsb-rename-as-lmdb Rename to lmdb	2013-05-19 07:51:36 -07:00