From f1b7d8322da904a3385b97456819afd63ff41afe Mon Sep 17 00:00:00 2001
From: Gregory Burd
Date: Tue, 4 Jun 2013 14:45:23 -0400
Subject: [PATCH] WIP: replace the kbtree with khash; we don't need the tree
 features (yet, if ever) and a hash is faster. Add a most-recently-used stash
 for contexts, since worker threads are very likely to perform many operations
 with the same shape/signature of session/cursors/tables/config, and that path
 can be lock-free as well, making it much faster (one would hope). Somewhere
 something is stepping on read-only ErlNifBinary data, so a CRC check is
 failing and aborting the runtime; that's the latest item to find/fix.

---
(Illustrative sketches of the MRU stash, the khash-backed cache, the condvar
ordering, and the signature hash follow the diff.)

 c_src/async_nif.h                  |   6 +-
 c_src/common.h                     |   2 +-
 c_src/kbtree.h                     | 507 -----------------------------
 c_src/wterl.c                      | 349 ++++++++++----------
 src/wterl.erl                      |   2 +-
 tools/basho_bench_driver_wterl.erl |   6 +-
 6 files changed, 176 insertions(+), 696 deletions(-)
 delete mode 100644 c_src/kbtree.h

diff --git a/c_src/async_nif.h b/c_src/async_nif.h
index 9483433..44a0906 100644
--- a/c_src/async_nif.h
+++ b/c_src/async_nif.h
@@ -34,7 +34,7 @@ extern "C" {
 #define UNUSED(v) ((void)(v))
 #endif
 
-#define ASYNC_NIF_MAX_WORKERS 1024
+#define ASYNC_NIF_MAX_WORKERS 512
 #define ASYNC_NIF_WORKER_QUEUE_SIZE 500
 #define ASYNC_NIF_MAX_QUEUED_REQS 1000 * ASYNC_NIF_MAX_WORKERS
 
@@ -278,8 +278,8 @@ async_nif_enqueue_req(struct async_nif_state* async_nif, struct async_nif_req_en
      performing the request). */
   ERL_NIF_TERM reply = enif_make_tuple2(req->env, enif_make_atom(req->env, "ok"),
                                         enif_make_atom(req->env, "enqueued"));
-  enif_mutex_unlock(q->reqs_mutex);
   enif_cond_signal(q->reqs_cnd);
+  enif_mutex_unlock(q->reqs_mutex);
   return reply;
 }
 
@@ -314,11 +314,11 @@ async_nif_worker_fn(void *arg)
       /* At this point the next req is ours to process and we hold the
          reqs_mutex lock.  Take the request off the queue. */
       req = fifo_q_get(reqs, q->reqs);
-      enif_mutex_unlock(q->reqs_mutex);
 
       /* Ensure that there is at least one other worker thread watching this
          queue. */
       enif_cond_signal(q->reqs_cnd);
+      enif_mutex_unlock(q->reqs_mutex);
 
       /* Perform the work. */
       req->fn_work(req->env, req->ref, &req->pid, worker_id, req->args);
diff --git a/c_src/common.h b/c_src/common.h
index 70aea07..82db007 100644
--- a/c_src/common.h
+++ b/c_src/common.h
@@ -24,7 +24,7 @@ extern "C" {
 #endif
 
-
+#define DEBUG 1
 #if !(__STDC_VERSION__ >= 199901L || defined(__GNUC__))
 # undef DEBUG
 # define DEBUG 0
diff --git a/c_src/kbtree.h b/c_src/kbtree.h
deleted file mode 100644
index 09184cc..0000000
--- a/c_src/kbtree.h
+++ /dev/null
@@ -1,507 +0,0 @@
-/*-
- * Copyright 1997-1999, 2001, John-Mark Gurney.
- *           2008-2009, Attractive Chaos
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   1. Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *   2. Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in the
- *      documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef __AC_KBTREE_H -#define __AC_KBTREE_H - -#include -#include -#include - -typedef struct { - int32_t is_internal:1, n:31; -} kbnode_t; - -#define __KB_KEY(type, x) ((type*)((char*)x + 4)) -#define __KB_PTR(btr, x) ((kbnode_t**)((char*)x + btr->off_ptr)) - -#define __KB_TREE_T(name) \ - typedef struct { \ - kbnode_t *root; \ - int off_key, off_ptr, ilen, elen; \ - int n, t; \ - int n_keys, n_nodes; \ - } kbtree_##name##_t; - -#define __KB_INIT(name, key_t) \ - kbtree_##name##_t *kb_init_##name(int size) \ - { \ - kbtree_##name##_t *b; \ - b = (kbtree_##name##_t*)calloc(1, sizeof(kbtree_##name##_t)); \ - b->t = ((size - 4 - sizeof(void*)) / (sizeof(void*) + sizeof(key_t)) + 1) >> 1; \ - if (b->t < 2) { \ - free(b); return 0; \ - } \ - b->n = 2 * b->t - 1; \ - b->off_ptr = 4 + b->n * sizeof(key_t); \ - b->ilen = (4 + sizeof(void*) + b->n * (sizeof(void*) + sizeof(key_t)) + 3) >> 2 << 2; \ - b->elen = (b->off_ptr + 3) >> 2 << 2; \ - b->root = (kbnode_t*)calloc(1, b->ilen); \ - ++b->n_nodes; \ - return b; \ - } - -#define __kb_destroy(b) do { \ - int i, max = 8; \ - kbnode_t *x, **top, **stack = 0; \ - if (b) { \ - top = stack = (kbnode_t**)calloc(max, sizeof(kbnode_t*)); \ - *top++ = (b)->root; \ - while (top != stack) { \ - x = *--top; \ - if (x->is_internal == 0) { free(x); continue; } \ - for (i = 0; i <= x->n; ++i) \ - if (__KB_PTR(b, x)[i]) { \ - if (top - stack == max) { \ - max <<= 1; \ - stack = (kbnode_t**)realloc(stack, max * sizeof(kbnode_t*)); \ - top = stack + (max>>1); \ - } \ - *top++ = __KB_PTR(b, x)[i]; \ - } \ - free(x); \ - } \ - } \ - free(b); free(stack); \ - } while (0) - -#define __kb_get_first(key_t, b, ret) do { \ - kbnode_t *__x = (b)->root; \ - while (__KB_PTR(b, __x)[0] != 0) \ - __x = __KB_PTR(b, __x)[0]; \ - (ret) = __KB_KEY(key_t, __x)[0]; \ - } while (0) - -#define __KB_GET_AUX0(name, key_t, __cmp) \ - static inline int __kb_get_aux_##name(const kbnode_t * __restrict x, const key_t * __restrict k, int *r) \ - { \ - int tr, *rr, begin, end, n = x->n >> 1; \ - if (x->n == 0) return -1; \ - if (__cmp(*k, __KB_KEY(key_t, x)[n]) < 0) { \ - begin = 0; end = n; \ - } else { begin = n; end = x->n - 1; } \ - rr = r? r : &tr; \ - n = end; \ - while (n >= begin && (*rr = __cmp(*k, __KB_KEY(key_t, x)[n])) < 0) --n; \ - return n; \ - } - -#define __KB_GET_AUX1(name, key_t, __cmp) \ - static inline int __kb_getp_aux_##name(const kbnode_t * __restrict x, const key_t * __restrict k, int *r) \ - { \ - int tr, *rr, begin = 0, end = x->n; \ - if (x->n == 0) return -1; \ - rr = r? 
r : &tr; \ - while (begin < end) { \ - int mid = (begin + end) >> 1; \ - if (__cmp(__KB_KEY(key_t, x)[mid], *k) < 0) begin = mid + 1; \ - else end = mid; \ - } \ - if (begin == x->n) { *rr = 1; return x->n - 1; } \ - if ((*rr = __cmp(*k, __KB_KEY(key_t, x)[begin])) < 0) --begin; \ - return begin; \ - } - -#define __KB_GET(name, key_t) \ - static key_t *kb_getp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \ - { \ - int i, r = 0; \ - kbnode_t *x = b->root; \ - while (x) { \ - i = __kb_getp_aux_##name(x, k, &r); \ - if (i >= 0 && r == 0) return &__KB_KEY(key_t, x)[i]; \ - if (x->is_internal == 0) return 0; \ - x = __KB_PTR(b, x)[i + 1]; \ - } \ - return 0; \ - } \ - static inline key_t *kb_get_##name(kbtree_##name##_t *b, const key_t k) \ - { \ - return kb_getp_##name(b, &k); \ - } - -#define __KB_INTERVAL(name, key_t) \ - static void kb_intervalp_##name(kbtree_##name##_t *b, const key_t * __restrict k, key_t **lower, key_t **upper) \ - { \ - int i, r = 0; \ - kbnode_t *x = b->root; \ - *lower = *upper = 0; \ - while (x) { \ - i = __kb_getp_aux_##name(x, k, &r); \ - if (i >= 0 && r == 0) { \ - *lower = *upper = &__KB_KEY(key_t, x)[i]; \ - return; \ - } \ - if (i >= 0) *lower = &__KB_KEY(key_t, x)[i]; \ - if (i < x->n - 1) *upper = &__KB_KEY(key_t, x)[i + 1]; \ - if (x->is_internal == 0) return; \ - x = __KB_PTR(b, x)[i + 1]; \ - } \ - } \ - static inline void kb_interval_##name(kbtree_##name##_t *b, const key_t k, key_t **lower, key_t **upper) \ - { \ - kb_intervalp_##name(b, &k, lower, upper); \ - } - -#define __KB_PUT(name, key_t, __cmp) \ - /* x must be an internal node */ \ - static void __kb_split_##name(kbtree_##name##_t *b, kbnode_t *x, int i, kbnode_t *y) \ - { \ - kbnode_t *z; \ - z = (kbnode_t*)calloc(1, y->is_internal? b->ilen : b->elen); \ - ++b->n_nodes; \ - z->is_internal = y->is_internal; \ - z->n = b->t - 1; \ - memcpy(__KB_KEY(key_t, z), __KB_KEY(key_t, y) + b->t, sizeof(key_t) * (b->t - 1)); \ - if (y->is_internal) memcpy(__KB_PTR(b, z), __KB_PTR(b, y) + b->t, sizeof(void*) * b->t); \ - y->n = b->t - 1; \ - memmove(__KB_PTR(b, x) + i + 2, __KB_PTR(b, x) + i + 1, sizeof(void*) * (x->n - i)); \ - __KB_PTR(b, x)[i + 1] = z; \ - memmove(__KB_KEY(key_t, x) + i + 1, __KB_KEY(key_t, x) + i, sizeof(key_t) * (x->n - i)); \ - __KB_KEY(key_t, x)[i] = __KB_KEY(key_t, y)[b->t - 1]; \ - ++x->n; \ - } \ - static void __kb_putp_aux_##name(kbtree_##name##_t *b, kbnode_t *x, const key_t * __restrict k) \ - { \ - int i = x->n - 1; \ - if (x->is_internal == 0) { \ - i = __kb_getp_aux_##name(x, k, 0); \ - if (i != x->n - 1) \ - memmove(__KB_KEY(key_t, x) + i + 2, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \ - __KB_KEY(key_t, x)[i + 1] = *k; \ - ++x->n; \ - } else { \ - i = __kb_getp_aux_##name(x, k, 0) + 1; \ - if (__KB_PTR(b, x)[i]->n == 2 * b->t - 1) { \ - __kb_split_##name(b, x, i, __KB_PTR(b, x)[i]); \ - if (__cmp(*k, __KB_KEY(key_t, x)[i]) > 0) ++i; \ - } \ - __kb_putp_aux_##name(b, __KB_PTR(b, x)[i], k); \ - } \ - } \ - static void kb_putp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \ - { \ - kbnode_t *r, *s; \ - ++b->n_keys; \ - r = b->root; \ - if (r->n == 2 * b->t - 1) { \ - ++b->n_nodes; \ - s = (kbnode_t*)calloc(1, b->ilen); \ - b->root = s; s->is_internal = 1; s->n = 0; \ - __KB_PTR(b, s)[0] = r; \ - __kb_split_##name(b, s, 0, r); \ - r = s; \ - } \ - __kb_putp_aux_##name(b, r, k); \ - } \ - static inline void kb_put_##name(kbtree_##name##_t *b, const key_t k) \ - { \ - kb_putp_##name(b, &k); \ - } - - -#define __KB_DEL(name, key_t) \ - 
static key_t __kb_delp_aux_##name(kbtree_##name##_t *b, kbnode_t *x, const key_t * __restrict k, int s) \ - { \ - int yn, zn, i, r = 0; \ - kbnode_t *xp, *y, *z; \ - key_t kp; \ - if (x == 0) return *k; \ - if (s) { /* s can only be 0, 1 or 2 */ \ - r = x->is_internal == 0? 0 : s == 1? 1 : -1; \ - i = s == 1? x->n - 1 : -1; \ - } else i = __kb_getp_aux_##name(x, k, &r); \ - if (x->is_internal == 0) { \ - if (s == 2) ++i; \ - kp = __KB_KEY(key_t, x)[i]; \ - memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \ - --x->n; \ - return kp; \ - } \ - if (r == 0) { \ - if ((yn = __KB_PTR(b, x)[i]->n) >= b->t) { \ - xp = __KB_PTR(b, x)[i]; \ - kp = __KB_KEY(key_t, x)[i]; \ - __KB_KEY(key_t, x)[i] = __kb_delp_aux_##name(b, xp, 0, 1); \ - return kp; \ - } else if ((zn = __KB_PTR(b, x)[i + 1]->n) >= b->t) { \ - xp = __KB_PTR(b, x)[i + 1]; \ - kp = __KB_KEY(key_t, x)[i]; \ - __KB_KEY(key_t, x)[i] = __kb_delp_aux_##name(b, xp, 0, 2); \ - return kp; \ - } else if (yn == b->t - 1 && zn == b->t - 1) { \ - y = __KB_PTR(b, x)[i]; z = __KB_PTR(b, x)[i + 1]; \ - __KB_KEY(key_t, y)[y->n++] = *k; \ - memmove(__KB_KEY(key_t, y) + y->n, __KB_KEY(key_t, z), z->n * sizeof(key_t)); \ - if (y->is_internal) memmove(__KB_PTR(b, y) + y->n, __KB_PTR(b, z), (z->n + 1) * sizeof(void*)); \ - y->n += z->n; \ - memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \ - memmove(__KB_PTR(b, x) + i + 1, __KB_PTR(b, x) + i + 2, (x->n - i - 1) * sizeof(void*)); \ - --x->n; \ - free(z); \ - return __kb_delp_aux_##name(b, y, k, s); \ - } \ - } \ - ++i; \ - if ((xp = __KB_PTR(b, x)[i])->n == b->t - 1) { \ - if (i > 0 && (y = __KB_PTR(b, x)[i - 1])->n >= b->t) { \ - memmove(__KB_KEY(key_t, xp) + 1, __KB_KEY(key_t, xp), xp->n * sizeof(key_t)); \ - if (xp->is_internal) memmove(__KB_PTR(b, xp) + 1, __KB_PTR(b, xp), (xp->n + 1) * sizeof(void*)); \ - __KB_KEY(key_t, xp)[0] = __KB_KEY(key_t, x)[i - 1]; \ - __KB_KEY(key_t, x)[i - 1] = __KB_KEY(key_t, y)[y->n - 1]; \ - if (xp->is_internal) __KB_PTR(b, xp)[0] = __KB_PTR(b, y)[y->n]; \ - --y->n; ++xp->n; \ - } else if (i < x->n && (y = __KB_PTR(b, x)[i + 1])->n >= b->t) { \ - __KB_KEY(key_t, xp)[xp->n++] = __KB_KEY(key_t, x)[i]; \ - __KB_KEY(key_t, x)[i] = __KB_KEY(key_t, y)[0]; \ - if (xp->is_internal) __KB_PTR(b, xp)[xp->n] = __KB_PTR(b, y)[0]; \ - --y->n; \ - memmove(__KB_KEY(key_t, y), __KB_KEY(key_t, y) + 1, y->n * sizeof(key_t)); \ - if (y->is_internal) memmove(__KB_PTR(b, y), __KB_PTR(b, y) + 1, (y->n + 1) * sizeof(void*)); \ - } else if (i > 0 && (y = __KB_PTR(b, x)[i - 1])->n == b->t - 1) { \ - __KB_KEY(key_t, y)[y->n++] = __KB_KEY(key_t, x)[i - 1]; \ - memmove(__KB_KEY(key_t, y) + y->n, __KB_KEY(key_t, xp), xp->n * sizeof(key_t)); \ - if (y->is_internal) memmove(__KB_PTR(b, y) + y->n, __KB_PTR(b, xp), (xp->n + 1) * sizeof(void*)); \ - y->n += xp->n; \ - memmove(__KB_KEY(key_t, x) + i - 1, __KB_KEY(key_t, x) + i, (x->n - i) * sizeof(key_t)); \ - memmove(__KB_PTR(b, x) + i, __KB_PTR(b, x) + i + 1, (x->n - i) * sizeof(void*)); \ - --x->n; \ - free(xp); \ - xp = y; \ - } else if (i < x->n && (y = __KB_PTR(b, x)[i + 1])->n == b->t - 1) { \ - __KB_KEY(key_t, xp)[xp->n++] = __KB_KEY(key_t, x)[i]; \ - memmove(__KB_KEY(key_t, xp) + xp->n, __KB_KEY(key_t, y), y->n * sizeof(key_t)); \ - if (xp->is_internal) memmove(__KB_PTR(b, xp) + xp->n, __KB_PTR(b, y), (y->n + 1) * sizeof(void*)); \ - xp->n += y->n; \ - memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \ - 
memmove(__KB_PTR(b, x) + i + 1, __KB_PTR(b, x) + i + 2, (x->n - i - 1) * sizeof(void*)); \ - --x->n; \ - free(y); \ - } \ - } \ - return __kb_delp_aux_##name(b, xp, k, s); \ - } \ - static key_t kb_delp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \ - { \ - kbnode_t *x; \ - key_t ret; \ - ret = __kb_delp_aux_##name(b, b->root, k, 0); \ - --b->n_keys; \ - if (b->root->n == 0 && b->root->is_internal) { \ - --b->n_nodes; \ - x = b->root; \ - b->root = __KB_PTR(b, x)[0]; \ - free(x); \ - } \ - return ret; \ - } \ - static inline key_t kb_del_##name(kbtree_##name##_t *b, const key_t k) \ - { \ - return kb_delp_##name(b, &k); \ - } - -typedef struct { - kbnode_t *x; - int i; -} __kbstack_t; - -#define __kb_traverse(key_t, b, __func) do { \ - int __kmax = 8; \ - __kbstack_t *__kstack, *__kp; \ - __kp = __kstack = (__kbstack_t*)calloc(__kmax, sizeof(__kbstack_t)); \ - __kp->x = (b)->root; __kp->i = 0; \ - for (;;) { \ - while (__kp->x && __kp->i <= __kp->x->n) { \ - if (__kp - __kstack == __kmax - 1) { \ - __kmax <<= 1; \ - __kstack = (__kbstack_t*)realloc(__kstack, __kmax * sizeof(__kbstack_t)); \ - __kp = __kstack + (__kmax>>1) - 1; \ - } \ - (__kp+1)->i = 0; (__kp+1)->x = __kp->x->is_internal? __KB_PTR(b, __kp->x)[__kp->i] : 0; \ - ++__kp; \ - } \ - --__kp; \ - if (__kp >= __kstack) { \ - if (__kp->x && __kp->i < __kp->x->n) __func(&__KB_KEY(key_t, __kp->x)[__kp->i]); \ - ++__kp->i; \ - } else break; \ - } \ - free(__kstack); \ - } while (0) - -#define KBTREE_INIT(name, key_t, __cmp) \ - __KB_TREE_T(name) \ - __KB_INIT(name, key_t) \ - __KB_GET_AUX1(name, key_t, __cmp) \ - __KB_GET(name, key_t) \ - __KB_INTERVAL(name, key_t) \ - __KB_PUT(name, key_t, __cmp) \ - __KB_DEL(name, key_t) - -#define KB_DEFAULT_SIZE 512 - -#define kbtree_t(name) kbtree_##name##_t -#define kb_init(name, s) kb_init_##name(s) -#define kb_destroy(name, b) __kb_destroy(b) -#define kb_get(name, b, k) kb_get_##name(b, k) -#define kb_put(name, b, k) kb_put_##name(b, k) -#define kb_del(name, b, k) kb_del_##name(b, k) -#define kb_interval(name, b, k, l, u) kb_interval_##name(b, k, l, u) -#define kb_getp(name, b, k) kb_getp_##name(b, k) -#define kb_putp(name, b, k) kb_putp_##name(b, k) -#define kb_delp(name, b, k) kb_delp_##name(b, k) -#define kb_intervalp(name, b, k, l, u) kb_intervalp_##name(b, k, l, u) - -#define kb_size(b) ((b)->n_keys) - -#define kb_generic_cmp(a, b) (((b) < (a)) - ((a) < (b))) -#define kb_str_cmp(a, b) strcmp(a, b) - -#endif - -#ifdef TEST -#include -#include -#include -#include -#include -#include - -typedef const char *str_t; - -#include "kbtree.h" - -typedef struct { - unsigned key; - char *value; -} intmap_t; - -#define __intcmp(a, b) (((a).key > (b).key) - ((a).key < (b).key)) - -KBTREE_INIT(int, uint32_t, kb_generic_cmp) -KBTREE_INIT(str, str_t, kb_str_cmp) -KBTREE_INIT(intmap, intmap_t, __intcmp); - -static int data_size = 5000000; -static unsigned *int_data; -static char **str_data; -static intmap_t *intmap_data; - -void kb_init_data() -{ - int i; - char buf[256]; - printf("--- generating data... 
"); - srand48(11); - int_data = (unsigned*)calloc(data_size, sizeof(unsigned)); - str_data = (char**)calloc(data_size, sizeof(char*)); - intmap_data = (intmap_t*)calloc(data_size, sizeof(intmap_t)); - for (i = 0; i < data_size; ++i) { - int_data[i] = (unsigned)(data_size * drand48() / 4) * 271828183u; - sprintf(buf, "%x", int_data[i]); - str_data[i] = strdup(buf); - intmap_data[i].key = i; - intmap_data[i].value = str_data[i]; - } - printf("done!\n"); -} -void kb_destroy_data() -{ - int i; - for (i = 0; i < data_size; ++i) free(str_data[i]); - free(str_data); free(int_data); -} -void kb_tree_intmap() -{ - int i; - intmap_t *data = intmap_data; - kbtree_t(intmap) *h; - h = kb_init(intmap, KB_DEFAULT_SIZE); - for (i = 0; i < data_size; ++i) { - if (kb_get(intmap, h, data[i]) == 0) kb_put(intmap, h, data[i]); - else kb_del(intmap, h, data[i]); - } - printf("[kb_tree_intmap] size: %d\n", kb_size(h)); - __kb_destroy(h); -} -void kb_tree_int() -{ - int i; - unsigned *data = int_data; - uint32_t *l, *u; - kbtree_t(int) *h; - - h = kb_init(int, KB_DEFAULT_SIZE); - for (i = 0; i < data_size; ++i) { - if (kb_get(int, h, data[i]) == 0) kb_put(int, h, data[i]); - else kb_del(int, h, data[i]); - } - printf("[kb_tree_int] size: %d\n", kb_size(h)); - if (1) { - int cnt = 0; - uint32_t x, y; - kb_interval(int, h, 2174625464u, &l, &u); - printf("interval for 2174625464: (%u, %u)\n", l? *l : 0, u? *u : 0); -#define traverse_f(p) { if (cnt == 0) y = *p; ++cnt; } - __kb_traverse(uint32_t, h, traverse_f); - __kb_get_first(uint32_t, h, x); - printf("# of elements from traversal: %d\n", cnt); - printf("first element: %d == %d\n", x, y); - } - __kb_destroy(h); -} -void kb_tree_str() -{ - int i; - char **data = str_data; - kbtree_t(str) *h; - - h = kb_init(str, KB_DEFAULT_SIZE); - for (i = 0; i < data_size; ++i) { - if (kb_get(str, h, data[i]) == 0) kb_put(str, h, data[i]); - else kb_del(str, h, data[i]); - } - printf("[kb_tree_int] size: %d\n", kb_size(h)); - __kb_destroy(h); -} -void kb_timing(void (*f)(void)) -{ - clock_t t = clock(); - (*f)(); - printf("[kb_timing] %.3lf sec\n", (double)(clock() - t) / CLOCKS_PER_SEC); -} -int main(int argc, char *argv[]) -{ - if (argc > 1) data_size = atoi(argv[1]); - kb_init_data(); - kb_timing(kb_tree_int); - kb_timing(kb_tree_str); - kb_timing(kb_tree_intmap); - kb_destroy_data(); - return 0; -} -#endif diff --git a/c_src/wterl.c b/c_src/wterl.c index 81fcd7e..0774bdf 100644 --- a/c_src/wterl.c +++ b/c_src/wterl.c @@ -29,13 +29,14 @@ #include "wiredtiger.h" #include "stats.h" #include "async_nif.h" -#include "kbtree.h" +#include "khash.h" #include "queue.h" +#define MAX_CACHE_SIZE ASYNC_NIF_MAX_WORKERS + static ErlNifResourceType *wterl_conn_RESOURCE; static ErlNifResourceType *wterl_cursor_RESOURCE; -/* WiredTiger object names*/ typedef char Uri[128]; struct wterl_ctx { @@ -51,14 +52,15 @@ struct cache_entry { SLIST_HEAD(ctxs, wterl_ctx) contexts; }; -#define __ctx_sig_cmp(a, b) (((a).sig > (b).sig) - ((a).sig < (b).sig)) -KBTREE_INIT(cache_entries, struct cache_entry, __ctx_sig_cmp); +KHASH_MAP_INIT_INT64(cache_entries, struct cache_entry*); typedef struct wterl_conn { WT_CONNECTION *conn; const char *session_config; ErlNifMutex *cache_mutex; - kbtree_t(cache_entries) *cache; + khash_t(cache_entries) *cache; + uint32_t num_ctx_in_cache; + struct wterl_ctx *last_ctx_used[ASYNC_NIF_MAX_WORKERS]; SLIST_ENTRY(wterl_conn) conns; uint64_t histogram[64]; uint64_t histogram_count; @@ -149,17 +151,6 @@ static inline uint32_t __log2(uint64_t x) { } #endif -/** - * Is the context 
cache full?
- *
- * -> 0 = no/false, anything else is true
- */
-static int
-__ctx_cache_full(WterlConnHandle *conn_handle)
-{
-    return kb_size(conn_handle->cache) == ASYNC_NIF_MAX_WORKERS; // TODO:
-}
-
 /**
  * Evict items from the cache.
  *
@@ -171,12 +162,17 @@ __ctx_cache_full(WterlConnHandle *conn_handle)
 static int
 __ctx_cache_evict(WterlConnHandle *conn_handle)
 {
-    uint32_t num_evicted, i;
-    uint64_t mean, now;
-    struct wterl_ctx *to_free[ASYNC_NIF_MAX_WORKERS];
+    uint32_t mean, log, num_evicted = 0;
+    uint64_t now, elapsed;
+    khash_t(cache_entries) *h = conn_handle->cache;
+    khiter_t itr;
+    struct cache_entry *e;
+    struct wterl_ctx *c, *n;
+
+    if (conn_handle->num_ctx_in_cache != MAX_CACHE_SIZE)
+        return 0;
 
     now = cpu_clock_ticks();
-    kbtree_t(cache_entries) *t = conn_handle->cache;
 
     // Find the mean of the recorded times that items stayed in cache.
     mean = 0;
@@ -191,51 +187,32 @@ __ctx_cache_evict(WterlConnHandle *conn_handle)
 
     /*
      * Evict anything older than the mean time in queue by removing those
-     * items from the lists at the leaf nodes of the tree.
+     * items from the lists stored in the hash table.
      */
-    num_evicted = 0;
-#define traverse_f(p)                                                  \
-    {                                                                  \
-        struct cache_entry *e;                                         \
-        struct wterl_ctx *c;                                           \
-        e = (struct cache_entry *)p;                                   \
-        SLIST_FOREACH(c, &e->contexts, entries) {                      \
-            uint64_t elapsed = c->tstamp - now;                        \
-            uint32_t log = __log2(elapsed);                            \
-            if (log > mean) {                                          \
-                SLIST_REMOVE(&e->contexts, c, wterl_ctx, entries);     \
-                c->session->close(c->session, NULL);                   \
-                to_free[num_evicted] = c;                              \
-                num_evicted++;                                         \
-            }                                                          \
-        }                                                              \
+    for (itr = kh_begin(h); itr != kh_end(h); ++itr) {
+        if (kh_exist(h, itr)) {
+            e = kh_val(h, itr);
+            c = SLIST_FIRST(&e->contexts);
+            while (c != NULL) {
+                n = SLIST_NEXT(c, entries);
+                elapsed = now - c->tstamp;
+                log = __log2(elapsed);
+                if (log > mean) {
+                    SLIST_REMOVE(&e->contexts, c, wterl_ctx, entries);
+                    c->session->close(c->session, NULL);
+                    enif_free(c);
+                    num_evicted++;
+                }
+                c = n;
+            }
+            if (SLIST_EMPTY(&e->contexts)) {
+                kh_del(cache_entries, h, itr);
+                enif_free(e);
+                kh_value(h, itr) = NULL;
+            }
+        }
     }
-    __kb_traverse(struct cache_entry, t, traverse_f);
-#undef traverse_f
-
-    /*
-     * Free up the wterl_ctx we've removed after finishing the loop.
-     */
-    for (i = 0; i < num_evicted; i++) {
-        enif_free(to_free[i]);
-    }
-
-    /*
-     * Walk the tree again looking for empty lists to prune from the
-     * tree. 
-     */
-#define traverse_f(p)                                                  \
-    {                                                                  \
-        struct cache_entry *e, query;                                  \
-        e = p;                                                         \
-        query.sig = e->sig;                                            \
-        if (SLIST_EMPTY(&e->contexts)) {                               \
-            kb_del(cache_entries, t, query);                           \
-        }                                                              \
-    }
-    __kb_traverse(struct cache_entry, t, traverse_f);
-#undef traverse_f
-
+    conn_handle->num_ctx_in_cache -= num_evicted;
     return num_evicted;
 }
 
@@ -252,21 +229,30 @@ static struct wterl_ctx *
 __ctx_cache_find(WterlConnHandle *conn_handle, const uint64_t sig)
 {
     struct wterl_ctx *c = NULL;
-    struct cache_entry query, *result;
+    struct cache_entry *e;
+    khash_t(cache_entries) *h;
+    khiter_t itr;
 
-    query.sig = sig;
-    result = kb_get(cache_entries, conn_handle->cache, query);
-    if (result && !SLIST_EMPTY(&result->contexts)) {
-        /*
-         * cache hit:
-         * remove a context from the list in the tree node
-         */
-        c = SLIST_FIRST(&result->contexts);
-        SLIST_REMOVE_HEAD(&result->contexts, entries);
-        uint64_t elapsed = cpu_clock_ticks() - c->tstamp;
-        conn_handle->histogram[__log2(elapsed)]++;
-        conn_handle->histogram_count++;
-    } // else { cache miss
+    h = conn_handle->cache;
+    enif_mutex_lock(conn_handle->cache_mutex);
+    if (conn_handle->num_ctx_in_cache > 0) {
+        itr = kh_get(cache_entries, h, sig);
+        if (itr != kh_end(h)) {
+            e = kh_value(h, itr);
+            if (!SLIST_EMPTY(&e->contexts)) {
+                /*
+                 * cache hit:
+                 * remove a context from the list in the hash table entry
+                 */
+                c = SLIST_FIRST(&e->contexts);
+                SLIST_REMOVE_HEAD(&e->contexts, entries);
+                conn_handle->histogram[__log2(cpu_clock_ticks() - c->tstamp)]++;
+                conn_handle->histogram_count++;
+                conn_handle->num_ctx_in_cache -= 1;
+            }
+        }
+    }
+    enif_mutex_unlock(conn_handle->cache_mutex);
     return c;
 }
 
@@ -277,28 +263,28 @@ __ctx_cache_find(WterlConnHandle *conn_handle, const uint64_t sig)
  * the front of the LRU.
  */
 static void
-__ctx_cache_add(WterlConnHandle *conn, struct wterl_ctx *c)
+__ctx_cache_add(WterlConnHandle *conn_handle, struct wterl_ctx *c)
 {
-    struct cache_entry query, *result;
-
-    /*
-     * Check to see if the cache is full and if so trigger eviction which will
-     * remove the least-recently-used half of the items from the cache.
-     */
-    if (__ctx_cache_full(conn))
-        __ctx_cache_evict(conn);
+    struct cache_entry *e;
+    khash_t(cache_entries) *h;
+    khiter_t itr;
+    int itr_status;
 
+    enif_mutex_lock(conn_handle->cache_mutex);
+    __ctx_cache_evict(conn_handle);
     c->tstamp = cpu_clock_ticks();
-
-    query.sig = c->sig;
-    result = kb_get(cache_entries, conn->cache, query);
-    if (result == NULL) {
-        SLIST_INIT(&query.contexts); // TODO: should this be on the heap?
-        SLIST_INSERT_HEAD(&query.contexts, c, entries);
-        kb_put(cache_entries, conn->cache, query);
-    } else {
-        SLIST_INSERT_HEAD(&result->contexts, c, entries);
+    h = conn_handle->cache;
+    itr = kh_get(cache_entries, h, c->sig);
+    if (itr == kh_end(h)) {
+        e = enif_alloc(sizeof(struct cache_entry)); // TODO: enomem
+        SLIST_INIT(&e->contexts);
+        itr = kh_put(cache_entries, h, c->sig, &itr_status);
+        kh_value(h, itr) = e;
     }
+    e = kh_value(h, itr);
+    SLIST_INSERT_HEAD(&e->contexts, c, entries);
+    conn_handle->num_ctx_in_cache += 1;
+    enif_mutex_unlock(conn_handle->cache_mutex);
 }
 
 /**
@@ -352,7 +338,7 @@ __zi(uint32_t p, uint32_t q)
 /**
  * Create a signature for the operation we're about to perform.
  *
- * Create a 32bit signature for this a combination of session configuration
+ * Create a 64-bit signature for this, a combination of session configuration,
  * some number of cursors open on tables each potentially with a different
 * configuration. 
"session_config, [{table_name, cursor_config}, ...]" * @@ -386,7 +372,8 @@ __ctx_cache_sig(const char *c, va_list ap, int count) * session. */ static int -__retain_ctx(WterlConnHandle *conn_handle, struct wterl_ctx **ctx, +__retain_ctx(WterlConnHandle *conn_handle, uint32_t worker_id, + struct wterl_ctx **ctx, int count, const char *session_config, ...) { int i = 0; @@ -399,39 +386,50 @@ __retain_ctx(WterlConnHandle *conn_handle, struct wterl_ctx **ctx, sig = __ctx_cache_sig(session_config, ap, count); va_end(ap); - enif_mutex_lock(conn_handle->cache_mutex); - (*ctx) = __ctx_cache_find(conn_handle, sig); - if ((*ctx) == NULL) { - // cache miss - WT_CONNECTION *conn = conn_handle->conn; - WT_SESSION *session = NULL; - int rc = conn->open_session(conn, NULL, session_config, &session); - if (rc != 0) - return rc; - size_t s = sizeof(struct wterl_ctx) + (count * sizeof(WT_CURSOR*)); - *ctx = enif_alloc(s); // TODO: enif_alloc_resource() - if (*ctx == NULL) { - session->close(session, NULL); - return ENOMEM; - } - memset(*ctx, 0, s); - (*ctx)->sig = sig; - (*ctx)->session = session; - session_config = arg; - va_start(ap, session_config); - for (i = 0; i < count; i++) { - const char *uri = va_arg(ap, const char *); - const char *config = va_arg(ap, const char *); - // TODO: error when uri or config is NULL - rc = session->open_cursor(session, uri, NULL, config, &(*ctx)->cursors[i]); - if (rc != 0) { - session->close(session, NULL); // this will free the cursors too + DPRINTF("worker: %u cache size: %u", worker_id, conn_handle->num_ctx_in_cache); + if (conn_handle->last_ctx_used[worker_id] != NULL && + conn_handle->last_ctx_used[worker_id]->sig == sig) { + (*ctx) = conn_handle->last_ctx_used[worker_id]; + DPRINTF("worker: %u reuse hit: %lu %p", worker_id, sig, *ctx); + } else { + if (conn_handle->last_ctx_used[worker_id] != NULL) + __ctx_cache_add(conn_handle, conn_handle->last_ctx_used[worker_id]); + conn_handle->last_ctx_used[worker_id] = NULL; + (*ctx) = __ctx_cache_find(conn_handle, sig); + if ((*ctx) == NULL) { + // cache miss + DPRINTF("worker: %u cache miss: %lu", worker_id, sig); + WT_CONNECTION *conn = conn_handle->conn; + WT_SESSION *session = NULL; + int rc = conn->open_session(conn, NULL, session_config, &session); + if (rc != 0) return rc; + size_t s = sizeof(struct wterl_ctx) + (count * sizeof(WT_CURSOR*)); + *ctx = enif_alloc(s); // TODO: enif_alloc_resource() + if (*ctx == NULL) { + session->close(session, NULL); + return ENOMEM; } + memset(*ctx, 0, s); + (*ctx)->sig = sig; + (*ctx)->session = session; + session_config = arg; + va_start(ap, session_config); + for (i = 0; i < count; i++) { + const char *uri = va_arg(ap, const char *); + const char *config = va_arg(ap, const char *); + // TODO: error when uri or config is NULL + rc = session->open_cursor(session, uri, NULL, config, &(*ctx)->cursors[i]); + if (rc != 0) { + session->close(session, NULL); // this will free the cursors too + return rc; + } + } + va_end(ap); + } else { // else { cache hit } + DPRINTF("worker: %u cache hit: %lu %p", worker_id, sig, *ctx); } - va_end (ap); - } // else { cache hit } - enif_mutex_unlock(conn_handle->cache_mutex); + } return 0; } @@ -439,19 +437,20 @@ __retain_ctx(WterlConnHandle *conn_handle, struct wterl_ctx **ctx, * Return a context to the cache for reuse. 
 */
 static void
-__release_ctx(WterlConnHandle *conn_handle, struct wterl_ctx *ctx)
+__release_ctx(WterlConnHandle *conn_handle, uint32_t worker_id, struct wterl_ctx *ctx)
 {
-    int i, c;
+    int i, n;
     WT_CURSOR *cursor;
 
-    c = sizeof((WT_CURSOR**)ctx->cursors) / sizeof(ctx->cursors[0]);
-    for (i = 0; i < c; i++) {
+    DPRINTF("worker: %u cache size: %u", worker_id, conn_handle->num_ctx_in_cache);
+    n = sizeof((WT_CURSOR**)ctx->cursors) / sizeof(ctx->cursors[0]);
+    for (i = 0; i < n; i++) {
         cursor = ctx->cursors[i];
         cursor->reset(cursor);
     }
 
-    enif_mutex_lock(conn_handle->cache_mutex);
-    __ctx_cache_add(conn_handle, ctx);
-    enif_mutex_unlock(conn_handle->cache_mutex);
+    assert(conn_handle->last_ctx_used[worker_id] == 0 ||
+           conn_handle->last_ctx_used[worker_id] == ctx);
+    conn_handle->last_ctx_used[worker_id] = ctx;
 }
 
 /**
@@ -462,46 +461,32 @@ __release_ctx(WterlConnHandle *conn_handle, struct wterl_ctx *ctx)
 void
 __close_all_sessions(WterlConnHandle *conn_handle)
 {
-    int i, num_closed = 0;
-    struct wterl_ctx *to_free[ASYNC_NIF_MAX_WORKERS];
-    kbtree_t(cache_entries) *t = conn_handle->cache;
+    khash_t(cache_entries) *h = conn_handle->cache;
+    struct cache_entry *e;
+    struct wterl_ctx *c;
+    int i;
 
-#define traverse_f(p)                                                  \
-    {                                                                  \
-        struct cache_entry *e;                                         \
-        struct wterl_ctx *c;                                           \
-        e = (struct cache_entry *)p;                                   \
-        SLIST_FOREACH(c, &e->contexts, entries) {                      \
-            SLIST_REMOVE(&e->contexts, c, wterl_ctx, entries);         \
-            c->session->close(c->session, NULL);                       \
-            to_free[num_closed] = c;                                   \
-            num_closed++;                                              \
-        }                                                              \
+    for (i = 0; i < ASYNC_NIF_MAX_WORKERS; i++) {
+        c = conn_handle->last_ctx_used[i];
+        if (c != NULL) {
+            c->session->close(c->session, NULL);
+            enif_free(c);
+            conn_handle->last_ctx_used[i] = NULL;
+        }
     }
-    __kb_traverse(struct cache_entry *, t, traverse_f);
-#undef traverse_f
-
-    /*
-     * Free up the wterl_ctx we've removed after finishing the loop.
-     */
-    for (i = 0; i < num_closed; i++) {
-        enif_free(to_free[i]);
+    khiter_t itr;
+    for (itr = kh_begin(h); itr != kh_end(h); ++itr) {
+        if (kh_exist(h, itr)) {
+            e = kh_val(h, itr);
+            while ((c = SLIST_FIRST(&e->contexts)) != NULL) {
+                SLIST_REMOVE(&e->contexts, c, wterl_ctx, entries);
+                c->session->close(c->session, NULL);
+                enif_free(c);
+            }
+            kh_del(cache_entries, h, itr);
+            enif_free(e);
+            kh_value(h, itr) = NULL;
+        }
     }
-
-    /*
-     * Walk the tree again to prune all the empty lists from the tree.
-     */
-#define traverse_f(p)                                                  \
-    {                                                                  \
-        struct cache_entry *e, query;                                  \
-        e = (struct cache_entry *)p;                                   \
-        query.sig = e->sig;                                            \
-        if (SLIST_EMPTY(&e->contexts)) {                               \
-            kb_del(cache_entries, t, query);                           \
-        }                                                              \
-    }
-    __kb_traverse(struct cache_entry, t, traverse_f);
-#undef traverse_f
+    conn_handle->num_ctx_in_cache = 0;
 }
 
 /**
@@ -728,8 +713,9 @@ ASYNC_NIF_DECL(
     conn_handle->conn = conn;
     ERL_NIF_TERM result = enif_make_resource(env, conn_handle);
 
-    /* Init tree which manages the cache of session/cursor(s) */
-    conn_handle->cache = kb_init(cache_entries, ASYNC_NIF_MAX_WORKERS); // TODO: size
+    /* Init hash table which manages the cache of session/cursor(s) */
+    conn_handle->cache = kh_init(cache_entries);
+    conn_handle->num_ctx_in_cache = 0;
 
    /* Keep track of open connections so as to free when unload/reload/etc.
      are called. 
*/ @@ -1425,7 +1411,7 @@ ASYNC_NIF_DECL( struct wterl_ctx *ctx = NULL; WT_CURSOR *cursor = NULL; - int rc = __retain_ctx(args->conn_handle, &ctx, 1, + int rc = __retain_ctx(args->conn_handle, worker_id, &ctx, 1, args->conn_handle->session_config, args->uri, "overwrite,raw"); if (rc != 0) { @@ -1440,7 +1426,7 @@ ASYNC_NIF_DECL( cursor->set_key(cursor, &item_key); rc = cursor->remove(cursor); ASYNC_NIF_REPLY(rc == 0 ? ATOM_OK : __strerror_term(env, rc)); - __release_ctx(args->conn_handle, ctx); + __release_ctx(args->conn_handle, worker_id, ctx); }, { // post @@ -1484,7 +1470,7 @@ ASYNC_NIF_DECL( struct wterl_ctx *ctx = NULL; WT_CURSOR *cursor = NULL; - int rc = __retain_ctx(args->conn_handle, &ctx, 1, + int rc = __retain_ctx(args->conn_handle, worker_id, &ctx, 1, args->conn_handle->session_config, args->uri, "overwrite,raw"); if (rc != 0) { @@ -1513,7 +1499,7 @@ ASYNC_NIF_DECL( unsigned char *bin = enif_make_new_binary(env, item_value.size, &value); memcpy(bin, item_value.data, item_value.size); ASYNC_NIF_REPLY(enif_make_tuple2(env, ATOM_OK, value)); - __release_ctx(args->conn_handle, ctx); + __release_ctx(args->conn_handle, worker_id, ctx); }, { // post @@ -1566,7 +1552,7 @@ ASYNC_NIF_DECL( struct wterl_ctx *ctx = NULL; WT_CURSOR *cursor = NULL; - int rc = __retain_ctx(args->conn_handle, &ctx, 1, + int rc = __retain_ctx(args->conn_handle, worker_id, &ctx, 1, args->conn_handle->session_config, args->uri, "overwrite,raw"); if (rc != 0) { @@ -1584,7 +1570,7 @@ ASYNC_NIF_DECL( item_value.size = value.size; cursor->set_value(cursor, &item_value); rc = cursor->insert(cursor); - __release_ctx(args->conn_handle, ctx); + __release_ctx(args->conn_handle, worker_id, ctx); ASYNC_NIF_REPLY(rc == 0 ? ATOM_OK : __strerror_term(env, rc)); }, { // post @@ -2388,8 +2374,9 @@ on_unload(ErlNifEnv *env, void *priv_data) SLIST_FOREACH(conn_handle, &priv->conns, conns) { __close_all_sessions(conn_handle); conn_handle->conn->close(conn_handle->conn, NULL); - kb_destroy(cache_entries, conn_handle->cache); - enif_free((void*)conn_handle->session_config); + kh_destroy(cache_entries, conn_handle->cache); + if (conn_handle->session_config) + enif_free((void*)conn_handle->session_config); enif_mutex_unlock(conn_handle->cache_mutex); enif_mutex_destroy(conn_handle->cache_mutex); } diff --git a/src/wterl.erl b/src/wterl.erl index 45afae5..b940433 100644 --- a/src/wterl.erl +++ b/src/wterl.erl @@ -664,7 +664,7 @@ various_maintenance_test_() -> fun () -> {ok, CWD} = file:get_cwd(), ?assertMatch(ok, filelib:ensure_dir(filename:join([?TEST_DATA_DIR, "x"]))), - {ok, ConnRef} = connection_open(filename:join([CWD, ?TEST_DATA_DIR]), []), + {ok, ConnRef} = connection_open(filename:join([CWD, ?TEST_DATA_DIR]), [{create,true}]), ConnRef end, fun (ConnRef) -> diff --git a/tools/basho_bench_driver_wterl.erl b/tools/basho_bench_driver_wterl.erl index bdc6eb3..ae4dd93 100644 --- a/tools/basho_bench_driver_wterl.erl +++ b/tools/basho_bench_driver_wterl.erl @@ -26,12 +26,12 @@ new(1) -> new(Id) -> setup(Id). 
-setup(_Id) -> +setup(Id) -> %% Get the target directory Dir = basho_bench_config:get(wterl_dir, "/tmp"), Config = basho_bench_config:get(wterl, []), Uri = config_value(table_uri, Config, "lsm:test"), - ConnectionOpts = config_value(connection, Config, [{create, true}]), + ConnectionOpts = config_value(connection, Config, [{create,true},{session_max, 8192}]), SessionOpts = config_value(session, Config, []), TableOpts = config_value(table, Config, []), @@ -43,7 +43,7 @@ setup(_Id) -> {ok, Conn} -> Conn; {error, Reason0} -> - ?FAIL_MSG("Failed to establish a WiredTiger connection, wterl backend unable to start: ~p\n", [Reason0]) + ?FAIL_MSG("Failed to establish a WiredTiger connection for ~p, wterl backend unable to start: ~p\n", [Id, Reason0]) end; true -> {ok, Conn} = wterl_conn:get(),
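
Notes (illustrative sketches, not part of the diff):

The per-worker MRU stash added to __retain_ctx/__release_ctx turns the common
case (a worker repeating operations with the same signature) from a mutex
acquisition plus a hash lookup into a single integer comparison. Boiled down
to a compilable toy (names shortened; cache_find/cache_add are stubs standing
in for the khash-backed cache, and error handling is elided):

    #include <stdint.h>
    #include <stdlib.h>

    #define MAX_WORKERS 512

    struct ctx { uint64_t sig; /* ... session, cursors ... */ };

    /* One MRU slot per worker thread; the fast path touches no shared state. */
    static struct ctx *last_ctx_used[MAX_WORKERS];

    static struct ctx *cache_find(uint64_t sig) { (void)sig; return NULL; } /* stub: locked lookup */
    static void cache_add(struct ctx *c) { (void)c; }                       /* stub: locked insert */

    struct ctx *retain_ctx(uint32_t worker_id, uint64_t sig)
    {
        struct ctx *c = last_ctx_used[worker_id];
        if (c && c->sig == sig)
            return c;                    /* fast path: lock-free reuse */
        if (c) {                         /* shape changed: park the old ctx */
            cache_add(c);
            last_ctx_used[worker_id] = NULL;
        }
        c = cache_find(sig);             /* slow path: shared cache, mutex held */
        if (c == NULL) {                 /* miss: build a fresh context */
            c = calloc(1, sizeof(*c));
            c->sig = sig;
        }
        return c;
    }

    void release_ctx(uint32_t worker_id, struct ctx *c)
    {
        /* Keep the context in the worker's slot; the next call with the
         * same signature reuses it without taking any lock. */
        last_ctx_used[worker_id] = c;
    }

The catch, visible in __close_all_sessions above, is that contexts parked in
these slots are invisible to the shared cache, so shutdown and eviction have
to sweep the slots separately.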
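On the khash side, the cache is a map from 64-bit signature to a bucket
holding a SLIST of contexts that share that signature (khash.h is the
companion macro hash table from Attractive Chaos, the same author as the
kbtree.h being deleted). The lookup-or-insert idiom used in __ctx_cache_add
is the standard khash one; a minimal sketch with a dummy bucket type:

    #include <stdint.h>
    #include <stdlib.h>
    #include "khash.h"

    struct bucket { int n; /* ... SLIST head of contexts ... */ };

    KHASH_MAP_INIT_INT64(cache, struct bucket*)   /* uint64_t sig -> bucket */

    static struct bucket *find_or_create(khash_t(cache) *h, uint64_t sig)
    {
        int absent;
        khiter_t k = kh_get(cache, h, sig);       /* O(1) expected lookup */
        if (k == kh_end(h)) {                     /* miss: insert a new bucket */
            k = kh_put(cache, h, sig, &absent);
            kh_value(h, k) = calloc(1, sizeof(struct bucket));
        }
        return kh_value(h, k);
    }

Note that kh_put only reserves the slot and reports (via `absent`) whether the
key was new; the caller must assign the value, which is why __ctx_cache_add
writes kh_value(h, itr) = e immediately after the kh_put.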
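The async_nif.h hunks move enif_cond_signal ahead of enif_mutex_unlock, i.e.
they signal while still holding the mutex. Both orders avoid lost wakeups as
long as the waiter re-checks its predicate in a loop, but signalling under the
lock means the waiter is woken against exactly the state that prompted the
signal. The shape of the protocol, in a pthread-flavoured sketch (the enif_*
mutex/cond calls mirror these one-for-one):

    #include <pthread.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    static int queue_depth = 0;

    void enqueue(void)
    {
        pthread_mutex_lock(&m);
        queue_depth++;               /* publish the request under the lock */
        pthread_cond_signal(&cv);    /* signal before unlocking ...          */
        pthread_mutex_unlock(&m);    /* ... as in the reordered async_nif.h  */
    }

    void worker(void)
    {
        pthread_mutex_lock(&m);
        while (queue_depth == 0)     /* predicate re-checked under the lock */
            pthread_cond_wait(&cv, &m);
        queue_depth--;               /* take the request off the queue */
        pthread_mutex_unlock(&m);
        /* perform the work outside the lock */
    }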
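The diff truncates the body of __ctx_cache_sig, but conceptually it has to
fold the session config string plus each (table URI, cursor config) pair into
the single 64-bit key the khash is indexed by. An FNV-1a-style fold is one way
to picture it; this is illustrative only, not the hash the patch actually
computes (which builds on the __zi bit-interleave helper seen in context):

    #include <stdarg.h>
    #include <stdint.h>

    /* Illustrative 64-bit FNV-1a fold over a NUL-terminated string. */
    static uint64_t fnv1a64(uint64_t h, const char *s)
    {
        while (s && *s) {
            h ^= (uint8_t)*s++;
            h *= 1099511628211ULL;              /* FNV prime */
        }
        return h;
    }

    /* Caller owns va_start/va_end, as __retain_ctx does in the patch. */
    static uint64_t ctx_sig(const char *session_config, va_list ap, int count)
    {
        uint64_t h = 14695981039346656037ULL;   /* FNV offset basis */
        int i;
        h = fnv1a64(h, session_config);
        for (i = 0; i < count; i++) {
            h = fnv1a64(h, va_arg(ap, const char *));  /* table URI */
            h = fnv1a64(h, va_arg(ap, const char *));  /* cursor config */
        }
        return h;
    }

Any stable function of the same inputs works, since the signature is only ever
compared for equality; a weak hash just costs extra collisions, which the
per-signature SLIST absorbs.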