From bb460a505ecdd1d1675ed3571e450d4c74bd7998 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 12:06:12 -0400 Subject: [PATCH] WIP --- tests/soak.c | 1123 +++++++++++++++++++++++++++----------------------- 1 file changed, 613 insertions(+), 510 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index 595cd17..044997c 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -5,433 +5,16 @@ #include #include #include +#include #include "../include/common.h" #include "../include/roaring.h" #include "../include/sparsemap.h" #include "../include/tdigest.h" -/* midl.h ------------------------------------------------------------------ */ -/** @defgroup idls ID List Management - * @{ - */ -/** A generic unsigned ID number. These were entryIDs in back-bdb. - * Preferably it should have the same size as a pointer. - */ -typedef size_t MDB_ID; +#include "midl.c" -/** An IDL is an ID List, a sorted array of IDs. The first - * element of the array is a counter for how many actual - * IDs are in the list. In the original back-bdb code, IDLs are - * sorted in ascending order. For libmdb IDLs are sorted in - * descending order. - */ -typedef MDB_ID *MDB_IDL; - -/* IDL sizes - likely should be even bigger - * limiting factors: sizeof(ID), thread stack size - */ -#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ -#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN) -#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1)) - -#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1) -#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1) - -#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID)) -#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0) -#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src))) -#define MDB_IDL_FIRST(ids) ((ids)[1]) -#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]]) - -/** Current max length of an #mdb_midl_alloc()ed IDL */ -#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1]) - -/** Append ID to IDL. The IDL must be big enough. */ -#define mdb_midl_xappend(idl, id) \ - do { \ - MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ - xidl[xlen] = (id); \ - } while (0) - -/** Search for an ID in an IDL. - * @param[in] ids The IDL to search. - * @param[in] id The ID to search for. - * @return The index of the first ID greater than or equal to \b id. - */ -unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id); - -/** Allocate an IDL. - * Allocates memory for an IDL of the given size. - * @return IDL on success, NULL on failure. - */ -MDB_IDL mdb_midl_alloc(int num); - -/** Free an IDL. - * @param[in] ids The IDL to free. - */ -void mdb_midl_free(MDB_IDL ids); - -/** Shrink an IDL. - * Return the IDL to the default size if it has grown larger. - * @param[in,out] idp Address of the IDL to shrink. - */ -void mdb_midl_shrink(MDB_IDL *idp); - -/** Shrink an IDL to a specific size. - * Resize the IDL to \b size if it is larger. - * @param[in,out] idp Address of the IDL to shrink. - * @param[in] size Capacity to have once resized. - */ -void mdb_midl_shrink(MDB_IDL *idp); - -/** Make room for num additional elements in an IDL. - * @param[in,out] idp Address of the IDL. - * @param[in] num Number of elements to make room for. - * @return 0 on success, ENOMEM on failure. - */ -int mdb_midl_need(MDB_IDL *idp, unsigned num); - -/** Append an ID onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] id The ID to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append(MDB_IDL *idp, MDB_ID id); - -/** Append an IDL onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] app The IDL to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app); - -/** Append an ID range onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] id The lowest ID to append. - * @param[in] n Number of IDs to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n); - -/** Merge an IDL onto an IDL. The destination IDL must be big enough. - * @param[in] idl The IDL to merge into. - * @param[in] merge The IDL to merge. - */ -void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge); - -/** Sort an IDL. - * @param[in,out] ids The IDL to sort. - */ -void mdb_midl_sort(MDB_IDL ids); - -/* midl.c ------------------------------------------------------------------ */ -/** @defgroup idls ID List Management - * @{ - */ -#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y)) - -unsigned -mdb_midl_search(MDB_IDL ids, MDB_ID id) -{ - /* - * binary search of id in ids - * if found, returns position of id - * if not found, returns first position greater than id - */ - unsigned base = 0; - unsigned cursor = 1; - int val = 0; - unsigned n = ids[0]; - - while (0 < n) { - unsigned pivot = n >> 1; - cursor = base + pivot + 1; - val = CMP(ids[cursor], id); - - if (val < 0) { - n = pivot; - - } else if (val > 0) { - base = cursor; - n -= pivot + 1; - - } else { - return cursor; - } - } - - if (val > 0) { - ++cursor; - } - return cursor; -} - -int -mdb_midl_insert(MDB_IDL ids, MDB_ID id) -{ - unsigned x, i; - - x = mdb_midl_search(ids, id); - assert(x > 0); - - if (x < 1) { - /* internal error */ - return -2; - } - - if (x <= ids[0] && ids[x] == id) { - /* duplicate */ - assert(0); - return -1; - } - - if (++ids[0] >= MDB_IDL_DB_MAX) { - /* no room */ - --ids[0]; - return -2; - - } else { - /* insert id */ - for (i = ids[0]; i > x; i--) - ids[i] = ids[i - 1]; - ids[x] = id; - } - - return 0; -} - -inline void -mdb_midl_pop_n(MDB_IDL ids, unsigned n) -{ - ids[0] = ids[0] - n; -} - -void -mdb_midl_remove_at(MDB_IDL ids, unsigned idx) -{ - for (int i = idx - 1; idx < ids[0] - 1;) - ids[++i] = ids[++idx]; - ids[0] = ids[0] - 1; -} - -void -mdb_midl_remove(MDB_IDL ids, MDB_ID id) -{ - unsigned idx = mdb_midl_search(ids, id); - if (idx <= ids[0] && ids[idx] == id) - mdb_midl_remove_at(ids, idx); -} - -MDB_IDL -mdb_midl_alloc(int num) -{ - MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID)); - if (ids) { - *ids++ = num; - *ids = 0; - } - return ids; -} - -void -mdb_midl_free(MDB_IDL ids) -{ - if (ids) - free(ids - 1); -} - -void -mdb_midl_shrink(MDB_IDL *idp) -{ - MDB_IDL ids = *idp; - if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) { - *ids++ = MDB_IDL_UM_MAX; - *idp = ids; - } -} - -void -mdb_midl_shrink_to(MDB_IDL *idp, size_t size) -{ - MDB_IDL ids = *idp; - if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) { - *ids++ = size; - *idp = ids; - *idp[0] = *idp[0] > size ? size : *idp[0]; - } -} - -static int -mdb_midl_grow(MDB_IDL *idp, int num) -{ - MDB_IDL idn = *idp - 1; - /* grow it */ - idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); - if (!idn) - return ENOMEM; - *idn++ += num; - *idp = idn; - return 0; -} - -int -mdb_midl_need(MDB_IDL *idp, unsigned num) -{ - MDB_IDL ids = *idp; - num += ids[0]; - if (num > ids[-1]) { - num = (num + num / 4 + (256 + 2)) & -256; - if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID)))) - return ENOMEM; - *ids++ = num - 2; - *idp = ids; - } - return 0; -} - -int -mdb_midl_append(MDB_IDL *idp, MDB_ID id) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] >= ids[-1]) { - if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0]++; - ids[ids[0]] = id; - return 0; -} - -int -mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] + app[0] >= ids[-1]) { - if (mdb_midl_grow(idp, app[0])) - return ENOMEM; - ids = *idp; - } - memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID)); - ids[0] += app[0]; - return 0; -} - -int -mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n) -{ - MDB_ID *ids = *idp, len = ids[0]; - /* Too big? */ - if (len + n > ids[-1]) { - if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0] = len + n; - ids += len; - while (n) - ids[n--] = id++; - return 0; -} - -void -mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge) -{ - MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k; - idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ - old_id = idl[j]; - while (i) { - merge_id = merge[i--]; - for (; old_id < merge_id; old_id = idl[--j]) - idl[k--] = old_id; - idl[k--] = merge_id; - } - idl[0] = total; -} - -/* Quicksort + Insertion sort for small arrays */ - -#define SMALL 8 -#define MIDL_SWAP(a, b) \ - { \ - itmp = (a); \ - (a) = (b); \ - (b) = itmp; \ - } - -void -mdb_midl_sort(MDB_IDL ids) -{ - /* Max possible depth of int-indexed tree * 2 items/level */ - int istack[sizeof(int) * CHAR_BIT * 2]; - int i, j, k, l, ir, jstack; - MDB_ID a, itmp; - - ir = (int)ids[0]; - l = 1; - jstack = 0; - for (;;) { - if (ir - l < SMALL) { /* Insertion sort */ - for (j = l + 1; j <= ir; j++) { - a = ids[j]; - for (i = j - 1; i >= 1; i--) { - if (ids[i] >= a) - break; - ids[i + 1] = ids[i]; - } - ids[i + 1] = a; - } - if (jstack == 0) - break; - ir = istack[jstack--]; - l = istack[jstack--]; - } else { - k = (l + ir) >> 1; /* Choose median of left, center, right */ - MIDL_SWAP(ids[k], ids[l + 1]); - if (ids[l] < ids[ir]) { - MIDL_SWAP(ids[l], ids[ir]); - } - if (ids[l + 1] < ids[ir]) { - MIDL_SWAP(ids[l + 1], ids[ir]); - } - if (ids[l] < ids[l + 1]) { - MIDL_SWAP(ids[l], ids[l + 1]); - } - i = l + 1; - j = ir; - a = ids[l + 1]; - for (;;) { - do - i++; - while (ids[i] > a); - do - j--; - while (ids[j] < a); - if (j < i) - break; - MIDL_SWAP(ids[i], ids[j]); - } - ids[l + 1] = ids[j]; - ids[j] = a; - jstack += 2; - if (ir - i + 1 >= j - l) { - istack[jstack] = ir; - istack[jstack - 1] = i; - ir = j - 1; - } else { - istack[jstack] = j - 1; - istack[jstack - 1] = l; - l = i; - } - } - } -} -/* ------------------------------------------------------------------------- */ - -typedef MDB_ID pgno_t; +typedef size_t pgno_t; char * bytes_as(double bytes, char *s, size_t size) @@ -467,28 +50,299 @@ toss(size_t max) return level; } -bool -verify_midl_contains(MDB_IDL list, pgno_t pg) +bool recording = true; + +void +record_set_mutation(FILE *out, pgno_t pg) { - unsigned idx = mdb_midl_search(list, pg); - return idx <= list[0] && list[idx] == pg; + if (recording) { + fprintf(out, "set %lu\n", pg); + } } -bool -verify_midl_nodups(MDB_IDL list) +void +record_clear_mutation(FILE *out, pgno_t pg) { - pgno_t id = 1; - while (id < list[0]) { - if (list[id] == list[id + 1]) - return false; - id++; + if (recording) { + fprintf(out, "clear %lu\n", pg); + } +} + +void +record_take_span_mutation(FILE *out, pgno_t pg, unsigned len) +{ + if (recording) { + fprintf(out, "take %lu %u\n", pg, len); + } +} + +void +record_release_span_mutation(FILE *out, pgno_t pg, unsigned len) +{ + if (recording) { + fprintf(out, "release %lu %u\n", pg, len); + } +} + +/* sparsemap ------------------------------------------------------------- */ + +sparsemap_idx_t +_sparsemap_merge(sparsemap_t **map, sparsemap_t *other) +{ + do { + int retval = sparsemap_merge(*map, other); + if (retval != 0) { + if (errno == ENOSPC) { + size_t new_size = retval + (64 - (retval % 64)) + 64; + *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); + assert(*map != NULL); + errno = 0; + } else { + assert(false); + } + } else { + return retval; + } + } while (true); +} + +static sparsemap_idx_t +_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +{ + do { + sparsemap_idx_t l = sparsemap_set(*map, idx, value); + if (l != idx) { + if (errno == ENOSPC) { + *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); + assert(*map != NULL); + errno = 0; + } else { + assert(false); + } + } else { + return l; + } + } while (true); +} + +static void * +__sm_alloc(size_t capacity) +{ + return (void *)sparsemap(capacity); +} + +static void +__sm_free(void *handle) +{ + sparsemap_t *map = (sparsemap_t *)handle; + free(map); +} + +static pgno_t +__sm_set(void **handle, pgno_t pg) +{ + sparsemap_t **map = (sparsemap_t **)handle; + return (pgno_t)_sparsemap_set(map, pg, true); +} + +static bool +__sm_is_set(void *handle, pgno_t pg) +{ + sparsemap_t *map = (sparsemap_t *)handle; + return sparsemap_is_set(map, pg); +} + +static pgno_t +__sm_clear(void **handle, pgno_t pg) +{ + sparsemap_t **map = (sparsemap_t **)handle; + return (pgno_t)_sparsemap_set(map, pg, false); +} + +static pgno_t +__sm_find_span(void *handle, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + pgno_t pgno = (pgno_t)sparsemap_span(map, 0, len, true); + assert(SPARSEMAP_NOT_FOUND(pgno) == false); + return pgno; +} + +static bool +__sm_take_span(void **handle, pgno_t pg, unsigned len) +{ + sparsemap_t **map = (sparsemap_t **)handle; + for (pgno_t i = pg; i < pg + len; i++) { + assert(_sparsemap_set(map, i, false) == i); } return true; } -bool -verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) +static bool +__sm_release_span(void **handle, pgno_t pg, unsigned len) { + sparsemap_t **map = (sparsemap_t **)handle; + for (pgno_t i = pg; i <= len; i++) { + assert(_sparsemap_set(map, i, true) == i); + } + return true; +} + +static bool +__sm_is_span(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (pgno_t i = pg; i < pg + len; i++) { + if (sparsemap_is_set(map, i) != true) { + return false; + } + } + return true; +} + +static bool +__sm_is_empty(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (pgno_t i = 0; i < len; i++) { + if (sparsemap_is_set(map, pg + i) != false) { + return false; + } + } + return true; +} + +static bool +__sm_is_first(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (sparsemap_idx_t i = 0; i < pg + len; i++) { + sparsemap_idx_t j = 0; + while (sparsemap_is_set(map, i + j) == true && j < len) { + j++; + } + if (j == len) { + return i == pg; + } + } + return false; +} + +/* midl ------------------------------------------------------------------ */ + +static void * +__midl_alloc(size_t capacity) +{ + MDB_IDL list = mdb_midl_alloc(capacity); + return (void *)list; +} + +static void +__midl_free(void *handle) +{ + MDB_IDL list = (MDB_IDL)handle; + mdb_midl_free(list); +} + +static pgno_t +__midl_set(void **handle, pgno_t pg) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + if (list[0] + 1 == list[-1]) { + mdb_midl_need(&list, list[-1] + 1); + } + mdb_midl_insert(list, pg); + return pg; +} + +static bool +__midl_is_set(void *handle, pgno_t pg) +{ + MDB_IDL list = (MDB_IDL)handle; + pgno_t i = mdb_midl_search(list, pg); + return i <= list[0] && list[i] == pg; +} + +static pgno_t +__midl_clear(void **handle, pgno_t pg) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + unsigned len = list[0]; + list[0] = len -= 1; + for (unsigned j = pg - 1; j < len;) + list[++j] = list[++pg]; + for (unsigned j = len + 1; j <= list[-1]; j++) + list[j] = 0; + return pg; +} + +static pgno_t +__midl_find_span(void *handle, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; + + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + int retry = 1; + unsigned i = 0; + pgno_t pgno = 0, *mop = list; + unsigned n2 = len, mop_len = mop[0]; + do { + if (mop_len > n2) { + i = mop_len; + do { + pgno = mop[i]; + if (mop[i - n2] == pgno + n2) + goto search_done; + } while (--i > n2); + if (--retry < 0) + break; + } + } while (1); +search_done:; + return pgno; +} + +static bool +__midl_take_span(void **handle, pgno_t pg, unsigned len) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + + int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len; + unsigned j, num = len; + pgno_t *mop = list; + unsigned mop_len = mop[0]; + + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i - num; j < mop_len;) + mop[++j] = mop[++i]; + /* Set all unused values in the array to 0 + for (j = mop_len + 1; j <= mop[-1]; j++) + mop[j] = 0; + */ + return true; +} + +static bool +__midl_release_span(void **handle, pgno_t pg, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; + if (list[0] + len >= list[-1]) { + mdb_midl_need(&list, list[-1] + len); + } + for (size_t i = pg; i < pg + len; i++) { + mdb_midl_insert(list, i); + } + mdb_midl_sort(list); + return true; +} + +static bool +__midl_is_span(void *handle, pgno_t pg, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; pgno_t idx = mdb_midl_search(list, pg); bool found = idx <= list[0] && list[idx] == pg; if (!found) @@ -500,9 +354,10 @@ verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) return true; } -bool -verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) +static bool +__midl_is_empty(void *handle, pgno_t pg, unsigned len) { + MDB_IDL list = (MDB_IDL)handle; for (pgno_t i = pg; i < pg + len; i++) { pgno_t idx = mdb_midl_search(list, pg); bool found = idx <= list[0] && list[idx] == pg; @@ -512,9 +367,95 @@ verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) return true; } -bool -verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) +static bool +__midl_validate(void *handle) { + MDB_IDL list = (MDB_IDL)handle; + pgno_t id = 1; + while (id < list[0]) { + if (list[id] >= list[id + 1]) + return false; + id++; + } + return true; +} + +/* roaring --------------------------------------------------------------- */ + +static void * +__roar_alloc(size_t capacity) +{ + return roaring_bitmap_create(); +} + +static void +__roar_free(void *handle) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + roaring_free(rbm); +} + +static pgno_t +__roar_set(void **handle, pgno_t pg) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + assert(roaring_bitmap_add_checked(rbm, pg) == true); + return pg; +} + +static bool +__roar_is_set(void *handle, pgno_t pg) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + return roaring_bitmap_contains(rbm, pg); +} + +static pgno_t +__roar_clear(void **handle, pgno_t pg) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + roaring_bitmap_remove(rbm, pg); + return pg; +} + +static pgno_t +__roar_find_span(void *handle, unsigned len) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + uint64_t max = roaring_bitmap_maximum(rbm); + uint64_t offset = roaring_bitmap_minimum(rbm); + do { + if (len == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + len) == len) { + break; + } + offset++; + } while (offset <= max); + return offset; +} + +static bool +__roar_take_span(void **handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + roaring_bitmap_remove_range(rbm, pg, pg + len); + roaring_bitmap_run_optimize(rbm); + return true; +} + +static bool +__roar_release_span(void **handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + for (size_t i = pg; i < pg + len; i++) { + assert(roaring_bitmap_add_checked(rbm, i) == true); + } + return true; +} + +static bool +__roar_is_span(void *handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; for (pgno_t i = pg; i < pg + len; i++) { if (roaring_bitmap_contains(rbm, i) != true) { return false; @@ -523,31 +464,10 @@ verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) return true; } -bool -verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) -{ - for (pgno_t i = pg; i < pg + len; i++) { - if (sparsemap_is_set(map, i) != true) { - return false; - } - } - return true; -} - -bool -verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) -{ - for (pgno_t i = 0; i < len; i++) { - if (sparsemap_is_set(map, pg + i) != false) { - return false; - } - } - return true; -} - -bool -verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) +static bool +__roar_is_empty(void *handle, pgno_t pg, unsigned len) { + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; for (pgno_t i = 0; i < len; i++) { if (roaring_bitmap_contains(rbm, pg + i) != false) { return false; @@ -556,23 +476,98 @@ verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) return true; } -bool -verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) +static bool +__roar_validate(void *handle) { - for (sparsemap_idx_t i = 0; i < idx + len; i++) { - sparsemap_idx_t j = 0; - while (sparsemap_is_set(map, i + j) == value && j < len) { - j++; - } - if (j == len) { - return i == idx; - } - } - return false; + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + roaring_bitmap_run_optimize(rbm); + return true; } +/* ----------------------------------------------------------------------- */ + +typedef enum { SM, ML, RB } container_impl_t; + +typedef struct container { + const char *name; + void *(*alloc)(size_t capacity); + void (*free)(void *handle); + pgno_t (*set)(void **handle, pgno_t pg); + bool (*is_set)(void *handle, pgno_t pg); + pgno_t (*clear)(void **handle, pgno_t pg); + pgno_t (*find_span)(void *handle, unsigned len); + bool (*take_span)(void **handle, pgno_t pg, unsigned len); + bool (*release_span)(void **handle, pgno_t pg, unsigned len); + bool (*is_span)(void *handle, pgno_t pg, unsigned len); + bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + bool (*is_first)(void *handle, pgno_t pg, unsigned len); + bool (*validate)(void *handle); +} container_t; + +// clang-format off +container_t containers[] = { + { "sparsemap", + .alloc = __sm_alloc, + .free = __sm_free, + .set = __sm_set, + .is_set = __sm_is_set, + .clear = __sm_clear, + .find_span = __sm_find_span, + .take_span = __sm_take_span, + .release_span = __sm_release_span, + .is_span = __sm_is_span, + .is_empty = __sm_is_empty, + .is_first = __sm_is_first, + .validate = NULL + }, + { "midl", + .alloc = __midl_alloc, + .free = __midl_free, + .set = __midl_set, + .is_set = __midl_is_set, + .clear = __midl_clear, + .find_span = __midl_find_span, + .take_span = __midl_take_span, + .release_span = __midl_release_span, + .is_span = __midl_is_span, + .is_empty = __midl_is_empty, + .is_first = NULL, + .validate = __midl_validate + }, + { "roaring", + .alloc = __roar_alloc, + .free = __roar_free, + .set = __roar_set, + .is_set = __roar_is_set, + .clear = __roar_clear, + .find_span = __roar_find_span, + .take_span = __roar_take_span, + .release_span = __roar_release_span, + .is_span = __roar_is_span, + .is_empty = __roar_is_empty, + .is_first = NULL, + .validate = __roar_validate, + }, +}; +// clang-format on + +void *handles[3]; +FILE *fp; + +#define alloc(type, size) containers[type].alloc(size); +#define cast(type, fn, ...) \ + if (containers[type].fn) \ + containers[type].fn(handles[type], ##__VA_ARGS__) +#define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) +#define mutate(type, fn, ...) record_##fn##_mutation(fp, __VA_ARGS__), containers[type].fn(&handles[type], __VA_ARGS__) +#define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) +#define compare(set) \ + for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ + verify_eq(0, handles[0], type, handles[type]); \ + } + bool -verify_sm_eq_rm(sparsemap_t *map, roaring_bitmap_t *rbm) +verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm) { uint64_t max = roaring_bitmap_maximum(rbm); roaring_uint32_iterator_t iter; @@ -610,43 +605,23 @@ verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) return true; } -sparsemap_idx_t -_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +bool +verify_eq(unsigned a, void *ad, unsigned b, void *bd) { - do { - sparsemap_idx_t l = sparsemap_set(*map, idx, value); - if (l != idx) { - if (errno == ENOSPC) { - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); - assert(*map != NULL); - errno = 0; - } else { - assert(false); - } - } else { - return l; - } - } while (true); -} + bool ret = true; -sparsemap_idx_t -_sparsemap_merge(sparsemap_t **map, sparsemap_t *other) -{ - do { - int retval = sparsemap_merge(*map, other); - if (retval != 0) { - if (errno == ENOSPC) { - size_t new_size = retval + (64 - (retval % 64)) + 64; - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); - assert(*map != NULL); - errno = 0; - } else { - assert(false); - } - } else { - return retval; - } - } while (true); + // 'a' should always be a Sparsemap + switch (b) { + case ML: + assert((ret = verify_sm_eq_ml((sparsemap_t *)ad, (MDB_IDL)bd)) == true); + break; + case RB: + assert((ret = verify_sm_eq_rb((sparsemap_t *)ad, (roaring_bitmap_t *)bd)) == true); + break; + default: + break; + } + return ret; } td_histogram_t *l_span_loc; @@ -690,6 +665,133 @@ stats(size_t iterations, sparsemap_t *map, MDB_IDL list) #define INITIAL_AMOUNT 1024 * 2 +#define SHORT_OPT "r:fa:bh" +#define LONG_OPT "record:,force,amount:,buffer,help" + +void +print_usage(const char *program_name) +{ + printf("Usage: %s [OPTIONS]\n", program_name); + printf(" -r, --record Path to the file for recording (optional)\n"); + printf(" -f, --force Force overwrite of existing file (optional)\n"); + printf(" -b, --buffer Disable buffering writes to stdout/err (optional)\n"); + printf(" -a, --amount Specify the number of entries to record (must be positive, optional)\n"); + printf(" -h, --help Print this help message\n"); +} + +int +main(int argc, char *argv[]) +{ + int opt; + const char *record_file = NULL; + int force_flag = 0; + size_t amt = INITIAL_AMOUNT; + bool buffer = true; + + fp = stdout; + + while ((opt = getopt(argc, argv, SHORT_OPT LONG_OPT)) != -1) { + switch (opt) { + case 'r': + record_file = optarg; + break; + case 'f': + force_flag = 1; + break; + case 'b': + buffer = false; + break; + case 'a': + amt = atoi(optarg); + if (amt <= 0) { + fprintf(stderr, "Error: Invalid amount. Amount must be a positive number.\n"); + return 1; + } + break; + case 'h': + print_usage(argv[0]); + return 0; + case '?': + fprintf(stderr, "Unknown option: %c\n", optopt); + return 1; + default: + break; + } + } + + // Check if record file is specified + if (record_file == NULL) { + recording = false; + } else { + recording = true; + + // Check for existing file without force flag + if (access(record_file, F_OK) == 0 && !force_flag) { + fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file); + return 1; + } + + // Open the file for writing (truncate if force flag is set) + fp = fopen(record_file, force_flag ? "w" : "a"); + if (fp == NULL) { + perror("Error opening file"); + return 1; + } + } + + // disable buffering + if (!buffer) { + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + } + unsigned types[] = { SM, ML, RB }; + unsigned num_types = (sizeof((types)) / sizeof((types)[0])); + + /* Setup: add an amt of bits to each container. */ + foreach(types) + { + handles[type] = alloc(type, amt); + for (size_t i = 0; i < amt; i++) { + assert(invoke(type, is_set, i) == false); + assert(mutate(type, set, i) == i); + assert(invoke(type, is_set, i) == true); + } + cast(type, validate); + } + compare(types); + + while (true) { + // the an amount [1, 16] of pages to find preferring smaller sizes + unsigned len = toss(15) + 1; + pgno_t loc[num_types]; + foreach(types) + { + loc[type] = invoke(type, find_span, len); + } + for (unsigned n = 0; n < num_types; n++) { + foreach(types) + { + assert(invoke(type, is_span, loc[n], len)); + } + } + foreach(types) + { + cast(type, validate); + } + + unsigned which_loc = (unsigned)xorshift32() % num_types; + foreach(types) + { + assert(mutate(type, take_span, loc[which_loc], len)); + cast(type, validate); + } + compare(types); + } + + return 0; +} + +#if 0 /* * A "soak test" that tries to replicate behavior in LMDB for page allocation. */ @@ -751,7 +853,7 @@ main(void) int retry = 1; unsigned i = 0; pgno_t pgno = 0, *mop = list; - unsigned n2 = n, mop_len = mop[0]; + unsigned n2 = len, mop_len = mop[0]; if (mop_len > n2) { i = mop_len; do { @@ -1008,3 +1110,4 @@ main(void) return 0; } +#endif