From bb460a505ecdd1d1675ed3571e450d4c74bd7998 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 12:06:12 -0400 Subject: [PATCH 01/10] WIP --- tests/soak.c | 1123 +++++++++++++++++++++++++++----------------------- 1 file changed, 613 insertions(+), 510 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index 595cd17..044997c 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -5,433 +5,16 @@ #include #include #include +#include #include "../include/common.h" #include "../include/roaring.h" #include "../include/sparsemap.h" #include "../include/tdigest.h" -/* midl.h ------------------------------------------------------------------ */ -/** @defgroup idls ID List Management - * @{ - */ -/** A generic unsigned ID number. These were entryIDs in back-bdb. - * Preferably it should have the same size as a pointer. - */ -typedef size_t MDB_ID; +#include "midl.c" -/** An IDL is an ID List, a sorted array of IDs. The first - * element of the array is a counter for how many actual - * IDs are in the list. In the original back-bdb code, IDLs are - * sorted in ascending order. For libmdb IDLs are sorted in - * descending order. - */ -typedef MDB_ID *MDB_IDL; - -/* IDL sizes - likely should be even bigger - * limiting factors: sizeof(ID), thread stack size - */ -#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ -#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN) -#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1)) - -#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1) -#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1) - -#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID)) -#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0) -#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src))) -#define MDB_IDL_FIRST(ids) ((ids)[1]) -#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]]) - -/** Current max length of an #mdb_midl_alloc()ed IDL */ -#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1]) - -/** Append ID to IDL. The IDL must be big enough. */ -#define mdb_midl_xappend(idl, id) \ - do { \ - MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ - xidl[xlen] = (id); \ - } while (0) - -/** Search for an ID in an IDL. - * @param[in] ids The IDL to search. - * @param[in] id The ID to search for. - * @return The index of the first ID greater than or equal to \b id. - */ -unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id); - -/** Allocate an IDL. - * Allocates memory for an IDL of the given size. - * @return IDL on success, NULL on failure. - */ -MDB_IDL mdb_midl_alloc(int num); - -/** Free an IDL. - * @param[in] ids The IDL to free. - */ -void mdb_midl_free(MDB_IDL ids); - -/** Shrink an IDL. - * Return the IDL to the default size if it has grown larger. - * @param[in,out] idp Address of the IDL to shrink. - */ -void mdb_midl_shrink(MDB_IDL *idp); - -/** Shrink an IDL to a specific size. - * Resize the IDL to \b size if it is larger. - * @param[in,out] idp Address of the IDL to shrink. - * @param[in] size Capacity to have once resized. - */ -void mdb_midl_shrink(MDB_IDL *idp); - -/** Make room for num additional elements in an IDL. - * @param[in,out] idp Address of the IDL. - * @param[in] num Number of elements to make room for. - * @return 0 on success, ENOMEM on failure. - */ -int mdb_midl_need(MDB_IDL *idp, unsigned num); - -/** Append an ID onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] id The ID to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append(MDB_IDL *idp, MDB_ID id); - -/** Append an IDL onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] app The IDL to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app); - -/** Append an ID range onto an IDL. - * @param[in,out] idp Address of the IDL to append to. - * @param[in] id The lowest ID to append. - * @param[in] n Number of IDs to append. - * @return 0 on success, ENOMEM if the IDL is too large. - */ -int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n); - -/** Merge an IDL onto an IDL. The destination IDL must be big enough. - * @param[in] idl The IDL to merge into. - * @param[in] merge The IDL to merge. - */ -void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge); - -/** Sort an IDL. - * @param[in,out] ids The IDL to sort. - */ -void mdb_midl_sort(MDB_IDL ids); - -/* midl.c ------------------------------------------------------------------ */ -/** @defgroup idls ID List Management - * @{ - */ -#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y)) - -unsigned -mdb_midl_search(MDB_IDL ids, MDB_ID id) -{ - /* - * binary search of id in ids - * if found, returns position of id - * if not found, returns first position greater than id - */ - unsigned base = 0; - unsigned cursor = 1; - int val = 0; - unsigned n = ids[0]; - - while (0 < n) { - unsigned pivot = n >> 1; - cursor = base + pivot + 1; - val = CMP(ids[cursor], id); - - if (val < 0) { - n = pivot; - - } else if (val > 0) { - base = cursor; - n -= pivot + 1; - - } else { - return cursor; - } - } - - if (val > 0) { - ++cursor; - } - return cursor; -} - -int -mdb_midl_insert(MDB_IDL ids, MDB_ID id) -{ - unsigned x, i; - - x = mdb_midl_search(ids, id); - assert(x > 0); - - if (x < 1) { - /* internal error */ - return -2; - } - - if (x <= ids[0] && ids[x] == id) { - /* duplicate */ - assert(0); - return -1; - } - - if (++ids[0] >= MDB_IDL_DB_MAX) { - /* no room */ - --ids[0]; - return -2; - - } else { - /* insert id */ - for (i = ids[0]; i > x; i--) - ids[i] = ids[i - 1]; - ids[x] = id; - } - - return 0; -} - -inline void -mdb_midl_pop_n(MDB_IDL ids, unsigned n) -{ - ids[0] = ids[0] - n; -} - -void -mdb_midl_remove_at(MDB_IDL ids, unsigned idx) -{ - for (int i = idx - 1; idx < ids[0] - 1;) - ids[++i] = ids[++idx]; - ids[0] = ids[0] - 1; -} - -void -mdb_midl_remove(MDB_IDL ids, MDB_ID id) -{ - unsigned idx = mdb_midl_search(ids, id); - if (idx <= ids[0] && ids[idx] == id) - mdb_midl_remove_at(ids, idx); -} - -MDB_IDL -mdb_midl_alloc(int num) -{ - MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID)); - if (ids) { - *ids++ = num; - *ids = 0; - } - return ids; -} - -void -mdb_midl_free(MDB_IDL ids) -{ - if (ids) - free(ids - 1); -} - -void -mdb_midl_shrink(MDB_IDL *idp) -{ - MDB_IDL ids = *idp; - if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) { - *ids++ = MDB_IDL_UM_MAX; - *idp = ids; - } -} - -void -mdb_midl_shrink_to(MDB_IDL *idp, size_t size) -{ - MDB_IDL ids = *idp; - if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) { - *ids++ = size; - *idp = ids; - *idp[0] = *idp[0] > size ? size : *idp[0]; - } -} - -static int -mdb_midl_grow(MDB_IDL *idp, int num) -{ - MDB_IDL idn = *idp - 1; - /* grow it */ - idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); - if (!idn) - return ENOMEM; - *idn++ += num; - *idp = idn; - return 0; -} - -int -mdb_midl_need(MDB_IDL *idp, unsigned num) -{ - MDB_IDL ids = *idp; - num += ids[0]; - if (num > ids[-1]) { - num = (num + num / 4 + (256 + 2)) & -256; - if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID)))) - return ENOMEM; - *ids++ = num - 2; - *idp = ids; - } - return 0; -} - -int -mdb_midl_append(MDB_IDL *idp, MDB_ID id) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] >= ids[-1]) { - if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0]++; - ids[ids[0]] = id; - return 0; -} - -int -mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] + app[0] >= ids[-1]) { - if (mdb_midl_grow(idp, app[0])) - return ENOMEM; - ids = *idp; - } - memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID)); - ids[0] += app[0]; - return 0; -} - -int -mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n) -{ - MDB_ID *ids = *idp, len = ids[0]; - /* Too big? */ - if (len + n > ids[-1]) { - if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0] = len + n; - ids += len; - while (n) - ids[n--] = id++; - return 0; -} - -void -mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge) -{ - MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k; - idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ - old_id = idl[j]; - while (i) { - merge_id = merge[i--]; - for (; old_id < merge_id; old_id = idl[--j]) - idl[k--] = old_id; - idl[k--] = merge_id; - } - idl[0] = total; -} - -/* Quicksort + Insertion sort for small arrays */ - -#define SMALL 8 -#define MIDL_SWAP(a, b) \ - { \ - itmp = (a); \ - (a) = (b); \ - (b) = itmp; \ - } - -void -mdb_midl_sort(MDB_IDL ids) -{ - /* Max possible depth of int-indexed tree * 2 items/level */ - int istack[sizeof(int) * CHAR_BIT * 2]; - int i, j, k, l, ir, jstack; - MDB_ID a, itmp; - - ir = (int)ids[0]; - l = 1; - jstack = 0; - for (;;) { - if (ir - l < SMALL) { /* Insertion sort */ - for (j = l + 1; j <= ir; j++) { - a = ids[j]; - for (i = j - 1; i >= 1; i--) { - if (ids[i] >= a) - break; - ids[i + 1] = ids[i]; - } - ids[i + 1] = a; - } - if (jstack == 0) - break; - ir = istack[jstack--]; - l = istack[jstack--]; - } else { - k = (l + ir) >> 1; /* Choose median of left, center, right */ - MIDL_SWAP(ids[k], ids[l + 1]); - if (ids[l] < ids[ir]) { - MIDL_SWAP(ids[l], ids[ir]); - } - if (ids[l + 1] < ids[ir]) { - MIDL_SWAP(ids[l + 1], ids[ir]); - } - if (ids[l] < ids[l + 1]) { - MIDL_SWAP(ids[l], ids[l + 1]); - } - i = l + 1; - j = ir; - a = ids[l + 1]; - for (;;) { - do - i++; - while (ids[i] > a); - do - j--; - while (ids[j] < a); - if (j < i) - break; - MIDL_SWAP(ids[i], ids[j]); - } - ids[l + 1] = ids[j]; - ids[j] = a; - jstack += 2; - if (ir - i + 1 >= j - l) { - istack[jstack] = ir; - istack[jstack - 1] = i; - ir = j - 1; - } else { - istack[jstack] = j - 1; - istack[jstack - 1] = l; - l = i; - } - } - } -} -/* ------------------------------------------------------------------------- */ - -typedef MDB_ID pgno_t; +typedef size_t pgno_t; char * bytes_as(double bytes, char *s, size_t size) @@ -467,28 +50,299 @@ toss(size_t max) return level; } -bool -verify_midl_contains(MDB_IDL list, pgno_t pg) +bool recording = true; + +void +record_set_mutation(FILE *out, pgno_t pg) { - unsigned idx = mdb_midl_search(list, pg); - return idx <= list[0] && list[idx] == pg; + if (recording) { + fprintf(out, "set %lu\n", pg); + } } -bool -verify_midl_nodups(MDB_IDL list) +void +record_clear_mutation(FILE *out, pgno_t pg) { - pgno_t id = 1; - while (id < list[0]) { - if (list[id] == list[id + 1]) - return false; - id++; + if (recording) { + fprintf(out, "clear %lu\n", pg); + } +} + +void +record_take_span_mutation(FILE *out, pgno_t pg, unsigned len) +{ + if (recording) { + fprintf(out, "take %lu %u\n", pg, len); + } +} + +void +record_release_span_mutation(FILE *out, pgno_t pg, unsigned len) +{ + if (recording) { + fprintf(out, "release %lu %u\n", pg, len); + } +} + +/* sparsemap ------------------------------------------------------------- */ + +sparsemap_idx_t +_sparsemap_merge(sparsemap_t **map, sparsemap_t *other) +{ + do { + int retval = sparsemap_merge(*map, other); + if (retval != 0) { + if (errno == ENOSPC) { + size_t new_size = retval + (64 - (retval % 64)) + 64; + *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); + assert(*map != NULL); + errno = 0; + } else { + assert(false); + } + } else { + return retval; + } + } while (true); +} + +static sparsemap_idx_t +_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +{ + do { + sparsemap_idx_t l = sparsemap_set(*map, idx, value); + if (l != idx) { + if (errno == ENOSPC) { + *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); + assert(*map != NULL); + errno = 0; + } else { + assert(false); + } + } else { + return l; + } + } while (true); +} + +static void * +__sm_alloc(size_t capacity) +{ + return (void *)sparsemap(capacity); +} + +static void +__sm_free(void *handle) +{ + sparsemap_t *map = (sparsemap_t *)handle; + free(map); +} + +static pgno_t +__sm_set(void **handle, pgno_t pg) +{ + sparsemap_t **map = (sparsemap_t **)handle; + return (pgno_t)_sparsemap_set(map, pg, true); +} + +static bool +__sm_is_set(void *handle, pgno_t pg) +{ + sparsemap_t *map = (sparsemap_t *)handle; + return sparsemap_is_set(map, pg); +} + +static pgno_t +__sm_clear(void **handle, pgno_t pg) +{ + sparsemap_t **map = (sparsemap_t **)handle; + return (pgno_t)_sparsemap_set(map, pg, false); +} + +static pgno_t +__sm_find_span(void *handle, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + pgno_t pgno = (pgno_t)sparsemap_span(map, 0, len, true); + assert(SPARSEMAP_NOT_FOUND(pgno) == false); + return pgno; +} + +static bool +__sm_take_span(void **handle, pgno_t pg, unsigned len) +{ + sparsemap_t **map = (sparsemap_t **)handle; + for (pgno_t i = pg; i < pg + len; i++) { + assert(_sparsemap_set(map, i, false) == i); } return true; } -bool -verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) +static bool +__sm_release_span(void **handle, pgno_t pg, unsigned len) { + sparsemap_t **map = (sparsemap_t **)handle; + for (pgno_t i = pg; i <= len; i++) { + assert(_sparsemap_set(map, i, true) == i); + } + return true; +} + +static bool +__sm_is_span(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (pgno_t i = pg; i < pg + len; i++) { + if (sparsemap_is_set(map, i) != true) { + return false; + } + } + return true; +} + +static bool +__sm_is_empty(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (pgno_t i = 0; i < len; i++) { + if (sparsemap_is_set(map, pg + i) != false) { + return false; + } + } + return true; +} + +static bool +__sm_is_first(void *handle, pgno_t pg, unsigned len) +{ + sparsemap_t *map = (sparsemap_t *)handle; + for (sparsemap_idx_t i = 0; i < pg + len; i++) { + sparsemap_idx_t j = 0; + while (sparsemap_is_set(map, i + j) == true && j < len) { + j++; + } + if (j == len) { + return i == pg; + } + } + return false; +} + +/* midl ------------------------------------------------------------------ */ + +static void * +__midl_alloc(size_t capacity) +{ + MDB_IDL list = mdb_midl_alloc(capacity); + return (void *)list; +} + +static void +__midl_free(void *handle) +{ + MDB_IDL list = (MDB_IDL)handle; + mdb_midl_free(list); +} + +static pgno_t +__midl_set(void **handle, pgno_t pg) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + if (list[0] + 1 == list[-1]) { + mdb_midl_need(&list, list[-1] + 1); + } + mdb_midl_insert(list, pg); + return pg; +} + +static bool +__midl_is_set(void *handle, pgno_t pg) +{ + MDB_IDL list = (MDB_IDL)handle; + pgno_t i = mdb_midl_search(list, pg); + return i <= list[0] && list[i] == pg; +} + +static pgno_t +__midl_clear(void **handle, pgno_t pg) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + unsigned len = list[0]; + list[0] = len -= 1; + for (unsigned j = pg - 1; j < len;) + list[++j] = list[++pg]; + for (unsigned j = len + 1; j <= list[-1]; j++) + list[j] = 0; + return pg; +} + +static pgno_t +__midl_find_span(void *handle, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; + + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + int retry = 1; + unsigned i = 0; + pgno_t pgno = 0, *mop = list; + unsigned n2 = len, mop_len = mop[0]; + do { + if (mop_len > n2) { + i = mop_len; + do { + pgno = mop[i]; + if (mop[i - n2] == pgno + n2) + goto search_done; + } while (--i > n2); + if (--retry < 0) + break; + } + } while (1); +search_done:; + return pgno; +} + +static bool +__midl_take_span(void **handle, pgno_t pg, unsigned len) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + + int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len; + unsigned j, num = len; + pgno_t *mop = list; + unsigned mop_len = mop[0]; + + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i - num; j < mop_len;) + mop[++j] = mop[++i]; + /* Set all unused values in the array to 0 + for (j = mop_len + 1; j <= mop[-1]; j++) + mop[j] = 0; + */ + return true; +} + +static bool +__midl_release_span(void **handle, pgno_t pg, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; + if (list[0] + len >= list[-1]) { + mdb_midl_need(&list, list[-1] + len); + } + for (size_t i = pg; i < pg + len; i++) { + mdb_midl_insert(list, i); + } + mdb_midl_sort(list); + return true; +} + +static bool +__midl_is_span(void *handle, pgno_t pg, unsigned len) +{ + MDB_IDL list = (MDB_IDL)handle; pgno_t idx = mdb_midl_search(list, pg); bool found = idx <= list[0] && list[idx] == pg; if (!found) @@ -500,9 +354,10 @@ verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) return true; } -bool -verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) +static bool +__midl_is_empty(void *handle, pgno_t pg, unsigned len) { + MDB_IDL list = (MDB_IDL)handle; for (pgno_t i = pg; i < pg + len; i++) { pgno_t idx = mdb_midl_search(list, pg); bool found = idx <= list[0] && list[idx] == pg; @@ -512,9 +367,95 @@ verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) return true; } -bool -verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) +static bool +__midl_validate(void *handle) { + MDB_IDL list = (MDB_IDL)handle; + pgno_t id = 1; + while (id < list[0]) { + if (list[id] >= list[id + 1]) + return false; + id++; + } + return true; +} + +/* roaring --------------------------------------------------------------- */ + +static void * +__roar_alloc(size_t capacity) +{ + return roaring_bitmap_create(); +} + +static void +__roar_free(void *handle) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + roaring_free(rbm); +} + +static pgno_t +__roar_set(void **handle, pgno_t pg) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + assert(roaring_bitmap_add_checked(rbm, pg) == true); + return pg; +} + +static bool +__roar_is_set(void *handle, pgno_t pg) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + return roaring_bitmap_contains(rbm, pg); +} + +static pgno_t +__roar_clear(void **handle, pgno_t pg) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + roaring_bitmap_remove(rbm, pg); + return pg; +} + +static pgno_t +__roar_find_span(void *handle, unsigned len) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + uint64_t max = roaring_bitmap_maximum(rbm); + uint64_t offset = roaring_bitmap_minimum(rbm); + do { + if (len == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + len) == len) { + break; + } + offset++; + } while (offset <= max); + return offset; +} + +static bool +__roar_take_span(void **handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + roaring_bitmap_remove_range(rbm, pg, pg + len); + roaring_bitmap_run_optimize(rbm); + return true; +} + +static bool +__roar_release_span(void **handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + for (size_t i = pg; i < pg + len; i++) { + assert(roaring_bitmap_add_checked(rbm, i) == true); + } + return true; +} + +static bool +__roar_is_span(void *handle, pgno_t pg, unsigned len) +{ + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; for (pgno_t i = pg; i < pg + len; i++) { if (roaring_bitmap_contains(rbm, i) != true) { return false; @@ -523,31 +464,10 @@ verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) return true; } -bool -verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) -{ - for (pgno_t i = pg; i < pg + len; i++) { - if (sparsemap_is_set(map, i) != true) { - return false; - } - } - return true; -} - -bool -verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) -{ - for (pgno_t i = 0; i < len; i++) { - if (sparsemap_is_set(map, pg + i) != false) { - return false; - } - } - return true; -} - -bool -verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) +static bool +__roar_is_empty(void *handle, pgno_t pg, unsigned len) { + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; for (pgno_t i = 0; i < len; i++) { if (roaring_bitmap_contains(rbm, pg + i) != false) { return false; @@ -556,23 +476,98 @@ verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len) return true; } -bool -verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) +static bool +__roar_validate(void *handle) { - for (sparsemap_idx_t i = 0; i < idx + len; i++) { - sparsemap_idx_t j = 0; - while (sparsemap_is_set(map, i + j) == value && j < len) { - j++; - } - if (j == len) { - return i == idx; - } - } - return false; + roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; + roaring_bitmap_run_optimize(rbm); + return true; } +/* ----------------------------------------------------------------------- */ + +typedef enum { SM, ML, RB } container_impl_t; + +typedef struct container { + const char *name; + void *(*alloc)(size_t capacity); + void (*free)(void *handle); + pgno_t (*set)(void **handle, pgno_t pg); + bool (*is_set)(void *handle, pgno_t pg); + pgno_t (*clear)(void **handle, pgno_t pg); + pgno_t (*find_span)(void *handle, unsigned len); + bool (*take_span)(void **handle, pgno_t pg, unsigned len); + bool (*release_span)(void **handle, pgno_t pg, unsigned len); + bool (*is_span)(void *handle, pgno_t pg, unsigned len); + bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + bool (*is_first)(void *handle, pgno_t pg, unsigned len); + bool (*validate)(void *handle); +} container_t; + +// clang-format off +container_t containers[] = { + { "sparsemap", + .alloc = __sm_alloc, + .free = __sm_free, + .set = __sm_set, + .is_set = __sm_is_set, + .clear = __sm_clear, + .find_span = __sm_find_span, + .take_span = __sm_take_span, + .release_span = __sm_release_span, + .is_span = __sm_is_span, + .is_empty = __sm_is_empty, + .is_first = __sm_is_first, + .validate = NULL + }, + { "midl", + .alloc = __midl_alloc, + .free = __midl_free, + .set = __midl_set, + .is_set = __midl_is_set, + .clear = __midl_clear, + .find_span = __midl_find_span, + .take_span = __midl_take_span, + .release_span = __midl_release_span, + .is_span = __midl_is_span, + .is_empty = __midl_is_empty, + .is_first = NULL, + .validate = __midl_validate + }, + { "roaring", + .alloc = __roar_alloc, + .free = __roar_free, + .set = __roar_set, + .is_set = __roar_is_set, + .clear = __roar_clear, + .find_span = __roar_find_span, + .take_span = __roar_take_span, + .release_span = __roar_release_span, + .is_span = __roar_is_span, + .is_empty = __roar_is_empty, + .is_first = NULL, + .validate = __roar_validate, + }, +}; +// clang-format on + +void *handles[3]; +FILE *fp; + +#define alloc(type, size) containers[type].alloc(size); +#define cast(type, fn, ...) \ + if (containers[type].fn) \ + containers[type].fn(handles[type], ##__VA_ARGS__) +#define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) +#define mutate(type, fn, ...) record_##fn##_mutation(fp, __VA_ARGS__), containers[type].fn(&handles[type], __VA_ARGS__) +#define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) +#define compare(set) \ + for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ + verify_eq(0, handles[0], type, handles[type]); \ + } + bool -verify_sm_eq_rm(sparsemap_t *map, roaring_bitmap_t *rbm) +verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm) { uint64_t max = roaring_bitmap_maximum(rbm); roaring_uint32_iterator_t iter; @@ -610,43 +605,23 @@ verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) return true; } -sparsemap_idx_t -_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +bool +verify_eq(unsigned a, void *ad, unsigned b, void *bd) { - do { - sparsemap_idx_t l = sparsemap_set(*map, idx, value); - if (l != idx) { - if (errno == ENOSPC) { - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); - assert(*map != NULL); - errno = 0; - } else { - assert(false); - } - } else { - return l; - } - } while (true); -} + bool ret = true; -sparsemap_idx_t -_sparsemap_merge(sparsemap_t **map, sparsemap_t *other) -{ - do { - int retval = sparsemap_merge(*map, other); - if (retval != 0) { - if (errno == ENOSPC) { - size_t new_size = retval + (64 - (retval % 64)) + 64; - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); - assert(*map != NULL); - errno = 0; - } else { - assert(false); - } - } else { - return retval; - } - } while (true); + // 'a' should always be a Sparsemap + switch (b) { + case ML: + assert((ret = verify_sm_eq_ml((sparsemap_t *)ad, (MDB_IDL)bd)) == true); + break; + case RB: + assert((ret = verify_sm_eq_rb((sparsemap_t *)ad, (roaring_bitmap_t *)bd)) == true); + break; + default: + break; + } + return ret; } td_histogram_t *l_span_loc; @@ -690,6 +665,133 @@ stats(size_t iterations, sparsemap_t *map, MDB_IDL list) #define INITIAL_AMOUNT 1024 * 2 +#define SHORT_OPT "r:fa:bh" +#define LONG_OPT "record:,force,amount:,buffer,help" + +void +print_usage(const char *program_name) +{ + printf("Usage: %s [OPTIONS]\n", program_name); + printf(" -r, --record Path to the file for recording (optional)\n"); + printf(" -f, --force Force overwrite of existing file (optional)\n"); + printf(" -b, --buffer Disable buffering writes to stdout/err (optional)\n"); + printf(" -a, --amount Specify the number of entries to record (must be positive, optional)\n"); + printf(" -h, --help Print this help message\n"); +} + +int +main(int argc, char *argv[]) +{ + int opt; + const char *record_file = NULL; + int force_flag = 0; + size_t amt = INITIAL_AMOUNT; + bool buffer = true; + + fp = stdout; + + while ((opt = getopt(argc, argv, SHORT_OPT LONG_OPT)) != -1) { + switch (opt) { + case 'r': + record_file = optarg; + break; + case 'f': + force_flag = 1; + break; + case 'b': + buffer = false; + break; + case 'a': + amt = atoi(optarg); + if (amt <= 0) { + fprintf(stderr, "Error: Invalid amount. Amount must be a positive number.\n"); + return 1; + } + break; + case 'h': + print_usage(argv[0]); + return 0; + case '?': + fprintf(stderr, "Unknown option: %c\n", optopt); + return 1; + default: + break; + } + } + + // Check if record file is specified + if (record_file == NULL) { + recording = false; + } else { + recording = true; + + // Check for existing file without force flag + if (access(record_file, F_OK) == 0 && !force_flag) { + fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file); + return 1; + } + + // Open the file for writing (truncate if force flag is set) + fp = fopen(record_file, force_flag ? "w" : "a"); + if (fp == NULL) { + perror("Error opening file"); + return 1; + } + } + + // disable buffering + if (!buffer) { + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + } + unsigned types[] = { SM, ML, RB }; + unsigned num_types = (sizeof((types)) / sizeof((types)[0])); + + /* Setup: add an amt of bits to each container. */ + foreach(types) + { + handles[type] = alloc(type, amt); + for (size_t i = 0; i < amt; i++) { + assert(invoke(type, is_set, i) == false); + assert(mutate(type, set, i) == i); + assert(invoke(type, is_set, i) == true); + } + cast(type, validate); + } + compare(types); + + while (true) { + // the an amount [1, 16] of pages to find preferring smaller sizes + unsigned len = toss(15) + 1; + pgno_t loc[num_types]; + foreach(types) + { + loc[type] = invoke(type, find_span, len); + } + for (unsigned n = 0; n < num_types; n++) { + foreach(types) + { + assert(invoke(type, is_span, loc[n], len)); + } + } + foreach(types) + { + cast(type, validate); + } + + unsigned which_loc = (unsigned)xorshift32() % num_types; + foreach(types) + { + assert(mutate(type, take_span, loc[which_loc], len)); + cast(type, validate); + } + compare(types); + } + + return 0; +} + +#if 0 /* * A "soak test" that tries to replicate behavior in LMDB for page allocation. */ @@ -751,7 +853,7 @@ main(void) int retry = 1; unsigned i = 0; pgno_t pgno = 0, *mop = list; - unsigned n2 = n, mop_len = mop[0]; + unsigned n2 = len, mop_len = mop[0]; if (mop_len > n2) { i = mop_len; do { @@ -1008,3 +1110,4 @@ main(void) return 0; } +#endif -- 2.45.2 From e9e64041150c914c2e88140c860fe91bfe884af5 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 15:52:42 -0400 Subject: [PATCH 02/10] WIP --- src/sparsemap.c | 10 +-- tests/soak.c | 232 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 201 insertions(+), 41 deletions(-) diff --git a/src/sparsemap.c b/src/sparsemap.c index 69ec576..da9ad78 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -730,15 +730,15 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[], continue; } size_t n = 0; - for (size_t b = skip; b < SM_BITS_PER_VECTOR; b++) { - buffer[n++] = start + b; + for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) { + buffer[n++] = start + ret + b; } scanner(&buffer[0], n, aux); ret += n; skip = 0; } else { for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) { - buffer[b] = start + b; + buffer[b] = start + ret + b; } scanner(&buffer[0], SM_BITS_PER_VECTOR, aux); ret += SM_BITS_PER_VECTOR; @@ -758,14 +758,14 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[], continue; } if (w & ((sm_bitvec_t)1 << b)) { - buffer[n++] = start + b; + buffer[n++] = start + ret + b; ret++; } } } else { for (int b = 0; b < SM_BITS_PER_VECTOR; b++) { if (w & ((sm_bitvec_t)1 << b)) { - buffer[n++] = start + b; + buffer[n++] = start + ret + b; } } ret += n; diff --git a/tests/soak.c b/tests/soak.c index 044997c..c04072b 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -52,7 +52,7 @@ toss(size_t max) bool recording = true; -void +static void record_set_mutation(FILE *out, pgno_t pg) { if (recording) { @@ -60,7 +60,7 @@ record_set_mutation(FILE *out, pgno_t pg) } } -void +static void record_clear_mutation(FILE *out, pgno_t pg) { if (recording) { @@ -68,7 +68,7 @@ record_clear_mutation(FILE *out, pgno_t pg) } } -void +static void record_take_span_mutation(FILE *out, pgno_t pg, unsigned len) { if (recording) { @@ -76,7 +76,7 @@ record_take_span_mutation(FILE *out, pgno_t pg, unsigned len) } } -void +static void record_release_span_mutation(FILE *out, pgno_t pg, unsigned len) { if (recording) { @@ -84,28 +84,28 @@ record_release_span_mutation(FILE *out, pgno_t pg, unsigned len) } } -/* sparsemap ------------------------------------------------------------- */ - -sparsemap_idx_t -_sparsemap_merge(sparsemap_t **map, sparsemap_t *other) +static void +__scan_record_offsets(sm_idx_t v[], size_t n, void *aux) { - do { - int retval = sparsemap_merge(*map, other); - if (retval != 0) { - if (errno == ENOSPC) { - size_t new_size = retval + (64 - (retval % 64)) + 64; - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); - assert(*map != NULL); - errno = 0; - } else { - assert(false); - } - } else { - return retval; - } - } while (true); + FILE *out = (FILE *)aux; + for (size_t i = 0; i < n; i++) { + fprintf(out, "%u ", v[i]); + } } +static void +record_merge_mutation(FILE *out, void *handle) +{ + if (recording) { + sparsemap_t *map = (sparsemap_t *)handle; + fprintf(out, "merge %zu ", sparsemap_get_ending_offset(map)); + sparsemap_scan(map, __scan_record_offsets, 0, (void *)out); + fprintf(out, "\n"); + } +} + +/* sparsemap ------------------------------------------------------------- */ + static sparsemap_idx_t _sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) { @@ -228,6 +228,43 @@ __sm_is_first(void *handle, pgno_t pg, unsigned len) return false; } +static bool +__sm_merge(void **handle, void *other_handle) +{ + sparsemap_t **map = (sparsemap_t **)handle; + sparsemap_t *other = (sparsemap_t *)other_handle; + do { + int retval = sparsemap_merge(*map, other); + if (retval != 0) { + if (errno == ENOSPC) { + size_t new_size = retval + (64 - (retval % 64)) + 64; + *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size); + assert(*map != NULL); + errno = 0; + } else { + assert(false); + } + } else { + break; + } + } while (true); + return true; +} + +static size_t +__sm_size(void *handle) +{ + sparsemap_t *map = (sparsemap_t *)handle; + return sparsemap_get_size(map); +} + +static size_t +__sm_count(void *handle) +{ + sparsemap_t *map = (sparsemap_t *)handle; + return sparsemap_rank(map, 0, SPARSEMAP_IDX_MAX, true); +} + /* midl ------------------------------------------------------------------ */ static void * @@ -249,7 +286,7 @@ __midl_set(void **handle, pgno_t pg) { MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + 1 == list[-1]) { - mdb_midl_need(&list, list[-1] + 1); + mdb_midl_need(_list, list[-1] + 1); } mdb_midl_insert(list, pg); return pg; @@ -308,7 +345,6 @@ static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; - int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len; unsigned j, num = len; pgno_t *mop = list; @@ -320,17 +356,16 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len) mop[++j] = mop[++i]; /* Set all unused values in the array to 0 for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; - */ + mop[j] = 0; */ return true; } static bool __midl_release_span(void **handle, pgno_t pg, unsigned len) { - MDB_IDL list = (MDB_IDL)handle; + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + len >= list[-1]) { - mdb_midl_need(&list, list[-1] + len); + mdb_midl_need(_list, list[-1] + len); } for (size_t i = pg; i < pg + len; i++) { mdb_midl_insert(list, i); @@ -367,6 +402,30 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len) return true; } +static bool +__midl_merge(void **handle, void *other_handle) +{ + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + MDB_IDL other = (MDB_IDL)other_handle; + mdb_midl_append_list(_list, other); + mdb_midl_sort(list); + return true; +} + +static size_t +__midl_size(void *handle) +{ + MDB_IDL list = (MDB_IDL)handle; + return list[0] * sizeof(pgno_t); +} + +static size_t +__midl_count(void *handle) +{ + MDB_IDL list = (MDB_IDL)handle; + return list[0]; +} + static bool __midl_validate(void *handle) { @@ -476,6 +535,29 @@ __roar_is_empty(void *handle, pgno_t pg, unsigned len) return true; } +static bool +__roar_merge(void **handle, void *other_handle) +{ + roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; + roaring_bitmap_t *other = (roaring_bitmap_t *)other_handle; + roaring_bitmap_or_inplace(rbm, other); + return true; +} + +static size_t +__roar_size(void *handle) +{ + // TODO + return 0; +} + +static size_t +__roar_count(void *handle) +{ + // TODO + return 0; +} + static bool __roar_validate(void *handle) { @@ -501,6 +583,9 @@ typedef struct container { bool (*is_span)(void *handle, pgno_t pg, unsigned len); bool (*is_empty)(void *handle, pgno_t pg, unsigned len); bool (*is_first)(void *handle, pgno_t pg, unsigned len); + bool (*merge)(void **handle, void *other_handle); + size_t (*size)(void *handle); + size_t (*count)(void *handle); bool (*validate)(void *handle); } container_t; @@ -518,6 +603,9 @@ container_t containers[] = { .is_span = __sm_is_span, .is_empty = __sm_is_empty, .is_first = __sm_is_first, + .merge = __sm_merge, + .size = __sm_size, + .count = __sm_count, .validate = NULL }, { "midl", @@ -532,6 +620,9 @@ container_t containers[] = { .is_span = __midl_is_span, .is_empty = __midl_is_empty, .is_first = NULL, + .merge = __midl_merge, + .size = __midl_size, + .count = __midl_count, .validate = __midl_validate }, { "roaring", @@ -546,12 +637,16 @@ container_t containers[] = { .is_span = __roar_is_span, .is_empty = __roar_is_empty, .is_first = NULL, + .merge = __roar_merge, + .size = __roar_size, + .count = __roar_count, .validate = __roar_validate, }, }; // clang-format on -void *handles[3]; +void *handles[(sizeof((containers)) / sizeof((containers)[0]))]; +void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))]; FILE *fp; #define alloc(type, size) containers[type].alloc(size); @@ -559,7 +654,7 @@ FILE *fp; if (containers[type].fn) \ containers[type].fn(handles[type], ##__VA_ARGS__) #define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) -#define mutate(type, fn, ...) record_##fn##_mutation(fp, __VA_ARGS__), containers[type].fn(&handles[type], __VA_ARGS__) +#define mutate(type, fn, ...) (type == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, containers[type].fn(&handles[type], __VA_ARGS__) #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) #define compare(set) \ for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ @@ -685,7 +780,7 @@ main(int argc, char *argv[]) int opt; const char *record_file = NULL; int force_flag = 0; - size_t amt = INITIAL_AMOUNT; + size_t left, amt = INITIAL_AMOUNT; bool buffer = true; fp = stdout; @@ -721,10 +816,8 @@ main(int argc, char *argv[]) // Check if record file is specified if (record_file == NULL) { - recording = false; + recording = true; // TODO false } else { - recording = true; - // Check for existing file without force flag if (access(record_file, F_OK) == 0 && !force_flag) { fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file); @@ -742,7 +835,7 @@ main(int argc, char *argv[]) // disable buffering if (!buffer) { setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); + setvbuf(fp, NULL, _IONBF, 0); } unsigned types[] = { SM, ML, RB }; unsigned num_types = (sizeof((types)) / sizeof((types)[0])); @@ -759,6 +852,7 @@ main(int argc, char *argv[]) cast(type, validate); } compare(types); + left = amt; while (true) { // the an amount [1, 16] of pages to find preferring smaller sizes @@ -786,6 +880,72 @@ main(int argc, char *argv[]) cast(type, validate); } compare(types); + left -= len; + + // Once we've used 1/10th of the free list, let's replenish it a bit. + if (amt - left > amt / 10) { + do { + pgno_t pgno; + size_t len, retries = amt; + // Find a hole in the map to replenish. + do { + len = toss(15) + 1; + pgno = sparsemap_span(handles[SM], 0, len, false); + } while (SPARSEMAP_NOT_FOUND(pgno) && --retries); + if (retries == 0) { + goto larger_please; + } + if (SPARSEMAP_FOUND(pgno)) { + foreach(types) + { + assert(invoke(type, is_empty, pgno, len)); + } + compare(types); + foreach(types) + { + assert(invoke(type, is_span, pgno, len) == false); + assert(mutate(type, release_span, pgno, len)); + assert(invoke(type, is_span, pgno, len) == true); + cast(type, validate); + } + compare(types); + left += len; + } + } while (amt - left > amt / 100); + } + + if (toss(1000) == 0) { + size_t new_amt; + pgno_t max; + larger_please: + new_amt = 1024 + (xorshift32() % 2048) + toss(1024); + max = sparsemap_get_ending_offset(handles[SM]); + + // Build a new container to merge with the existing one. + foreach(types) + { + new_handles[type] = alloc(type, new_amt); + for (size_t i = 0; i < new_amt; i++) { + // We don't want to record and we're using new_handles not + // handles, so call fn directly. + assert(containers[type].is_set(new_handles[type], i) == false); + containers[type].set(&new_handles[type], i); + assert(containers[type].is_set(new_handles[type], i) == true); + } + } + foreach(types) + { + assert(mutate(type, merge, new_handles[type])); + cast(type, validate); + } + compare(types); + left += new_amt; + amt += new_amt; + foreach(types) + { + containers[type].free(new_handles[type]); + } + } } return 0; -- 2.45.2 From 5bd3872153da3e8b33ab8df7f741845c8f3a03f3 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 16:45:22 -0400 Subject: [PATCH 03/10] WIP --- tests/soak.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index c04072b..dc83059 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -128,7 +128,9 @@ _sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) static void * __sm_alloc(size_t capacity) { - return (void *)sparsemap(capacity); + sparsemap_t *map = sparsemap(capacity); + assert(map != NULL); + return map; } static void @@ -271,6 +273,7 @@ static void * __midl_alloc(size_t capacity) { MDB_IDL list = mdb_midl_alloc(capacity); + assert(list != NULL); return (void *)list; } @@ -287,6 +290,7 @@ __midl_set(void **handle, pgno_t pg) MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + 1 == list[-1]) { mdb_midl_need(_list, list[-1] + 1); + list = *_list; } mdb_midl_insert(list, pg); return pg; @@ -303,7 +307,7 @@ __midl_is_set(void *handle, pgno_t pg) static pgno_t __midl_clear(void **handle, pgno_t pg) { - MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + MDB_IDL list = (MDB_IDL *)*handle; unsigned len = list[0]; list[0] = len -= 1; for (unsigned j = pg - 1; j < len;) @@ -344,7 +348,7 @@ search_done:; static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { - MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; + MDB_IDL list = (MDB_IDL *)*handle; int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len; unsigned j, num = len; pgno_t *mop = list; @@ -366,6 +370,7 @@ __midl_release_span(void **handle, pgno_t pg, unsigned len) MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + len >= list[-1]) { mdb_midl_need(_list, list[-1] + len); + list = *_list; } for (size_t i = pg; i < pg + len; i++) { mdb_midl_insert(list, i); @@ -407,7 +412,12 @@ __midl_merge(void **handle, void *other_handle) { MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; MDB_IDL other = (MDB_IDL)other_handle; + if (list[0] + other[0] >= list[-1]) { + mdb_midl_need(_list, list[-1] + other[0]); + list = *_list; + } mdb_midl_append_list(_list, other); + list = *_list; mdb_midl_sort(list); return true; } @@ -444,7 +454,9 @@ __midl_validate(void *handle) static void * __roar_alloc(size_t capacity) { - return roaring_bitmap_create(); + roaring_bitmap_t *map = roaring_bitmap_create(); + assert(map != NULL); + return map; } static void @@ -497,7 +509,6 @@ __roar_take_span(void **handle, pgno_t pg, unsigned len) { roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm; roaring_bitmap_remove_range(rbm, pg, pg + len); - roaring_bitmap_run_optimize(rbm); return true; } -- 2.45.2 From b2a2f0865e9edd102162b7004c18213c69e70df3 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 21:35:45 -0400 Subject: [PATCH 04/10] WIP --- tests/soak.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index dc83059..ba68c63 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -184,7 +184,7 @@ static bool __sm_release_span(void **handle, pgno_t pg, unsigned len) { sparsemap_t **map = (sparsemap_t **)handle; - for (pgno_t i = pg; i <= len; i++) { + for (pgno_t i = pg; i < pg + len; i++) { assert(_sparsemap_set(map, i, true) == i); } return true; @@ -307,7 +307,7 @@ __midl_is_set(void *handle, pgno_t pg) static pgno_t __midl_clear(void **handle, pgno_t pg) { - MDB_IDL list = (MDB_IDL *)*handle; + MDB_IDL list = *(MDB_IDL *)handle; unsigned len = list[0]; list[0] = len -= 1; for (unsigned j = pg - 1; j < len;) @@ -348,8 +348,8 @@ search_done:; static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { - MDB_IDL list = (MDB_IDL *)*handle; - int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len; + MDB_IDL list = *(MDB_IDL *)handle; + int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg); unsigned j, num = len; pgno_t *mop = list; unsigned mop_len = mop[0]; @@ -827,7 +827,7 @@ main(int argc, char *argv[]) // Check if record file is specified if (record_file == NULL) { - recording = true; // TODO false + recording = true;//TODO } else { // Check for existing file without force flag if (access(record_file, F_OK) == 0 && !force_flag) { @@ -925,11 +925,12 @@ main(int argc, char *argv[]) } while (amt - left > amt / 100); } - if (toss(1000) == 0) { - size_t new_amt; + if (toss(1000) > 800) { + size_t new_offset, new_amt; pgno_t max; larger_please: new_amt = 1024 + (xorshift32() % 2048) + toss(1024); + new_offset = xorshift32() % 4096 + 1024; max = sparsemap_get_ending_offset(handles[SM]); // Build a new container to merge with the existing one. @@ -939,9 +940,9 @@ main(int argc, char *argv[]) for (size_t i = 0; i < new_amt; i++) { // We don't want to record and we're using new_handles not // handles, so call fn directly. - assert(containers[type].is_set(new_handles[type], i) == false); - containers[type].set(&new_handles[type], i); - assert(containers[type].is_set(new_handles[type], i) == true); + assert(containers[type].is_set(new_handles[type], i + new_offset) == false); + containers[type].set(&new_handles[type], i + new_offset); + assert(containers[type].is_set(new_handles[type], i + new_offset) == true); } } foreach(types) -- 2.45.2 From 1b7fafa0e168c1f1d2ab4a91b51b09d8e3eb5b50 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 17 May 2024 21:44:20 -0400 Subject: [PATCH 05/10] WIP --- tests/midl.c | 417 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/soak.c | 328 +--------------------------------------- 2 files changed, 420 insertions(+), 325 deletions(-) create mode 100644 tests/midl.c diff --git a/tests/midl.c b/tests/midl.c new file mode 100644 index 0000000..6b4993d --- /dev/null +++ b/tests/midl.c @@ -0,0 +1,417 @@ +/** @defgroup idls ID List Management + * @{ + */ +/** A generic unsigned ID number. These were entryIDs in back-bdb. + * Preferably it should have the same size as a pointer. + */ +typedef size_t MDB_ID; + +/** An IDL is an ID List, a sorted array of IDs. The first + * element of the array is a counter for how many actual + * IDs are in the list. In the original back-bdb code, IDLs are + * sorted in ascending order. For libmdb IDLs are sorted in + * descending order. + */ +typedef MDB_ID *MDB_IDL; + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN) +#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1)) + +#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1) +#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1) + +#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID)) +#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0) +#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src))) +#define MDB_IDL_FIRST(ids) ((ids)[1]) +#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]]) + +/** Current max length of an #mdb_midl_alloc()ed IDL */ +#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1]) + +/** Append ID to IDL. The IDL must be big enough. */ +#define mdb_midl_xappend(idl, id) \ + do { \ + MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ + xidl[xlen] = (id); \ + } while (0) + +/** Search for an ID in an IDL. + * @param[in] ids The IDL to search. + * @param[in] id The ID to search for. + * @return The index of the first ID greater than or equal to \b id. + */ +unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id); + +/** Allocate an IDL. + * Allocates memory for an IDL of the given size. + * @return IDL on success, NULL on failure. + */ +MDB_IDL mdb_midl_alloc(int num); + +/** Free an IDL. + * @param[in] ids The IDL to free. + */ +void mdb_midl_free(MDB_IDL ids); + +/** Shrink an IDL. + * Return the IDL to the default size if it has grown larger. + * @param[in,out] idp Address of the IDL to shrink. + */ +void mdb_midl_shrink(MDB_IDL *idp); + +/** Shrink an IDL to a specific size. + * Resize the IDL to \b size if it is larger. + * @param[in,out] idp Address of the IDL to shrink. + * @param[in] size Capacity to have once resized. + */ +void mdb_midl_shrink(MDB_IDL *idp); + +/** Make room for num additional elements in an IDL. + * @param[in,out] idp Address of the IDL. + * @param[in] num Number of elements to make room for. + * @return 0 on success, ENOMEM on failure. + */ +int mdb_midl_need(MDB_IDL *idp, unsigned num); + +/** Append an ID onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The ID to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append(MDB_IDL *idp, MDB_ID id); + +/** Append an IDL onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] app The IDL to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app); + +/** Append an ID range onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The lowest ID to append. + * @param[in] n Number of IDs to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n); + +/** Merge an IDL onto an IDL. The destination IDL must be big enough. + * @param[in] idl The IDL to merge into. + * @param[in] merge The IDL to merge. + */ +void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge); + +/** Sort an IDL. + * @param[in,out] ids The IDL to sort. + */ +void mdb_midl_sort(MDB_IDL ids); + +/* midl.c ------------------------------------------------------------------ */ +/** @defgroup idls ID List Management + * @{ + */ +#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y)) + +unsigned +mdb_midl_search(MDB_IDL ids, MDB_ID id) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while (0 < n) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP(ids[cursor], id); + + if (val < 0) { + n = pivot; + + } else if (val > 0) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if (val > 0) { + ++cursor; + } + return cursor; +} + +int +mdb_midl_insert(MDB_IDL ids, MDB_ID id) +{ + unsigned x, i; + + x = mdb_midl_search(ids, id); + assert(x > 0); + + if (x < 1) { + /* internal error */ + return -2; + } + + if (x <= ids[0] && ids[x] == id) { + /* duplicate */ + assert(0); + return -1; + } + + if (++ids[0] >= MDB_IDL_DB_MAX) { + /* no room */ + --ids[0]; + return -2; + + } else { + /* insert id */ + for (i = ids[0]; i > x; i--) + ids[i] = ids[i - 1]; + ids[x] = id; + } + + return 0; +} + +inline void +mdb_midl_pop_n(MDB_IDL ids, unsigned n) +{ + ids[0] = ids[0] - n; +} + +void +mdb_midl_remove_at(MDB_IDL ids, unsigned idx) +{ + for (int i = idx - 1; idx < ids[0] - 1;) + ids[++i] = ids[++idx]; + ids[0] = ids[0] - 1; +} + +void +mdb_midl_remove(MDB_IDL ids, MDB_ID id) +{ + unsigned idx = mdb_midl_search(ids, id); + if (idx <= ids[0] && ids[idx] == id) + mdb_midl_remove_at(ids, idx); +} + +MDB_IDL +mdb_midl_alloc(int num) +{ + MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID)); + if (ids) { + *ids++ = num; + *ids = 0; + } + return ids; +} + +void +mdb_midl_free(MDB_IDL ids) +{ + if (ids) + free(ids - 1); +} + +void +mdb_midl_shrink(MDB_IDL *idp) +{ + MDB_IDL ids = *idp; + if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) { + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + } +} + +void +mdb_midl_shrink_to(MDB_IDL *idp, size_t size) +{ + MDB_IDL ids = *idp; + if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) { + *ids++ = size; + *idp = ids; + *idp[0] = *idp[0] > size ? size : *idp[0]; + } +} + +static int +mdb_midl_grow(MDB_IDL *idp, int num) +{ + MDB_IDL idn = *idp - 1; + /* grow it */ + idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); + if (!idn) + return ENOMEM; + *idn++ += num; + *idp = idn; + return 0; +} + +int +mdb_midl_need(MDB_IDL *idp, unsigned num) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num / 4 + (256 + 2)) & -256; + if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = num - 2; + *idp = ids; + } + return 0; +} + +int +mdb_midl_append(MDB_IDL *idp, MDB_ID id) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] >= ids[-1]) { + if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0]++; + ids[ids[0]] = id; + return 0; +} + +int +mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] + app[0] >= ids[-1]) { + if (mdb_midl_grow(idp, app[0])) + return ENOMEM; + ids = *idp; + } + memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID)); + ids[0] += app[0]; + return 0; +} + +int +mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? */ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + +void +mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge) +{ + MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k; + idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ + old_id = idl[j]; + while (i) { + merge_id = merge[i--]; + for (; old_id < merge_id; old_id = idl[--j]) + idl[k--] = old_id; + idl[k--] = merge_id; + } + idl[0] = total; +} + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define MIDL_SWAP(a, b) \ + { \ + itmp = (a); \ + (a) = (b); \ + (b) = itmp; \ + } + +void +mdb_midl_sort(MDB_IDL ids) +{ + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int) * CHAR_BIT * 2]; + int i, j, k, l, ir, jstack; + MDB_ID a, itmp; + + ir = (int)ids[0]; + l = 1; + jstack = 0; + for (;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j = l + 1; j <= ir; j++) { + a = ids[j]; + for (i = j - 1; i >= 1; i--) { + if (ids[i] >= a) + break; + ids[i + 1] = ids[i]; + } + ids[i + 1] = a; + } + if (jstack == 0) + break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + MIDL_SWAP(ids[k], ids[l + 1]); + if (ids[l] < ids[ir]) { + MIDL_SWAP(ids[l], ids[ir]); + } + if (ids[l + 1] < ids[ir]) { + MIDL_SWAP(ids[l + 1], ids[ir]); + } + if (ids[l] < ids[l + 1]) { + MIDL_SWAP(ids[l], ids[l + 1]); + } + i = l + 1; + j = ir; + a = ids[l + 1]; + for (;;) { + do + i++; + while (ids[i] > a); + do + j--; + while (ids[j] < a); + if (j < i) + break; + MIDL_SWAP(ids[i], ids[j]); + } + ids[l + 1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir - i + 1 >= j - l) { + istack[jstack] = ir; + istack[jstack - 1] = i; + ir = j - 1; + } else { + istack[jstack] = j - 1; + istack[jstack - 1] = l; + l = i; + } + } + } +} diff --git a/tests/soak.c b/tests/soak.c index ba68c63..c0fab52 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -827,7 +827,7 @@ main(int argc, char *argv[]) // Check if record file is specified if (record_file == NULL) { - recording = true;//TODO + recording = false; } else { // Check for existing file without force flag if (access(record_file, F_OK) == 0 && !force_flag) { @@ -925,13 +925,12 @@ main(int argc, char *argv[]) } while (amt - left > amt / 100); } - if (toss(1000) > 800) { + if (toss(10) > 8) { size_t new_offset, new_amt; pgno_t max; larger_please: new_amt = 1024 + (xorshift32() % 2048) + toss(1024); - new_offset = xorshift32() % 4096 + 1024; - max = sparsemap_get_ending_offset(handles[SM]); + new_offset = sparsemap_get_ending_offset(handles[SM]); // Build a new container to merge with the existing one. foreach(types) @@ -962,324 +961,3 @@ main(int argc, char *argv[]) return 0; } - -#if 0 -/* - * A "soak test" that tries to replicate behavior in LMDB for page allocation. - */ -int -main(void) -{ - size_t replenish = 0, iterations = 0; - - // disable buffering -#ifdef DEBUG - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); -#endif - - l_span_loc = td_new(100); - b_span_loc = td_new(100); - l_span_take = td_new(100); - b_span_take = td_new(100); - l_span_merge = td_new(100); - b_span_merge = td_new(100); - - stats_header(); - - sparsemap_idx_t amt = INITIAL_AMOUNT; - MDB_IDL list = mdb_midl_alloc(amt); - sparsemap_t *map = sparsemap(INITIAL_AMOUNT); - roaring_bitmap_t *rbm = roaring_bitmap_create(); - - // start with 2GiB of 4KiB free pages to track: - // - MDB_IDL requires one int for each free page - // - Sparsemap will compress the set bits using less memory - mdb_midl_need(&list, amt); - for (sparsemap_idx_t pg = 0; pg < amt; pg++) { - // We list every free (unallocated) page in the IDL, while... - mdb_midl_xappend(list, pg); - // ... true (unset in the bitmap) indicates free in the bitmap, ... - assert(_sparsemap_set(&map, pg, true) == pg); - assert(roaring_bitmap_add_checked(rbm, pg)); - } - mdb_midl_sort(list); - roaring_bitmap_run_optimize(rbm); - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - - double b, e; - while (1) { - unsigned mi; - pgno_t ml, sl, rl; - - // get an amount [1, 16] of pages to find preferring smaller sizes - unsigned n = toss(15) + 1; - - // find a set of pages using the MDB_IDL - { - b = nsts(); - /* Seek a big enough contiguous page range. Prefer - * pages at the tail, just truncating the list. - */ - int retry = 1; - unsigned i = 0; - pgno_t pgno = 0, *mop = list; - unsigned n2 = len, mop_len = mop[0]; - if (mop_len > n2) { - i = mop_len; - do { - pgno = mop[i]; - if (mop[i - n2] == pgno + n2) - goto search_done; - } while (--i > n2); - if (--retry < 0) - break; - } - search_done:; - ml = pgno; - mi = i; - e = nsts(); - td_add(l_span_loc, e - b, 1); - } - assert(verify_span_midl(list, ml, n)); - assert(verify_span_sparsemap(map, ml, n)); - assert(verify_span_roaring(rbm, ml, n)); - - // find a set of pages using the Sparsemap - { - b = nsts(); - pgno_t pgno = sparsemap_span(map, 0, n, true); - assert(SPARSEMAP_NOT_FOUND(pgno) == false); - sl = pgno; - e = nsts(); - td_add(b_span_loc, e - b, 1); - assert(verify_sm_is_first_available_span(map, pgno, n, true)); - } - assert(verify_span_midl(list, sl, n)); - assert(verify_span_sparsemap(map, sl, n)); - assert(verify_span_roaring(rbm, sl, n)); - - // find a set of pages using the Roaring Bitmap - { - b = nsts(); - uint64_t max = roaring_bitmap_maximum(rbm); - uint64_t offset = roaring_bitmap_minimum(rbm); - do { - if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) { - break; - } - offset++; - } while (offset <= max); - rl = offset; - e = nsts(); - } - /* - if (rl != sl) { - assert(verify_span_midl(list, rl, n)); - assert(verify_span_sparsemap(map, rl, n)); - assert(verify_span_roaring(rbm, rl, n)); - } - */ - assert(rl == sl); - - bool prefer_mdb_idl_loc = (bool)xorshift32() % 2; - - // acquire the set of pages within the list - if (prefer_mdb_idl_loc) { - b = nsts(); - unsigned j, num = n; - int i = mi; - pgno_t *mop = list; - unsigned mop_len = mop[0]; - - mop[0] = mop_len -= num; - /* Move any stragglers down */ - for (j = i - num; j < mop_len;) - mop[++j] = mop[++i]; - e = nsts(); - for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; - td_add(l_span_take, e - b, 1); - } else { - b = nsts(); - unsigned j, num = n; - int i = mdb_midl_search(list, sl) + num; - pgno_t *mop = list; - unsigned mop_len = mop[0]; - - mop[0] = mop_len -= num; - /* Move any stragglers down */ - for (j = i - num; j < mop_len;) - mop[++j] = mop[++i]; - e = nsts(); - for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; - td_add(l_span_take, e - b, 1); - } - - // acquire the set of pages within the sparsemap - if (prefer_mdb_idl_loc) { - b = nsts(); - for (pgno_t i = ml; i < ml + n; i++) { - assert(_sparsemap_set(&map, i, false) == i); - } - e = nsts(); - td_add(b_span_take, e - b, 1); - } else { - b = nsts(); - for (pgno_t i = sl; i <= sl + n; i++) { - assert(_sparsemap_set(&map, i, false) == i); - } - e = nsts(); - td_add(b_span_take, e - b, 1); - } - - // acquire the set of pages within the roaring bitmap - if (prefer_mdb_idl_loc) { - b = nsts(); - roaring_bitmap_remove_range(rbm, ml, ml + n); - e = nsts(); - } else { - b = nsts(); - roaring_bitmap_remove_range(rbm, sl, sl + n); - e = nsts(); - } - roaring_bitmap_run_optimize(rbm); - - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - - // Once we've used a tenth of the free list, let's replenish it a bit. - if (list[0] < amt / 10) { - do { - pgno_t pgno; - size_t len, retries = amt; - do { - len = toss(15) + 1; - pgno = sparsemap_span(map, 0, len, false); - assert(verify_sm_is_first_available_span(map, pgno, n, false)); - //__diag("%zu\t%zu,%zu\n", iterations, replenish, retries); - } while (SPARSEMAP_NOT_FOUND(pgno) && --retries); - if (retries == 0) { - goto larger_please; - } - if (SPARSEMAP_FOUND(pgno)) { - assert(verify_empty_midl(list, pgno, len)); - assert(verify_empty_sparsemap(map, pgno, len)); - assert(verify_empty_roaring(rbm, pgno, len)); - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - if (list[-1] - list[0] < len) { - mdb_midl_need(&list, list[-1] + len); - } - for (size_t i = pgno; i < pgno + len; i++) { - assert(verify_midl_contains(list, i) == false); - assert(sparsemap_is_set(map, i) == false); - mdb_midl_insert(list, i); - assert(verify_midl_contains(list, i) == true); - assert(_sparsemap_set(&map, i, true) == i); - assert(sparsemap_is_set(map, i) == true); - assert(roaring_bitmap_add_checked(rbm, i) == true); - } - mdb_midl_sort(list); - assert(verify_midl_nodups(list)); - assert(verify_span_midl(list, pgno, len)); - assert(verify_span_sparsemap(map, pgno, len)); - assert(verify_span_roaring(rbm, pgno, len)); - } - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - replenish++; - } while (list[0] < amt - 32); - } - replenish = 0; - - // every so often, either ... - if (iterations % 1000 == 0) { - larger_please:; - size_t COUNT = xorshift32() % 3586 + 513; - // ... add some amount of 4KiB pages, or - size_t len = COUNT; - // The largest page is at list[1] because this is a reverse sorted list. - pgno_t pg = list[0] ? list[1] + 1 : 0; - if (true) { // disable shrinking for now... (toss(6) + 1 < 7) - MDB_IDL new_list = mdb_midl_alloc(len); - sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT); - roaring_bitmap_t *new_rbm = roaring_bitmap_create(); - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - new_list[i + 1] = gp; - new_list[0]++; - assert(verify_midl_contains(new_list, gp) == true); - assert(_sparsemap_set(&new_map, gp, true) == gp); - assert(sparsemap_is_set(new_map, gp)); - assert(roaring_bitmap_add_checked(new_rbm, gp)); - assert(roaring_bitmap_contains(new_rbm, gp)); - } - assert(verify_sm_eq_ml(new_map, new_list)); - assert(verify_sm_eq_rm(new_map, new_rbm)); - { - b = nsts(); - mdb_midl_append_list(&list, new_list); - mdb_midl_sort(list); - e = nsts(); - td_add(l_span_merge, e - b, 1); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(verify_midl_contains(list, gp) == true); - } - { - b = nsts(); - _sparsemap_merge(&map, new_map); - e = nsts(); - td_add(b_span_merge, e - b, 1); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(sparsemap_is_set(map, gp)); - } - free(new_map); - { - b = nsts(); - roaring_bitmap_or_inplace(rbm, new_rbm); - e = nsts(); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(roaring_bitmap_contains(rbm, gp)); - } - roaring_free(new_rbm); - } else { - if (list[-1] > INITIAL_AMOUNT) { - // ... a fraction of the time, remove COUNT / 2 of 4KiB pages. - { - pgno_t pg; - for (size_t i = 0; i < COUNT; i++) { - pg = list[list[0] - i]; - assert(sparsemap_is_set(map, pg) == true); - assert(_sparsemap_set(&map, pg, false) == pg); - } - } - { - roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]); - } - { - mdb_midl_shrink_to(&list, list[0] - COUNT); - } - assert(list[list[0]] != pg); - assert(verify_midl_nodups(list)); - verify_sm_eq_ml(map, list); - verify_sm_eq_rm(map, rbm); - } - } - } - stats(iterations, map, list); - // printf("\033[K%zu\r", iterations); - iterations++; - } - - return 0; -} -#endif -- 2.45.2 From 3b4106743bd0507a413d6989876b8a6020561dc1 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 20 May 2024 11:05:46 -0400 Subject: [PATCH 06/10] WIP --- tests/soak.c | 338 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 271 insertions(+), 67 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index c0fab52..058a5af 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -16,6 +16,9 @@ typedef size_t pgno_t; +#define INITIAL_AMOUNT 1024 * 2 +bool recording = true; + char * bytes_as(double bytes, char *s, size_t size) { @@ -50,7 +53,152 @@ toss(size_t max) return level; } -bool recording = true; +static size_t +b64_encoded_size(size_t inlen) +{ + size_t ret; + + ret = inlen; + if (inlen % 3 != 0) + ret += 3 - (inlen % 3); + ret /= 3; + ret *= 4; + + return ret; +} + +static const char b64chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static char * +b64_encode(const unsigned char *in, size_t len) +{ + char *out; + size_t elen; + size_t i; + size_t j; + size_t v; + + if (in == NULL || len == 0) + return NULL; + + elen = b64_encoded_size(len); + out = malloc(elen + 1); + out[elen] = '\0'; + + for (i = 0, j = 0; i < len; i += 3, j += 4) { + v = in[i]; + v = i + 1 < len ? v << 8 | in[i + 1] : v << 8; + v = i + 2 < len ? v << 8 | in[i + 2] : v << 8; + + out[j] = b64chars[(v >> 18) & 0x3F]; + out[j + 1] = b64chars[(v >> 12) & 0x3F]; + if (i + 1 < len) { + out[j + 2] = b64chars[(v >> 6) & 0x3F]; + } else { + out[j + 2] = '='; + } + if (i + 2 < len) { + out[j + 3] = b64chars[v & 0x3F]; + } else { + out[j + 3] = '='; + } + } + + return out; +} + +static size_t +b64_decoded_size(const char *in) +{ + size_t len; + size_t ret; + size_t i; + + if (in == NULL) + return 0; + + len = strlen(in); + ret = len / 4 * 3; + + for (i = len; i-- > 0;) { + if (in[i] == '=') { + ret--; + } else { + break; + } + } + + return ret; +} + +#if 0 +static void +b64_generate_decode_table() +{ + int inv[80]; + size_t i; + + memset(inv, -1, sizeof(inv)); + for (i = 0; i < sizeof(b64chars) - 1; i++) { + inv[b64chars[i] - 43] = i; + } +} +#endif + +static int b64invs[] = { 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51 }; + +static int +b64_isvalidchar(char c) +{ + if (c >= '0' && c <= '9') + return 1; + if (c >= 'A' && c <= 'Z') + return 1; + if (c >= 'a' && c <= 'z') + return 1; + if (c == '+' || c == '/' || c == '=') + return 1; + return 0; +} + +static int +b64_decode(const char *in, unsigned char *out, size_t outlen) +{ + size_t len; + size_t i; + size_t j; + int v; + + if (in == NULL || out == NULL) + return 0; + + len = strlen(in); + if (outlen < b64_decoded_size(in) || len % 4 != 0) + return 0; + + for (i = 0; i < len; i++) { + if (!b64_isvalidchar(in[i])) { + return 0; + } + } + + for (i = 0, j = 0; i < len; i += 4, j += 3) { + v = b64invs[in[i] - 43]; + v = (v << 6) | b64invs[in[i + 1] - 43]; + v = in[i + 2] == '=' ? v << 6 : (v << 6) | b64invs[in[i + 2] - 43]; + v = in[i + 3] == '=' ? v << 6 : (v << 6) | b64invs[in[i + 3] - 43]; + + out[j] = (v >> 16) & 0xFF; + if (in[i + 2] != '=') + out[j + 1] = (v >> 8) & 0xFF; + if (in[i + 3] != '=') + out[j + 2] = v & 0xFF; + } + + return 1; +} static void record_set_mutation(FILE *out, pgno_t pg) @@ -104,20 +252,38 @@ record_merge_mutation(FILE *out, void *handle) } } +static void +record_checkpoint(FILE *out, void *handle) +{ + if (recording) { + sparsemap_t *map = (sparsemap_t *)handle; + size_t capacity = sparsemap_get_capacity(map); + size_t buffer_size = sparsemap_get_size(map); + size_t encoded_size = b64_encoded_size(buffer_size); + char *encoded = b64_encode(sparsemap_get_data(map), buffer_size); + fprintf(out, "checkpoint %zu %zu %zu ", capacity, buffer_size, encoded_size); + fprintf(out, "%s", encoded); + fprintf(out, "\n"); + } +} + /* sparsemap ------------------------------------------------------------- */ static sparsemap_idx_t -_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +_sparsemap_set(sparsemap_t **_map, sparsemap_idx_t idx, bool value) { + sparsemap_t *map = *_map, *new_map = NULL; do { - sparsemap_idx_t l = sparsemap_set(*map, idx, value); + sparsemap_idx_t l = sparsemap_set(map, idx, value); if (l != idx) { if (errno == ENOSPC) { - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); - assert(*map != NULL); + size_t capacity = sparsemap_get_capacity(map) + 64; + new_map = sparsemap_set_data_size(map, NULL, capacity); + assert(new_map != NULL); errno = 0; + *_map = new_map; } else { - assert(false); + perror("Unable to grow sparsemap"); } } else { return l; @@ -166,8 +332,7 @@ __sm_find_span(void *handle, unsigned len) { sparsemap_t *map = (sparsemap_t *)handle; pgno_t pgno = (pgno_t)sparsemap_span(map, 0, len, true); - assert(SPARSEMAP_NOT_FOUND(pgno) == false); - return pgno; + return SPARSEMAP_NOT_FOUND(pgno) ? -1 : pgno; } static bool @@ -269,6 +434,8 @@ __sm_count(void *handle) /* midl ------------------------------------------------------------------ */ +static bool __midl_validate(void *handle); + static void * __midl_alloc(size_t capacity) { @@ -287,12 +454,16 @@ __midl_free(void *handle) static pgno_t __midl_set(void **handle, pgno_t pg) { + assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + 1 == list[-1]) { - mdb_midl_need(_list, list[-1] + 1); + assert(mdb_midl_need(_list, list[-1] + 1) == 0); list = *_list; } - mdb_midl_insert(list, pg); + mdb_midl_xappend(list, pg); + mdb_midl_sort(list); + //assert(mdb_midl_insert(list, pg) == 0); + assert(__midl_validate(*handle)); return pg; } @@ -307,13 +478,17 @@ __midl_is_set(void *handle, pgno_t pg) static pgno_t __midl_clear(void **handle, pgno_t pg) { + assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; unsigned len = list[0]; list[0] = len -= 1; for (unsigned j = pg - 1; j < len;) list[++j] = list[++pg]; +#ifdef MDB_DEBUG for (unsigned j = len + 1; j <= list[-1]; j++) list[j] = 0; +#endif + assert(__midl_validate(*handle)); return pg; } @@ -323,8 +498,7 @@ __midl_find_span(void *handle, unsigned len) MDB_IDL list = (MDB_IDL)handle; /* Seek a big enough contiguous page range. Prefer - * pages at the tail, just truncating the list. - */ + pages at the tail, just truncating the list. */ int retry = 1; unsigned i = 0; pgno_t pgno = 0, *mop = list; @@ -339,15 +513,18 @@ __midl_find_span(void *handle, unsigned len) } while (--i > n2); if (--retry < 0) break; + } else { + return -1; } } while (1); search_done:; - return pgno; + return retry < 0 ? -1 : pgno; } static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { + assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg); unsigned j, num = len; @@ -358,24 +535,29 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len) /* Move any stragglers down */ for (j = i - num; j < mop_len;) mop[++j] = mop[++i]; - /* Set all unused values in the array to 0 +#ifdef MDB_DEBUG for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; */ + mop[j] = 0; +#endif + assert(__midl_validate(*handle)); return true; } static bool __midl_release_span(void **handle, pgno_t pg, unsigned len) { + assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + len >= list[-1]) { - mdb_midl_need(_list, list[-1] + len); + assert(mdb_midl_need(_list, list[-1] + len) == 0); list = *_list; } for (size_t i = pg; i < pg + len; i++) { - mdb_midl_insert(list, i); + mdb_midl_xappend(list, i); + // assert(mdb_midl_insert(list, i) == 0); } mdb_midl_sort(list); + assert(__midl_validate(*handle)); return true; } @@ -410,15 +592,15 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len) static bool __midl_merge(void **handle, void *other_handle) { - MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; - MDB_IDL other = (MDB_IDL)other_handle; + assert(__midl_validate(*handle)); + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list, other = (MDB_IDL)other_handle; if (list[0] + other[0] >= list[-1]) { - mdb_midl_need(_list, list[-1] + other[0]); + assert(mdb_midl_need(_list, list[-1] + other[0]) == 0); list = *_list; } - mdb_midl_append_list(_list, other); - list = *_list; - mdb_midl_sort(list); + mdb_midl_xmerge(list, other_handle); + mdb_midl_sort(*_list); + assert(__midl_validate(*handle)); return true; } @@ -440,11 +622,19 @@ static bool __midl_validate(void *handle) { MDB_IDL list = (MDB_IDL)handle; - pgno_t id = 1; - while (id < list[0]) { - if (list[id] >= list[id + 1]) - return false; - id++; + if (list[0] > list[-1]) { + return false; + } + if (list[0] > 1) { + // check for duplicates + for (pgno_t i = 2; i < list[0]; i++) { + if (list[i] == list[i - 1]) { + return false; + } + // ensure ordering + if (list[i] > list[i - 1]) + return false; + } } return true; } @@ -501,7 +691,7 @@ __roar_find_span(void *handle, unsigned len) } offset++; } while (offset <= max); - return offset; + return offset > max ? -1 : offset; } static bool @@ -526,24 +716,14 @@ static bool __roar_is_span(void *handle, pgno_t pg, unsigned len) { roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; - for (pgno_t i = pg; i < pg + len; i++) { - if (roaring_bitmap_contains(rbm, i) != true) { - return false; - } - } - return true; + return roaring_bitmap_contains_range(rbm, pg, pg + len); } static bool __roar_is_empty(void *handle, pgno_t pg, unsigned len) { roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; - for (pgno_t i = 0; i < len; i++) { - if (roaring_bitmap_contains(rbm, pg + i) != false) { - return false; - } - } - return true; + return !roaring_bitmap_contains_range(rbm, pg, pg + len); } static bool @@ -558,15 +738,13 @@ __roar_merge(void **handle, void *other_handle) static size_t __roar_size(void *handle) { - // TODO - return 0; + return roaring_bitmap_frozen_size_in_bytes((roaring_bitmap_t *)handle); } static size_t __roar_count(void *handle) { - // TODO - return 0; + return roaring_bitmap_get_cardinality((roaring_bitmap_t *)handle); } static bool @@ -583,20 +761,35 @@ typedef enum { SM, ML, RB } container_impl_t; typedef struct container { const char *name; + /* allocate a new container */ void *(*alloc)(size_t capacity); + /* free the container */ void (*free)(void *handle); + /* add pg to the container */ pgno_t (*set)(void **handle, pgno_t pg); + /* is pg in the container */ bool (*is_set)(void *handle, pgno_t pg); + /* remove pg from the container */ pgno_t (*clear)(void **handle, pgno_t pg); + /* find a set of contigious page of len and return the smallest pgno */ pgno_t (*find_span)(void *handle, unsigned len); + /* remove the span [pg, pg + len) from the container */ bool (*take_span)(void **handle, pgno_t pg, unsigned len); + /* add the span [pg, pg + len) into the container */ bool (*release_span)(void **handle, pgno_t pg, unsigned len); + /* are the pgno in the span [pg, pg+ len) in the container? */ bool (*is_span)(void *handle, pgno_t pg, unsigned len); + /* are the pgno in the span [pg, pg+ len) notn in the container? */ bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + /* is the span the first one (brute force check) */ bool (*is_first)(void *handle, pgno_t pg, unsigned len); + /* ensure that all pgno contained in other_handle are also in handle */ bool (*merge)(void **handle, void *other_handle); + /* the bytes size of the container */ size_t (*size)(void *handle); + /* the number of items in the container */ size_t (*count)(void *handle); + /* perform internal validation on the container (optional) */ bool (*validate)(void *handle); } container_t; @@ -667,48 +860,57 @@ FILE *fp; #define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) #define mutate(type, fn, ...) (type == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, containers[type].fn(&handles[type], __VA_ARGS__) #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) -#define compare(set) \ +#define checkpoint(set) \ for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ verify_eq(0, handles[0], type, handles[type]); \ - } + } \ + record_checkpoint(fp, handles[0]) bool verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm) { + bool ret = true; uint64_t max = roaring_bitmap_maximum(rbm); roaring_uint32_iterator_t iter; roaring_iterator_init(rbm, &iter); for (uint64_t i = 0; i <= max; i++) { if (i == iter.current_value) { - assert(sparsemap_is_set(map, i) == true); + if (sparsemap_is_set(map, i) == false) { + fprintf(stdout, "- %zu ", i); + ret = false; + } roaring_uint32_iterator_advance(&iter); } else { - assert(sparsemap_is_set(map, i) == false); + if (sparsemap_is_set(map, i) == true) { + fprintf(stdout, "+ %zu ", i); + ret = false; + } } } - return true; + return ret; } bool verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) { + bool ret = true; for (MDB_ID i = 1; i <= list[0]; i++) { pgno_t pg = list[i]; unsigned skipped = i == 1 ? 0 : list[i - 1] - list[i] - 1; if (skipped) { for (MDB_ID j = list[i - 1]; j > list[i]; j--) { if (sparsemap_is_set(map, pg - j) != false) { - __diag("%zu\n", pg - j); - return false; + fprintf(stdout, "+ %zu ", pg - j); + ret = false; } } } if (sparsemap_is_set(map, pg) != true) { - __diag("%zu\n", pg); - return false; + fprintf(stdout, "- %zu ", pg); + ret = false; } } - return true; + return ret; } bool @@ -769,8 +971,6 @@ stats(size_t iterations, sparsemap_t *map, MDB_IDL list) td_quantile(b_span_merge, .999)); } -#define INITIAL_AMOUNT 1024 * 2 - #define SHORT_OPT "r:fa:bh" #define LONG_OPT "record:,force,amount:,buffer,help" @@ -791,7 +991,7 @@ main(int argc, char *argv[]) int opt; const char *record_file = NULL; int force_flag = 0; - size_t left, amt = INITIAL_AMOUNT; + size_t left, iteration = 0, amt = INITIAL_AMOUNT; bool buffer = true; fp = stdout; @@ -862,16 +1062,20 @@ main(int argc, char *argv[]) } cast(type, validate); } - compare(types); + checkpoint(types); left = amt; while (true) { + iteration++; // the an amount [1, 16] of pages to find preferring smaller sizes unsigned len = toss(15) + 1; pgno_t loc[num_types]; foreach(types) { loc[type] = invoke(type, find_span, len); + if (loc[type] == -1) { + goto larger_please; + } } for (unsigned n = 0; n < num_types; n++) { foreach(types) @@ -890,14 +1094,13 @@ main(int argc, char *argv[]) assert(mutate(type, take_span, loc[which_loc], len)); cast(type, validate); } - compare(types); + checkpoint(types); left -= len; - // Once we've used 1/10th of the free list, let's replenish it a bit. - if (amt - left > amt / 10) { + if (toss(15) > 13) { do { pgno_t pgno; - size_t len, retries = amt; + size_t len, retries = amt / 10; // Find a hole in the map to replenish. do { len = toss(15) + 1; @@ -911,7 +1114,7 @@ main(int argc, char *argv[]) { assert(invoke(type, is_empty, pgno, len)); } - compare(types); + checkpoint(types); foreach(types) { assert(invoke(type, is_span, pgno, len) == false); @@ -919,7 +1122,7 @@ main(int argc, char *argv[]) assert(invoke(type, is_span, pgno, len) == true); cast(type, validate); } - compare(types); + checkpoint(types); left += len; } } while (amt - left > amt / 100); @@ -930,7 +1133,7 @@ main(int argc, char *argv[]) pgno_t max; larger_please: new_amt = 1024 + (xorshift32() % 2048) + toss(1024); - new_offset = sparsemap_get_ending_offset(handles[SM]); + new_offset = sparsemap_get_ending_offset(handles[SM]) + 1; // Build a new container to merge with the existing one. foreach(types) @@ -939,6 +1142,7 @@ main(int argc, char *argv[]) for (size_t i = 0; i < new_amt; i++) { // We don't want to record and we're using new_handles not // handles, so call fn directly. + assert(containers[type].is_set(handles[type], i + new_offset) == false); assert(containers[type].is_set(new_handles[type], i + new_offset) == false); containers[type].set(&new_handles[type], i + new_offset); assert(containers[type].is_set(new_handles[type], i + new_offset) == true); @@ -949,7 +1153,7 @@ main(int argc, char *argv[]) assert(mutate(type, merge, new_handles[type])); cast(type, validate); } - compare(types); + checkpoint(types); left += new_amt; amt += new_amt; foreach(types) -- 2.45.2 From 1b4e4c528713d5a3355f9df760fae32ae3ee154d Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 20 May 2024 11:06:04 -0400 Subject: [PATCH 07/10] WIP --- include/bencode.h | 141 +++++++++++++ lib/bencode.c | 511 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 652 insertions(+) create mode 100644 include/bencode.h create mode 100644 lib/bencode.c diff --git a/include/bencode.h b/include/bencode.h new file mode 100644 index 0000000..f79e93f --- /dev/null +++ b/include/bencode.h @@ -0,0 +1,141 @@ +// https://github.com/willemt/heapless-bencode +// +#ifndef BENCODE_H_ +#define BENCODE_H_ + +typedef struct +{ + const char *str; + const char *start; + void *parent; + int val; + int len; +} bencode_t; + +/** +* Initialise a bencode object. +* @param be The bencode object +* @param str Buffer we expect input from +* @param len Length of buffer +*/ +void bencode_init( + bencode_t * be, + const char *str, + int len +); + +/** +* @return 1 if the bencode object is an int; otherwise 0. +*/ +int bencode_is_int( + const bencode_t * be +); + +/** +* @return 1 if the bencode object is a string; otherwise 0. +*/ +int bencode_is_string( + const bencode_t * be +); + +/** +* @return 1 if the bencode object is a list; otherwise 0. +*/ +int bencode_is_list( + const bencode_t * be +); + +/** +* @return 1 if the bencode object is a dict; otherwise 0. +*/ +int bencode_is_dict( + const bencode_t * be +); + +/** +* Obtain value from integer bencode object. +* @param val Long int we are writing the result to +* @return 1 on success, otherwise 0 +*/ +int bencode_int_value( + bencode_t * be, + long int *val +); + +/** +* @return 1 if there is another item on this dict; otherwise 0. +*/ +int bencode_dict_has_next( + bencode_t * be +); + +/** +* Get the next item within this dictionary. +* @param be_item Next item. +* @param key Const pointer to key string of next item. +* @param klen Length of the key of next item. +* @return 1 on success; otherwise 0. +*/ +int bencode_dict_get_next( + bencode_t * be, + bencode_t * be_item, + const char **key, + int *klen +); + +/** +* Get the string value from this bencode object. +* The buffer returned is stored on the stack. +* @param be The bencode object. +* @param str Const pointer to the buffer. +* @param slen Length of the buffer we are outputting. +* @return 1 on success; otherwise 0 +*/ +int bencode_string_value( + bencode_t * be, + const char **str, + int *len +); + +/** +* Tell if there is another item within this list. +* @param be The bencode object +* @return 1 if another item exists on the list; 0 otherwise; -1 on invalid processing +*/ +int bencode_list_has_next( + bencode_t * be +); + +/** +* Get the next item within this list. +* @param be The bencode object +* @param be_item The next bencode object that we are going to initiate. +* @return return 0 on end; 1 on have next; -1 on error +*/ +int bencode_list_get_next( + bencode_t * be, + bencode_t * be_item +); + +/** + * Copy bencode object into other bencode object + */ +void bencode_clone( + bencode_t * be, + bencode_t * output +); + +/** +* Get the start and end position of this dictionary +* @param be Bencode object +* @param start Starting string +* @param len Length of the dictionary +* @return 0 on success +*/ +int bencode_dict_get_start_and_len( + bencode_t * be, + const char **start, + int *len +); + +#endif /* BENCODE_H_ */ diff --git a/lib/bencode.c b/lib/bencode.c new file mode 100644 index 0000000..7bb14a5 --- /dev/null +++ b/lib/bencode.c @@ -0,0 +1,511 @@ + +/** + * Copyright (c) 2014, Willem-Hendrik Thiart + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * @file + * @brief Read bencoded data + * @author Willem Thiart himself@willemthiart.com + * @version 0.1 + * https://github.com/willemt/heapless-bencode + */ + +#include +#include +#include +#include +#include +#include + +#include "bencode.h" + +/** + * Carry length over to a new bencode object. + * This is done so that we don't exhaust the buffer */ +static int __carry_length( + bencode_t * be, + const char *pos +) +{ + assert(0 < be->len); + return be->len - (pos - be->str); +} + +/** + * @param end The point that we read out to + * @param val Output of number represented by string + * @return 0 if error; otherwise 1 */ +static long int __read_string_int( + const char *sp, + const char **end, + long int *val +) +{ + *val = 0; + + int sign = 1; + + /* negative */ + if ('-' == *sp) + { + sign = -1; + sp++; + } + + if (!isdigit(*sp)) + return 0; + + /* work out number */ + do + { + *val *= 10; + *val += *sp - '0'; + sp++; + } + while (isdigit(*sp)); + + *val *= sign; + + *end = sp; + return 1; +} + +int bencode_is_dict( + const bencode_t * be +) +{ + return be->str && *be->str == 'd'; +} + +int bencode_is_int( + const bencode_t * be +) +{ + return be->str && *be->str == 'i'; +} + +int bencode_is_list( + const bencode_t * be +) +{ + return be->str && *be->str == 'l'; +} + +int bencode_is_string( + const bencode_t * be +) +{ + const char *sp; + + sp = be->str; + + assert(sp); + + if (!isdigit(*sp)) + return 0; + + do sp++; + while (isdigit(*sp)); + + return *sp == ':'; +} + +/** + * Move to next item + * @param sp The bencode string we are processing + * @return Pointer to string on success, otherwise NULL */ +static const char *__iterate_to_next_string_pos( + bencode_t * be, + const char *sp +) +{ + bencode_t iter; + + bencode_init(&iter, sp, __carry_length(be, sp)); + + if (bencode_is_dict(&iter)) + { + /* navigate to the end of the dictionary */ + while (bencode_dict_has_next(&iter)) + { + /* ERROR: input string is invalid */ + if (0 == bencode_dict_get_next(&iter, NULL, NULL, NULL)) + return NULL; + } + + /* special case for empty dict */ + if (*iter.str == 'd' && *(iter.str + 1) == 'e') + return iter.str + 2; + + return iter.str + 1; + } + else if (bencode_is_list(&iter)) + { + /* navigate to the end of the list */ + while (bencode_list_has_next(&iter)) + { + /* ERROR: input string is invalid */ + if (-1 == bencode_list_get_next(&iter, NULL)) + return NULL; + } + + return iter.str + 1; + } + else if (bencode_is_string(&iter)) + { + int len; + const char *str; + + /* ERROR: input string is invalid */ + if (0 == bencode_string_value(&iter, &str, &len)) + return NULL; + + return str + len; + } + else if (bencode_is_int(&iter)) + { + const char *end; + long int val; + + if (0 == __read_string_int(&iter.str[1], &end, &val)) + return NULL; + + assert(end[0] == 'e'); + + return end + 1; + } + + /* input string is invalid */ + return NULL; +} + +static const char *__read_string_len( + const char *sp, + int *slen +) +{ + *slen = 0; + + if (!isdigit(*sp)) + return NULL; + + do + { + *slen *= 10; + *slen += *sp - '0'; + sp++; + } + while (isdigit(*sp)); + + assert(*sp == ':'); + assert(0 <= *slen); + + return sp + 1; +} + +void bencode_init( + bencode_t * be, + const char *str, + const int len +) +{ + memset(be, 0, sizeof(bencode_t)); + be->str = be->start = str; + be->str = str; + be->len = len; + /* assert(0 < be->len); */ +} + +int bencode_int_value( + bencode_t * be, + long int *val +) +{ + const char *end; + + if (0 == __read_string_int(&be->str[1], &end, val)) + return 0; + + assert(end[0] == 'e'); + + return 1; +} + +int bencode_dict_has_next( + bencode_t * be +) +{ + const char *sp = be->str; + + assert(be); + + if (!sp + /* at end of dict */ + || *sp == 'e' + /* at end of string */ + || *sp == '\0' + || *sp == '\r' + /* empty dict */ + || (*sp == 'd' && *(sp + 1) == 'e') + /* at the end of the input string */ + || be->str >= be->start + be->len - 1) + { + return 0; + } + + return 1; +} + +int bencode_dict_get_next( + bencode_t * be, + bencode_t * be_item, + const char **key, + int *klen +) +{ + const char *sp = be->str; + const char *keyin; + int len; + + assert(*sp != 'e'); + + /* if at start increment to 1st key */ + if (*sp == 'd') + { + sp++; + } + + /* can't get the next item if we are at the end of the dict */ + if (*sp == 'e') + { + return 0; + } + + /* 1. find out what the key's length is */ + keyin = __read_string_len(sp, &len); + + /* 2. if we have a value bencode, lets put the value inside */ + if (be_item) + { + *klen = len; + bencode_init(be_item, keyin + len, __carry_length(be, keyin + len)); + } + + /* 3. iterate to next dict key, or move to next item in parent */ + if (!(be->str = __iterate_to_next_string_pos(be, keyin + len))) + { + /* if there isn't anything else or we are at the end of the string */ + return 0; + } + +#if 0 + /* if at the end of bencode, check that the 'e' terminator is there */ + if (be->str == be->start + be->len - 1 && *be->str != 'e') + { + be->str = NULL; + return 0; + } +#endif + + assert(be->str); + + if (key) + { + *key = keyin; + } + + return 1; +} + +int bencode_string_value( + bencode_t * be, + const char **str, + int *slen +) +{ + const char *sp; + + *slen = 0; + + assert(bencode_is_string(be)); + + sp = __read_string_len(be->str, slen); + + assert(sp); + assert(0 < be->len); + + /* make sure we still fit within the buffer */ + if (sp + *slen > be->start + (long int) be->len) + { + *str = NULL; + return 0; + } + + *str = sp; + return 1; +} + +int bencode_list_has_next( + bencode_t * be +) +{ + const char *sp; + + sp = be->str; + + /* empty list */ + if (*sp == 'l' && + sp == be->start && + *(sp + 1) == 'e') + { + be->str++; + return 0; + } + + /* end of list */ + if (*sp == 'e') + { + return 0; + } + + return 1; +} + +int bencode_list_get_next( + bencode_t * be, + bencode_t * be_item +) +{ + const char *sp; + + sp = be->str; + +#if 0 /* debugging */ + printf("%.*s\n", (int)(be->len - (be->str - be->start)), be->str); +#endif + + /* we're at the end */ + if (!sp || *sp == 'e') + return 0; + + if (*sp == 'l') + { + /* just move off the start of this list */ + if (be->start == be->str) + { + sp++; + } + } + + /* can't get the next item if we are at the end of the list */ + if (*sp == 'e') + { + be->str = sp; + return 0; + } + + /* populate the be_item if it is available */ + if (be_item) + { + bencode_init(be_item, sp, __carry_length(be, sp)); + } + + /* iterate to next value */ + if (!(be->str = __iterate_to_next_string_pos(be, sp))) + { + return -1; + } + + return 1; +} + +void bencode_clone( + bencode_t * be, + bencode_t * output +) +{ + memcpy(output, be, sizeof(bencode_t)); +} + +int bencode_dict_get_start_and_len( + bencode_t * be, + const char **start, + int *len +) +{ + bencode_t ben, ben2; + const char *ren; + int tmplen; + + bencode_clone(be, &ben); + *start = ben.str; + while (bencode_dict_has_next(&ben)) + bencode_dict_get_next(&ben, &ben2, &ren, &tmplen); + + *len = ben.str - *start + 1; + return 0; +} + +static int __validate(bencode_t *ben) +{ + if (bencode_is_dict(ben)) + { + while (bencode_dict_has_next(ben)) + { + int klen; + const char *key; + bencode_t benk; + + if (0 == bencode_dict_get_next(ben, &benk, &key, &klen)) + return -1; + + int ret = __validate(&benk); + if (0 != ret) + return ret; + } + } + else if (bencode_is_list(ben)) + { + while (bencode_list_has_next(ben)) + { + bencode_t benl; + + if (-1 == bencode_list_get_next(ben, &benl)) + return -1; + + int ret = __validate(&benl); + if (0 != ret) + return ret; + } + + } + else if (bencode_is_string(ben)) + { + const char *str; + int len; + + if (0 == bencode_string_value(ben, &str, &len)) + return -1; + } + else if (bencode_is_int(ben)) + { + long int val; + + if (0 == bencode_int_value(ben, &val)) + return -1; + } + else + return -1; + + return 0; +} + +int bencode_validate(char* buf, int len) +{ + bencode_t ben; + if (0 == len) + return 0; + bencode_init(&ben, buf, len); + return __validate(&ben); +} -- 2.45.2 From e0c79c93482f44b6c7b2172a11c08f80f8e9bfdb Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 20 May 2024 14:11:33 -0400 Subject: [PATCH 08/10] WIP --- Makefile | 2 +- lib/common.c | 23 +++ tests/soak.c | 464 ++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 373 insertions(+), 116 deletions(-) diff --git a/Makefile b/Makefile index 1eb2aa4..9d05fce 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ clean: rm -f $(EXAMPLES) examples/*.o format: - clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c lib/common.c include/common.h + clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c tests/midl.c lib/common.c include/common.h # clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c %.o: src/%.c diff --git a/lib/common.c b/lib/common.c index 962ed71..1cdfff2 100644 --- a/lib/common.c +++ b/lib/common.c @@ -51,6 +51,29 @@ tsc(void) return 0; } +// get microsecond timestamp +uint64_t +msts() +{ +#ifdef _SC_MONOTONIC_CLOCK + struct timespec ts; + if (sysconf(_SC_MONOTONIC_CLOCK) > 0) { + /* A monotonic clock presents */ + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) + return (uint64_t)(ts.tv_sec * 1000000 + ts.tv_nsec / 1000); + else + return 0; + } + return 0; +#else + struct timeval tv; + if (gettimeofday(&tv, NULL) == 0) + return (uint64_t)(tv.tv_sec * 1000000 + tv.tv_usec); + else + return 0; +#endif +} + double nsts(void) { diff --git a/tests/soak.c b/tests/soak.c index 058a5af..53b4cd1 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "../include/common.h" @@ -14,10 +15,123 @@ #include "midl.c" +#define INITIAL_AMOUNT 1024 * 2 + +bool recording = true; + typedef size_t pgno_t; -#define INITIAL_AMOUNT 1024 * 2 -bool recording = true; +typedef enum { SM, ML, RB } container_impl_t; + +typedef struct container { + const char *name; + /* allocate a new container */ + void *(*alloc)(size_t capacity); + struct { + td_histogram_t *td; + void *(*alloc)(size_t capacity); + } alloc_stats; + + /* free the container */ + void (*free)(void *handle); + struct { + td_histogram_t *td; + void (*free)(void *handle); + } free_stats; + + /* add pg to the container */ + pgno_t (*set)(void **handle, pgno_t pg); + struct { + td_histogram_t *td; + pgno_t (*set)(void **handle, pgno_t pg); + } set_stats; +#define timed_set(fn) (pgno_t(*)(void **, pgno_t)) __stats_set, .set_stats.set = fn + + /* is pg in the container */ + bool (*is_set)(void *handle, pgno_t pg); + struct { + td_histogram_t *td; + bool (*is_set)(void *handle, pgno_t pg); + } is_set_stats; +#define timed_is_set(fn) (bool (*)(void *, pgno_t)) __stats_is_set, .is_set_stats.is_set = fn + + /* remove pg from the container */ + pgno_t (*clear)(void **handle, pgno_t pg); + struct { + td_histogram_t *td; + pgno_t (*clear)(void **handle, pgno_t pg); + } clear_stats; +#define timed_clear(fn) (pgno_t(*)(void **, pgno_t)) __stats_clear, .clear_stats.clear = fn + + /* find a set of contigious page of len and return the smallest pgno */ + pgno_t (*find_span)(void *handle, unsigned len); + struct { + td_histogram_t *td; + pgno_t (*find_span)(void *handle, unsigned len); + } find_span_stats; +#define timed_find_span(fn) (pgno_t(*)(void *, unsigned)) __stats_find_span, .find_span_stats.find_span = fn + + /* remove the span [pg, pg + len) from the container */ + bool (*take_span)(void **handle, pgno_t pg, unsigned len); + struct { + td_histogram_t *td; + bool (*take_span)(void **handle, pgno_t pg, unsigned len); + } take_span_stats; +#define timed_take_span(fn) (bool (*)(void **, pgno_t, unsigned)) __stats_take_span, .take_span_stats.take_span = fn + + /* add the span [pg, pg + len) into the container */ + bool (*release_span)(void **handle, pgno_t pg, unsigned len); + struct { + td_histogram_t *td; + bool (*release_span)(void **handle, pgno_t pg, unsigned len); + } release_span_stats; +#define timed_release_span(fn) (bool (*)(void **, pgno_t, unsigned)) __stats_release_span, .release_span_stats.release_span = fn + + /* are the pgno in the span [pg, pg+ len) in the container? */ + bool (*is_span)(void *handle, pgno_t pg, unsigned len); + struct { + td_histogram_t *td; + bool (*is_span)(void *handle, pgno_t pg, unsigned len); + } is_span_stats; +#define timed_is_span(fn) (bool (*)(void *, pgno_t, unsigned)) __stats_is_span, .is_span_stats.is_span = fn + + /* are the pgno in the span [pg, pg+ len) notn in the container? */ + bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + struct { + td_histogram_t *td; + bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + } is_empty_stats; +#define timed_is_empty(fn) (bool (*)(void *, pgno_t, unsigned)) __stats_is_empty, .is_empty_stats.is_empty = fn + + /* is the span the first one (brute force check) */ + bool (*is_first)(void *handle, pgno_t pg, unsigned len); + struct { + td_histogram_t *td; + bool (*is_first)(void *handle, pgno_t pg, unsigned len); + } is_first_stats; +#define timed_is_first(fn) (bool (*)(void *, pgno_t, unsigned)) __stats_is_first, .is_first_stats.is_first = fn + + /* ensure that all pgno contained in other_handle are also in handle */ + bool (*merge)(void **handle, void *other_handle); + struct { + td_histogram_t *td; + bool (*merge)(void **handle, void *other_handle); + } merge_stats; +#define timed_merge(fn) (bool (*)(void **, void *)) __stats_merge, .merge_stats.merge = fn + + /* the bytes size of the container */ + size_t (*size)(void *handle); + td_histogram_t *size_stats; + + /* the number of items in the container */ + size_t (*count)(void *handle); + td_histogram_t *count_stats; + + /* perform internal validation on the container (optional) */ + bool (*validate)(void *handle); + td_histogram_t *validate_stats; + +} container_t; char * bytes_as(double bytes, char *s, size_t size) @@ -200,6 +314,8 @@ b64_decode(const char *in, unsigned char *out, size_t outlen) return 1; } +/* recording ------------------------------------------------------------- */ + static void record_set_mutation(FILE *out, pgno_t pg) { @@ -462,7 +578,7 @@ __midl_set(void **handle, pgno_t pg) } mdb_midl_xappend(list, pg); mdb_midl_sort(list); - //assert(mdb_midl_insert(list, pg) == 0); + // assert(mdb_midl_insert(list, pg) == 0); assert(__midl_validate(*handle)); return pg; } @@ -631,7 +747,7 @@ __midl_validate(void *handle) if (list[i] == list[i - 1]) { return false; } - // ensure ordering + // ensure ordering if (list[i] > list[i - 1]) return false; } @@ -755,59 +871,192 @@ __roar_validate(void *handle) return true; } +/* histogram ------------------------------------------------------------- */ + +typedef struct sw { + struct timespec t1; /* start time */ + struct timespec t2; /* stop time */ +} sw_t; + +void +ts(struct timespec *ts) +{ + if (clock_gettime(CLOCK_REALTIME, ts) == -1) { + perror("clock_gettime"); + } +} + +static double +elapsed(struct timespec *s, struct timespec *e) +{ + long sec, nanos; + + sec = e->tv_sec - s->tv_sec; + nanos = e->tv_nsec - e->tv_nsec; + if (nanos < 0) { + nanos += 1e9; + sec--; + } + return ((double)nanos / 1e9 + (double)sec); +} + +static pgno_t +__stats_set(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) +{ + if (stats) { + struct timespec s, e; + ts(&s); + pgno_t retval = ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); +} + +static bool +__stats_is_set(td_histogram_t *stats, void *fn, void *handle, pgno_t pg) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t))fn)(handle, pg); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t))fn)(handle, pg); +} + +static pgno_t +__stats_clear(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) +{ + if (stats) { + struct timespec s, e; + ts(&s); + pgno_t retval = ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); +} + +static pgno_t +__stats_find_span(td_histogram_t *stats, void *fn, void *handle, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + pgno_t retval = ((pgno_t(*)(void *, unsigned))fn)(handle, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((pgno_t(*)(void *, unsigned))fn)(handle, len); +} + +static bool +__stats_take_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); +} + +static bool +__stats_release_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); +} + +static bool +__stats_is_span(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); +} + +static bool +__stats_is_empty(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); +} + +static bool +__stats_is_first(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); +} + +static bool +__stats_merge(td_histogram_t *stats, void *fn, void **handle, void *other_handle) +{ + if (stats) { + struct timespec s, e; + ts(&s); + bool retval = ((bool (*)(void **, void *))fn)(handle, other_handle); + ts(&e); + td_add(stats, elapsed(&s, &e), 1); + return retval; + } + return ((bool (*)(void **, void *))fn)(handle, other_handle); +} + /* ----------------------------------------------------------------------- */ -typedef enum { SM, ML, RB } container_impl_t; - -typedef struct container { - const char *name; - /* allocate a new container */ - void *(*alloc)(size_t capacity); - /* free the container */ - void (*free)(void *handle); - /* add pg to the container */ - pgno_t (*set)(void **handle, pgno_t pg); - /* is pg in the container */ - bool (*is_set)(void *handle, pgno_t pg); - /* remove pg from the container */ - pgno_t (*clear)(void **handle, pgno_t pg); - /* find a set of contigious page of len and return the smallest pgno */ - pgno_t (*find_span)(void *handle, unsigned len); - /* remove the span [pg, pg + len) from the container */ - bool (*take_span)(void **handle, pgno_t pg, unsigned len); - /* add the span [pg, pg + len) into the container */ - bool (*release_span)(void **handle, pgno_t pg, unsigned len); - /* are the pgno in the span [pg, pg+ len) in the container? */ - bool (*is_span)(void *handle, pgno_t pg, unsigned len); - /* are the pgno in the span [pg, pg+ len) notn in the container? */ - bool (*is_empty)(void *handle, pgno_t pg, unsigned len); - /* is the span the first one (brute force check) */ - bool (*is_first)(void *handle, pgno_t pg, unsigned len); - /* ensure that all pgno contained in other_handle are also in handle */ - bool (*merge)(void **handle, void *other_handle); - /* the bytes size of the container */ - size_t (*size)(void *handle); - /* the number of items in the container */ - size_t (*count)(void *handle); - /* perform internal validation on the container (optional) */ - bool (*validate)(void *handle); -} container_t; - // clang-format off container_t containers[] = { { "sparsemap", .alloc = __sm_alloc, .free = __sm_free, - .set = __sm_set, - .is_set = __sm_is_set, - .clear = __sm_clear, - .find_span = __sm_find_span, - .take_span = __sm_take_span, - .release_span = __sm_release_span, - .is_span = __sm_is_span, - .is_empty = __sm_is_empty, - .is_first = __sm_is_first, - .merge = __sm_merge, + .set = timed_set(__sm_set), + .is_set = timed_is_set(__sm_is_set), + .clear = timed_clear(__sm_clear), + .find_span = timed_find_span(__sm_find_span), + .take_span = timed_take_span(__sm_take_span), + .release_span = timed_release_span(__sm_release_span), + .is_span = timed_is_span(__sm_is_span), + .is_empty = timed_is_empty(__sm_is_empty), + .is_first = timed_is_first(__sm_is_first), + .merge = timed_merge(__sm_merge), .size = __sm_size, .count = __sm_count, .validate = NULL @@ -815,16 +1064,16 @@ container_t containers[] = { { "midl", .alloc = __midl_alloc, .free = __midl_free, - .set = __midl_set, - .is_set = __midl_is_set, - .clear = __midl_clear, - .find_span = __midl_find_span, - .take_span = __midl_take_span, - .release_span = __midl_release_span, - .is_span = __midl_is_span, - .is_empty = __midl_is_empty, - .is_first = NULL, - .merge = __midl_merge, + .set = timed_set(__midl_set), + .is_set = timed_is_set(__midl_is_set), + .clear = timed_clear(__midl_clear), + .find_span = timed_find_span(__midl_find_span), + .take_span = timed_take_span(__midl_take_span), + .release_span = timed_release_span(__midl_release_span), + .is_span = timed_is_span(__midl_is_span), + .is_empty = timed_is_empty(__midl_is_empty), + .is_first = timed_is_first(NULL), + .merge = timed_merge(__midl_merge), .size = __midl_size, .count = __midl_count, .validate = __midl_validate @@ -832,16 +1081,16 @@ container_t containers[] = { { "roaring", .alloc = __roar_alloc, .free = __roar_free, - .set = __roar_set, - .is_set = __roar_is_set, - .clear = __roar_clear, - .find_span = __roar_find_span, - .take_span = __roar_take_span, - .release_span = __roar_release_span, - .is_span = __roar_is_span, - .is_empty = __roar_is_empty, - .is_first = NULL, - .merge = __roar_merge, + .set = timed_set(__roar_set), + .is_set = timed_is_set(__roar_is_set), + .clear = timed_clear(__roar_clear), + .find_span = timed_find_span(__roar_find_span), + .take_span = timed_take_span(__roar_take_span), + .release_span = timed_release_span(__roar_release_span), + .is_span = timed_is_span(__roar_is_span), + .is_empty = timed_is_empty(__roar_is_empty), + .is_first = timed_is_first(NULL), + .merge = timed_merge(__roar_merge), .size = __roar_size, .count = __roar_count, .validate = __roar_validate, @@ -849,6 +1098,8 @@ container_t containers[] = { }; // clang-format on +/* ----------------------------------------------------------------------- */ + void *handles[(sizeof((containers)) / sizeof((containers)[0]))]; void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))]; FILE *fp; @@ -857,8 +1108,11 @@ FILE *fp; #define cast(type, fn, ...) \ if (containers[type].fn) \ containers[type].fn(handles[type], ##__VA_ARGS__) -#define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) -#define mutate(type, fn, ...) (type == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, containers[type].fn(&handles[type], __VA_ARGS__) + +#define invoke(type, fn, ...) __stats_##fn(containers[type].fn##_stats.td, containers[type].fn##_stats.fn, handles[type], __VA_ARGS__) +#define mutate(type, fn, ...) \ + (type == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, \ + __stats_##fn(containers[type].fn##_stats.td, containers[type].fn##_stats.fn, &handles[type], __VA_ARGS__) #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) #define checkpoint(set) \ for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ @@ -932,45 +1186,6 @@ verify_eq(unsigned a, void *ad, unsigned b, void *bd) return ret; } -td_histogram_t *l_span_loc; -td_histogram_t *b_span_loc; -td_histogram_t *l_span_take; -td_histogram_t *b_span_take; -td_histogram_t *l_span_merge; -td_histogram_t *b_span_merge; - -void -stats_header(void) -{ - printf( - "timestamp,iterations,idl_cap,idl_used,idl_bytes,sm_cap,sm_used,idl_loc_p50,idl_loc_p75,idl_loc_p90,idl_loc_p99,idl_loc_p999,sm_loc_p50,sm_loc_p75,sm_loc_p90,sm_loc_p99,sm_loc_p999,idl_take_p50,idl_take_p75,idl_take_p90,idl_take_p99,idl_take_p999,sm_take_p50,sm_take_p75,sm_take_p90,sm_take_p99,sm_take_p999,idl_merge_p50,idl_merge_p75,idl_merge_p90,idl_merge_p99,idl_merge_p999,sm_merge_p50,sm_merge_p75,sm_merge_p90,sm_merge_p99,sm_merge_p999\n"); -} - -void -stats(size_t iterations, sparsemap_t *map, MDB_IDL list) -{ - if (iterations < 10) - return; - - td_compress(l_span_loc); - td_compress(b_span_loc); - td_compress(l_span_take); - td_compress(b_span_take); - td_compress(l_span_merge); - td_compress(b_span_merge); - - printf( - "%f,%zu,%zu,%zu,%zu,%zu,%zu,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f\n", - nsts(), iterations, list[-1], list[0], MDB_IDL_SIZEOF(list), sparsemap_get_capacity(map), sparsemap_get_size(map), td_quantile(l_span_loc, .5), - td_quantile(l_span_loc, .75), td_quantile(l_span_loc, .90), td_quantile(l_span_loc, .99), td_quantile(l_span_loc, .999), td_quantile(b_span_loc, .5), - td_quantile(b_span_loc, .75), td_quantile(b_span_loc, .90), td_quantile(b_span_loc, .99), td_quantile(b_span_loc, .999), td_quantile(l_span_take, .5), - td_quantile(l_span_take, .75), td_quantile(l_span_take, .90), td_quantile(l_span_take, .99), td_quantile(l_span_take, .999), td_quantile(b_span_take, .5), - td_quantile(b_span_take, .75), td_quantile(b_span_take, .90), td_quantile(b_span_take, .99), td_quantile(b_span_take, .999), td_quantile(l_span_merge, .5), - td_quantile(l_span_merge, .75), td_quantile(l_span_merge, .90), td_quantile(l_span_merge, .99), td_quantile(l_span_merge, .999), - td_quantile(b_span_merge, .5), td_quantile(b_span_merge, .75), td_quantile(b_span_merge, .90), td_quantile(b_span_merge, .99), - td_quantile(b_span_merge, .999)); -} - #define SHORT_OPT "r:fa:bh" #define LONG_OPT "record:,force,amount:,buffer,help" @@ -1051,6 +1266,25 @@ main(int argc, char *argv[]) unsigned types[] = { SM, ML, RB }; unsigned num_types = (sizeof((types)) / sizeof((types)[0])); + foreach(types) + { + containers[type].alloc_stats.td = NULL; + containers[type].free_stats.td = NULL; + containers[type].set_stats.td = td_new(100); + containers[type].is_set_stats.td = td_new(100); + containers[type].clear_stats.td = td_new(100); + containers[type].find_span_stats.td = td_new(100); + containers[type].take_span_stats.td = td_new(100); + containers[type].release_span_stats.td = td_new(100); + containers[type].is_span_stats.td = NULL; + containers[type].is_empty_stats.td = NULL; + containers[type].is_first_stats.td = NULL; + containers[type].merge_stats.td = td_new(100); + containers[type].size = NULL; + containers[type].count = NULL; + containers[type].validate = NULL; + } + /* Setup: add an amt of bits to each container. */ foreach(types) { @@ -1142,10 +1376,10 @@ main(int argc, char *argv[]) for (size_t i = 0; i < new_amt; i++) { // We don't want to record and we're using new_handles not // handles, so call fn directly. - assert(containers[type].is_set(handles[type], i + new_offset) == false); - assert(containers[type].is_set(new_handles[type], i + new_offset) == false); - containers[type].set(&new_handles[type], i + new_offset); - assert(containers[type].is_set(new_handles[type], i + new_offset) == true); + assert(containers[type].is_set_stats.is_set(handles[type], i + new_offset) == false); + assert(containers[type].is_set_stats.is_set(new_handles[type], i + new_offset) == false); + containers[type].set_stats.set(&new_handles[type], i + new_offset); + assert(containers[type].is_set_stats.is_set(new_handles[type], i + new_offset) == true); } } foreach(types) -- 2.45.2 From 5afb6c6f0d395142fcc0356e717f050df2618571 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 20 May 2024 16:31:26 -0400 Subject: [PATCH 09/10] WIP --- tests/soak.c | 190 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 159 insertions(+), 31 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index 53b4cd1..3edb522 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -17,8 +17,6 @@ #define INITIAL_AMOUNT 1024 * 2 -bool recording = true; - typedef size_t pgno_t; typedef enum { SM, ML, RB } container_impl_t; @@ -133,6 +131,8 @@ typedef struct container { } container_t; +#define digest(name) containers[type].name##_stats.td + char * bytes_as(double bytes, char *s, size_t size) { @@ -316,6 +316,8 @@ b64_decode(const char *in, unsigned char *out, size_t outlen) /* recording ------------------------------------------------------------- */ +bool recording = false; + static void record_set_mutation(FILE *out, pgno_t pg) { @@ -871,7 +873,9 @@ __roar_validate(void *handle) return true; } -/* histogram ------------------------------------------------------------- */ +/* statistics ------------------------------------------------------------ */ + +bool statistics = false; typedef struct sw { struct timespec t1; /* start time */ @@ -892,7 +896,7 @@ elapsed(struct timespec *s, struct timespec *e) long sec, nanos; sec = e->tv_sec - s->tv_sec; - nanos = e->tv_nsec - e->tv_nsec; + nanos = e->tv_nsec - s->tv_nsec; if (nanos < 0) { nanos += 1e9; sec--; @@ -1102,7 +1106,8 @@ container_t containers[] = { void *handles[(sizeof((containers)) / sizeof((containers)[0]))]; void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))]; -FILE *fp; +FILE *record_fp; +FILE *stats_fp; #define alloc(type, size) containers[type].alloc(size); #define cast(type, fn, ...) \ @@ -1110,15 +1115,15 @@ FILE *fp; containers[type].fn(handles[type], ##__VA_ARGS__) #define invoke(type, fn, ...) __stats_##fn(containers[type].fn##_stats.td, containers[type].fn##_stats.fn, handles[type], __VA_ARGS__) -#define mutate(type, fn, ...) \ - (type == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, \ +#define mutate(type, fn, ...) \ + (type == 0) ? record_##fn##_mutation(record_fp, __VA_ARGS__) : (void)0, \ __stats_##fn(containers[type].fn##_stats.td, containers[type].fn##_stats.fn, &handles[type], __VA_ARGS__) #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) #define checkpoint(set) \ for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ verify_eq(0, handles[0], type, handles[type]); \ } \ - record_checkpoint(fp, handles[0]) + record_checkpoint(record_fp, handles[0]) bool verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm) @@ -1186,36 +1191,39 @@ verify_eq(unsigned a, void *ad, unsigned b, void *bd) return ret; } -#define SHORT_OPT "r:fa:bh" -#define LONG_OPT "record:,force,amount:,buffer,help" - void print_usage(const char *program_name) { printf("Usage: %s [OPTIONS]\n", program_name); - printf(" -r, --record Path to the file for recording (optional)\n"); - printf(" -f, --force Force overwrite of existing file (optional)\n"); - printf(" -b, --buffer Disable buffering writes to stdout/err (optional)\n"); - printf(" -a, --amount Specify the number of entries to record (must be positive, optional)\n"); - printf(" -h, --help Print this help message\n"); + printf(" -r Path to the file for recording (optional)\n"); + printf(" -s Path to the file for statistics (optional)\n"); + printf(" -f Force overwrite of existing file (optional)\n"); + printf(" -b Disable buffering writes to stdout/err (optional)\n"); + printf(" -a Specify the number of entries to record (must be positive, optional)\n"); + printf(" -h Print this help message\n"); } +#define SHORT_OPT "r:s:fa:bh" int main(int argc, char *argv[]) { int opt; const char *record_file = NULL; + const char *stats_file = NULL; int force_flag = 0; size_t left, iteration = 0, amt = INITIAL_AMOUNT; bool buffer = true; - fp = stdout; - - while ((opt = getopt(argc, argv, SHORT_OPT LONG_OPT)) != -1) { + while ((opt = getopt(argc, argv, SHORT_OPT)) != -1) { switch (opt) { case 'r': + recording = true; record_file = optarg; break; + case 's': + statistics = true; + stats_file = optarg; + break; case 'f': force_flag = 1; break; @@ -1240,10 +1248,9 @@ main(int argc, char *argv[]) } } - // Check if record file is specified - if (record_file == NULL) { - recording = false; - } else { + if (recording) { + record_fp = stdout; + // Check for existing file without force flag if (access(record_file, F_OK) == 0 && !force_flag) { fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file); @@ -1251,17 +1258,38 @@ main(int argc, char *argv[]) } // Open the file for writing (truncate if force flag is set) - fp = fopen(record_file, force_flag ? "w" : "a"); - if (fp == NULL) { + record_fp = fopen(record_file, force_flag ? "w" : "a"); + if (record_fp == NULL) { perror("Error opening file"); return 1; } } + // Check if statistics file is specified + if (statistics) { + if (stats_file[0] == '-') { + stats_fp = stdout; + setvbuf(stdout, NULL, _IONBF, 0); + } else { + // Check for existing file without force flag + if (access(stats_file, F_OK) == 0 && !force_flag) { + fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", stats_file); + return 1; + } + + // Open the file for writing (truncate if force flag is set) + stats_fp = fopen(stats_file, force_flag ? "w" : "a"); + if (stats_fp == NULL) { + perror("Error opening file"); + return 1; + } + } + } + // disable buffering if (!buffer) { - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(fp, NULL, _IONBF, 0); + setvbuf(record_fp, NULL, _IONBF, 0); + setvbuf(stats_fp, NULL, _IONBF, 0); } unsigned types[] = { SM, ML, RB }; unsigned num_types = (sizeof((types)) / sizeof((types)[0])); @@ -1280,9 +1308,6 @@ main(int argc, char *argv[]) containers[type].is_empty_stats.td = NULL; containers[type].is_first_stats.td = NULL; containers[type].merge_stats.td = td_new(100); - containers[type].size = NULL; - containers[type].count = NULL; - containers[type].validate = NULL; } /* Setup: add an amt of bits to each container. */ @@ -1299,6 +1324,77 @@ main(int argc, char *argv[]) checkpoint(types); left = amt; + if (statistics) { + const char *names[] = { "sm", "ml", "rb" }; + const char *dists[] = { "p50", "p75", "p90", "p99", "p999" }; + fprintf(stats_fp, "timestamp,iterations,"); + foreach(types) + { + fprintf(stats_fp, "%s_size,%s_bytes,", names[type], names[type]); + if (digest(alloc) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_alloc_%s,", names[type], dists[i]); + } + } + if (digest(free) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_free_%s,", names[type], dists[i]); + } + } + if (digest(set) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_set_%s,", names[type], dists[i]); + } + } + if (digest(is_set) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_is_set_%s,", names[type], dists[i]); + } + } + if (digest(clear) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_clear_%s,", names[type], dists[i]); + } + } + if (digest(find_span) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_find_span_%s,", names[type], dists[i]); + } + } + if (digest(take_span) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_take_span_%s,", names[type], dists[i]); + } + } + if (digest(release_span) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_release_span_%s,", names[type], dists[i]); + } + } + if (digest(is_span) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_is_span_%s,", names[type], dists[i]); + } + } + if (digest(is_empty) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_is_empty_%s,", names[type], dists[i]); + } + } + if (digest(is_first) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_is_first_%s,", names[type], dists[i]); + } + } + if (digest(merge) != NULL) { + for (int i = 0; i < 5; i++) { + fprintf(stats_fp, "%s_merge_%s,", names[type], dists[i]); + } + } + } + fprintf(stats_fp, "\n"); + } + while (true) { iteration++; // the an amount [1, 16] of pages to find preferring smaller sizes @@ -1395,7 +1491,39 @@ main(int argc, char *argv[]) containers[type].free(new_handles[type]); } } + if (statistics) { + const float dists[] = { 0.5, 0.75, 0.90, 0.99, 0.999 }; + fprintf(stats_fp, "%f,%zu,", nsts(), iteration); + foreach(types) + { + fprintf(stats_fp, "%zu,%zu,", containers[type].count(handles[type]), containers[type].size(handles[type])); + // clang-format off + td_histogram_t *td[] = { + digest(alloc), + digest(free), + digest(set), + digest(is_set), + digest(clear), + digest(find_span), + digest(take_span), + digest(release_span), + digest(is_span), + digest(is_empty), + digest(is_first), + digest(merge) + }; + // clang-format on + for (int i = 0; i < 12; i++) { + if (td[i] != NULL) { + td_compress(td[i]); + for (int j = 0; j < 5; j++) { + fprintf(stats_fp, "%.10f,", td_quantile(td[i], dists[j])); + } + } + } + } + fprintf(stats_fp, "\n"); + } } - return 0; } -- 2.45.2 From 4dbb7cec61449f5595f24f30c27259953e264918 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 20 May 2024 20:28:45 -0400 Subject: [PATCH 10/10] WIP --- tests/soak.c | 56 +++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/tests/soak.c b/tests/soak.c index 3edb522..c5fbddc 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -552,7 +552,7 @@ __sm_count(void *handle) /* midl ------------------------------------------------------------------ */ -static bool __midl_validate(void *handle); +//static bool __midl_validate(void *handle); static void * __midl_alloc(size_t capacity) @@ -572,7 +572,7 @@ __midl_free(void *handle) static pgno_t __midl_set(void **handle, pgno_t pg) { - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + 1 == list[-1]) { assert(mdb_midl_need(_list, list[-1] + 1) == 0); @@ -581,7 +581,7 @@ __midl_set(void **handle, pgno_t pg) mdb_midl_xappend(list, pg); mdb_midl_sort(list); // assert(mdb_midl_insert(list, pg) == 0); - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); return pg; } @@ -596,7 +596,7 @@ __midl_is_set(void *handle, pgno_t pg) static pgno_t __midl_clear(void **handle, pgno_t pg) { - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; unsigned len = list[0]; list[0] = len -= 1; @@ -606,7 +606,7 @@ __midl_clear(void **handle, pgno_t pg) for (unsigned j = len + 1; j <= list[-1]; j++) list[j] = 0; #endif - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); return pg; } @@ -642,7 +642,7 @@ search_done:; static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg); unsigned j, num = len; @@ -657,14 +657,14 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len) for (j = mop_len + 1; j <= mop[-1]; j++) mop[j] = 0; #endif - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); return true; } static bool __midl_release_span(void **handle, pgno_t pg, unsigned len) { - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + len >= list[-1]) { assert(mdb_midl_need(_list, list[-1] + len) == 0); @@ -675,7 +675,7 @@ __midl_release_span(void **handle, pgno_t pg, unsigned len) // assert(mdb_midl_insert(list, i) == 0); } mdb_midl_sort(list); - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); return true; } @@ -689,7 +689,7 @@ __midl_is_span(void *handle, pgno_t pg, unsigned len) return false; if (len == 1) return true; - if (list[len] + 1 != list[len - 1]) + if (list[idx] + len - 1 != list[idx - len + 1]) return false; return true; } @@ -710,7 +710,7 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len) static bool __midl_merge(void **handle, void *other_handle) { - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list, other = (MDB_IDL)other_handle; if (list[0] + other[0] >= list[-1]) { assert(mdb_midl_need(_list, list[-1] + other[0]) == 0); @@ -718,7 +718,7 @@ __midl_merge(void **handle, void *other_handle) } mdb_midl_xmerge(list, other_handle); mdb_midl_sort(*_list); - assert(__midl_validate(*handle)); + // assert(__midl_validate(*handle)); return true; } @@ -907,7 +907,7 @@ elapsed(struct timespec *s, struct timespec *e) static pgno_t __stats_set(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); pgno_t retval = ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); @@ -921,7 +921,7 @@ __stats_set(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) static bool __stats_is_set(td_histogram_t *stats, void *fn, void *handle, pgno_t pg) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t))fn)(handle, pg); @@ -935,7 +935,7 @@ __stats_is_set(td_histogram_t *stats, void *fn, void *handle, pgno_t pg) static pgno_t __stats_clear(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); pgno_t retval = ((pgno_t(*)(void **, pgno_t))fn)(handle, pg); @@ -949,7 +949,7 @@ __stats_clear(td_histogram_t *stats, void *fn, void **handle, pgno_t pg) static pgno_t __stats_find_span(td_histogram_t *stats, void *fn, void *handle, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); pgno_t retval = ((pgno_t(*)(void *, unsigned))fn)(handle, len); @@ -963,7 +963,7 @@ __stats_find_span(td_histogram_t *stats, void *fn, void *handle, unsigned len) static bool __stats_take_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); @@ -977,7 +977,7 @@ __stats_take_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, uns static bool __stats_release_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); @@ -991,7 +991,7 @@ __stats_release_span(td_histogram_t *stats, void *fn, void **handle, pgno_t pg, static bool __stats_is_span(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); @@ -1005,7 +1005,7 @@ __stats_is_span(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsign static bool __stats_is_empty(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); @@ -1019,7 +1019,7 @@ __stats_is_empty(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsig static bool __stats_is_first(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsigned len) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void *, pgno_t, unsigned))fn)(handle, pg, len); @@ -1033,7 +1033,7 @@ __stats_is_first(td_histogram_t *stats, void *fn, void *handle, pgno_t pg, unsig static bool __stats_merge(td_histogram_t *stats, void *fn, void **handle, void *other_handle) { - if (stats) { + if (statistics && stats) { struct timespec s, e; ts(&s); bool retval = ((bool (*)(void **, void *))fn)(handle, other_handle); @@ -1427,7 +1427,7 @@ main(int argc, char *argv[]) checkpoint(types); left -= len; - if (toss(15) > 13) { + if (toss(7) == 6) { do { pgno_t pgno; size_t len, retries = amt / 10; @@ -1447,18 +1447,19 @@ main(int argc, char *argv[]) checkpoint(types); foreach(types) { - assert(invoke(type, is_span, pgno, len) == false); + assert(invoke(type, is_empty, pgno, len)); assert(mutate(type, release_span, pgno, len)); - assert(invoke(type, is_span, pgno, len) == true); + assert(invoke(type, is_span, pgno, len)); cast(type, validate); } checkpoint(types); left += len; } - } while (amt - left > amt / 100); + } while (toss(4) < 3); } - if (toss(10) > 8) { + // if (toss(10) > 8) { + if (0) { size_t new_offset, new_amt; pgno_t max; larger_please: @@ -1491,6 +1492,7 @@ main(int argc, char *argv[]) containers[type].free(new_handles[type]); } } + if (statistics) { const float dists[] = { 0.5, 0.75, 0.90, 0.99, 0.999 }; fprintf(stats_fp, "%f,%zu,", nsts(), iteration); -- 2.45.2