/*
 * midl.c - ID list (IDL) management helpers.
 *
 * No #include appears in this file: the translation unit that uses it must
 * already provide <assert.h>, <errno.h>, <limits.h>, <stddef.h>,
 * <stdlib.h> and <string.h>.
 */

/** @defgroup idls ID List Management
 * @{
 */
/** A generic unsigned ID number. These were entryIDs in back-bdb.
 * Preferably it should have the same size as a pointer.
 */
typedef size_t MDB_ID;

/** An IDL is an ID List, a sorted array of IDs. The first
 * element of the array is a counter for how many actual
 * IDs are in the list. In the original back-bdb code, IDLs are
 * sorted in ascending order. For libmdb IDLs are sorted in
 * descending order.
 *
 * Lists obtained from #mdb_midl_alloc() additionally keep their capacity
 * in the slot just before the counter (see #MDB_IDL_ALLOCLEN).
 */
typedef MDB_ID *MDB_IDL;

/* IDL sizes - likely should be even bigger
 * limiting factors: sizeof(ID), thread stack size
 */
#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))

#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)

/** Size in bytes of the counter slot plus the IDs currently stored. */
#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
/** True when the list holds no IDs. */
#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
/** Copy the counter and all IDs of \b src into \b dst. */
#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
/** First stored ID (index 1). */
#define MDB_IDL_FIRST(ids) ((ids)[1])
/** Last stored ID (index ids[0]). */
#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])

/** Current max length of an #mdb_midl_alloc()ed IDL */
#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])

/** Append ID to IDL. The IDL must be big enough. */
#define mdb_midl_xappend(idl, id)              \
  do {                                         \
    MDB_ID *xidl = (idl), xlen = ++(xidl[0]);  \
    xidl[xlen] = (id);                         \
  } while (0)

/** Search for an ID in a descending IDL.
 * @param[in] ids The IDL to search.
 * @param[in] id  The ID to search for.
 * @return The index of \b id if present; otherwise the index of the first
 * ID smaller than \b id (the slot where \b id would be inserted), which is
 * ids[0] + 1 when every stored ID is larger.
 */
unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);

/** Insert an ID into a descending IDL, keeping it sorted.
 * The capacity of the list is NOT consulted; the caller must have made
 * room for one more element (e.g. with #mdb_midl_need()).
 * @param[in,out] ids The IDL to insert into.
 * @param[in] id The ID to insert.
 * @return 0 on success, -1 if \b id is already present (an assertion also
 * fires in builds with assertions enabled), -2 if the count would reach
 * #MDB_IDL_DB_MAX or on internal error.
 */
int mdb_midl_insert(MDB_IDL ids, MDB_ID id);

/** Drop the last \b n IDs of an IDL.
 * @param[in,out] ids The IDL to truncate.
 * @param[in] n Number of trailing IDs to drop; values larger than the
 * current count empty the list.
 */
void mdb_midl_pop_n(MDB_IDL ids, unsigned n);

/** Remove the ID stored at a given index, closing the gap.
 * @param[in,out] ids The IDL to modify.
 * @param[in] idx 1-based index of the ID to remove; out-of-range values
 * leave the list unchanged.
 */
void mdb_midl_remove_at(MDB_IDL ids, unsigned idx);

/** Remove an ID from a descending IDL if it is present.
 * @param[in,out] ids The IDL to modify.
 * @param[in] id The ID to remove.
 */
void mdb_midl_remove(MDB_IDL ids, MDB_ID id);

/** Allocate an IDL.
 * Allocates memory for an IDL of the given size.
 * @param[in] num Number of IDs the list must be able to hold.
 * @return IDL on success, NULL on failure.
 */
MDB_IDL mdb_midl_alloc(int num);

/** Free an IDL.
 * @param[in] ids The IDL to free (may be NULL).
 */
void mdb_midl_free(MDB_IDL ids);

/** Shrink an IDL.
 * Return the IDL to the default size if it has grown larger.
 * @param[in,out] idp Address of the IDL to shrink.
 */
void mdb_midl_shrink(MDB_IDL *idp);

/** Shrink an IDL to a specific size.
 * Resize the IDL to \b size if it is larger.
 * @param[in,out] idp Address of the IDL to shrink.
 * @param[in] size Capacity to have once resized.
 */
void mdb_midl_shrink_to(MDB_IDL *idp, size_t size);

/** Make room for num additional elements in an IDL.
 * @param[in,out] idp Address of the IDL.
 * @param[in] num Number of elements to make room for.
 * @return 0 on success, ENOMEM on failure.
 */
int mdb_midl_need(MDB_IDL *idp, unsigned num);

/** Append an ID onto an IDL.
 * @param[in,out] idp Address of the IDL to append to.
 * @param[in] id The ID to append.
 * @return 0 on success, ENOMEM if the IDL is too large.
 */
int mdb_midl_append(MDB_IDL *idp, MDB_ID id);

/** Append an IDL onto an IDL.
 * @param[in,out] idp Address of the IDL to append to.
 * @param[in] app The IDL to append.
 * @return 0 on success, ENOMEM if the IDL is too large.
 */
int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);

/** Append an ID range onto an IDL.
 * @param[in,out] idp Address of the IDL to append to.
 * @param[in] id The lowest ID to append.
 * @param[in] n Number of IDs to append.
 * @return 0 on success, ENOMEM if the IDL is too large.
 */
int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);

/** Merge an IDL onto an IDL. The destination IDL must be big enough.
 * @param[in] idl The IDL to merge into.
 * @param[in] merge The IDL to merge.
 */
void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);

/** Sort an IDL.
 * @param[in,out] ids The IDL to sort.
 */
void mdb_midl_sort(MDB_IDL ids);
/** @} */

/* midl.c ------------------------------------------------------------------ */

/* Three-way compare: -1, 0 or 1. */
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))

unsigned
mdb_midl_search(MDB_IDL ids, MDB_ID id)
{
  /*
   * Binary search of id in the descending list ids.
   * If found, returns the position of id.
   * If not found, returns the position id would be inserted at, i.e. the
   * first position holding a smaller ID (ids[0] + 1 if there is none).
   */
  unsigned base = 0;
  unsigned cursor = 1;
  int val = 0;
  unsigned n = (unsigned)ids[0];

  while (0 < n) {
    unsigned pivot = n >> 1;
    cursor = base + pivot + 1;
    val = CMP(ids[cursor], id);

    if (val < 0) {
      /* ids[cursor] is smaller than id: keep looking to the left. */
      n = pivot;

    } else if (val > 0) {
      /* ids[cursor] is larger than id: keep looking to the right. */
      base = cursor;
      n -= pivot + 1;

    } else {
      return cursor;
    }
  }

  /* The last probed ID was larger, so id belongs just after it. */
  if (val > 0) {
    ++cursor;
  }
  return cursor;
}

int
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
{
  unsigned x, i;

  x = mdb_midl_search(ids, id);
  assert(x > 0);

  if (x < 1) {
    /* internal error (only reachable when assertions are compiled out) */
    return -2;
  }

  if (x <= ids[0] && ids[x] == id) {
    /* duplicate */
    assert(0);
    return -1;
  }

  if (++ids[0] >= MDB_IDL_DB_MAX) {
    /* no room: undo the count bump */
    --ids[0];
    return -2;

  } else {
    /* shift the tail one slot right, then drop id into the gap */
    for (i = (unsigned)ids[0]; i > x; i--)
      ids[i] = ids[i - 1];
    ids[x] = id;
  }

  return 0;
}

void
mdb_midl_pop_n(MDB_IDL ids, unsigned n)
{
  /* Clamp instead of letting the unsigned count wrap around. */
  ids[0] = (n < ids[0]) ? ids[0] - n : 0;
}

void
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
{
  MDB_ID len = ids[0];
  MDB_ID i;

  /* Nothing to do for an empty list or an index outside 1..len. */
  if (idx < 1 || idx > len)
    return;

  /* Move every ID after idx one slot down, including the last one. */
  for (i = idx; i < len; i++)
    ids[i] = ids[i + 1];
  ids[0] = len - 1;
}

void
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
{
  unsigned idx = mdb_midl_search(ids, id);
  /* search returns an insertion slot when id is absent, so verify a hit */
  if (idx <= ids[0] && ids[idx] == id)
    mdb_midl_remove_at(ids, idx);
}

MDB_IDL
mdb_midl_alloc(int num)
{
  MDB_IDL ids;

  if (num < 0)
    return NULL;

  /* Two extra slots: capacity at [-1] and the ID count at [0]. */
  ids = malloc(((size_t)num + 2) * sizeof(MDB_ID));
  if (ids) {
    *ids++ = (MDB_ID)num;
    *ids = 0;
  }
  return ids;
}

void
mdb_midl_free(MDB_IDL ids)
{
  /* The guard is required: the allocation starts one slot before ids. */
  if (ids)
    free(ids - 1);
}

void
mdb_midl_shrink(MDB_IDL *idp)
{
  MDB_IDL ids = *idp;
  /* Only the capacity is reduced; on realloc failure nothing changes. */
  if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
    *ids++ = MDB_IDL_UM_MAX;
    *idp = ids;
  }
}

void
mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
{
  MDB_IDL base = *idp - 1; /* start of the allocation (capacity slot) */

  if (base[0] <= size)
    return;

  base = realloc(base, (size + 2) * sizeof(MDB_ID));
  if (!base)
    return; /* the original block is untouched and *idp is still valid */

  base[0] = size;
  *idp = base + 1;
  /* IDs beyond the new capacity are gone, so clamp the count as well. */
  if ((*idp)[0] > size)
    (*idp)[0] = size;
}

/* Grow the capacity of *idp by num IDs. Returns 0 or ENOMEM; on failure
 * the list is left unchanged.
 */
static int
mdb_midl_grow(MDB_IDL *idp, int num)
{
  MDB_IDL idn = *idp - 1;
  /* grow it */
  idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
  if (!idn)
    return ENOMEM;
  *idn++ += num;
  *idp = idn;
  return 0;
}

int
mdb_midl_need(MDB_IDL *idp, unsigned num)
{
  MDB_IDL ids = *idp;
  num += ids[0];
  if (num > ids[-1]) {
    /* Over-allocate by 25% and round the slot count to a multiple of 256. */
    num = (num + num / 4 + (256 + 2)) & -256;
    if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
      return ENOMEM;
    *ids++ = num - 2;
    *idp = ids;
  }
  return 0;
}

int
mdb_midl_append(MDB_IDL *idp, MDB_ID id)
{
  MDB_IDL ids = *idp;
  /* Too big? */
  if (ids[0] >= ids[-1]) {
    if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
      return ENOMEM;
    ids = *idp;
  }
  ids[0]++;
  ids[ids[0]] = id;
  return 0;
}

int
mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
{
  MDB_IDL ids = *idp;
  /* Too big? */
  if (ids[0] + app[0] >= ids[-1]) {
    if (mdb_midl_grow(idp, app[0]))
      return ENOMEM;
    ids = *idp;
  }
  memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
  ids[0] += app[0];
  return 0;
}

int
mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
{
  MDB_ID *ids = *idp, len = ids[0];
  /* Too big? */
  if (len + n > ids[-1]) {
    if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
      return ENOMEM;
    ids = *idp;
  }
  ids[0] = len + n;
  ids += len;
  /* Fill back to front so the appended run is descending. */
  while (n)
    ids[n--] = id++;
  return 0;
}

void
mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
{
  MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
  idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
  old_id = idl[j];
  /* Walk both lists from their smallest ends, filling idl from the back. */
  while (i) {
    merge_id = merge[i--];
    for (; old_id < merge_id; old_id = idl[--j])
      idl[k--] = old_id;
    idl[k--] = merge_id;
  }
  idl[0] = total;
}

/* Quicksort + Insertion sort for small arrays */

#define SMALL 8
#define MIDL_SWAP(a, b) \
  do {                  \
    itmp = (a);         \
    (a) = (b);          \
    (b) = itmp;         \
  } while (0)

void
mdb_midl_sort(MDB_IDL ids)
{
  /* Max possible depth of int-indexed tree * 2 items/level */
  int istack[sizeof(int) * CHAR_BIT * 2];
  int i, j, k, l, ir, jstack;
  MDB_ID a, itmp;

  ir = (int)ids[0];
  l = 1;
  jstack = 0;
  for (;;) {
    if (ir - l < SMALL) { /* Insertion sort */
      for (j = l + 1; j <= ir; j++) {
        a = ids[j];
        for (i = j - 1; i >= 1; i--) {
          if (ids[i] >= a)
            break;
          ids[i + 1] = ids[i];
        }
        ids[i + 1] = a;
      }
      if (jstack == 0)
        break;
      /* Pop the next pending sub-range. */
      ir = istack[jstack--];
      l = istack[jstack--];
    } else {
      k = (l + ir) >> 1; /* Choose median of left, center, right */
      MIDL_SWAP(ids[k], ids[l + 1]);
      if (ids[l] < ids[ir]) {
        MIDL_SWAP(ids[l], ids[ir]);
      }
      if (ids[l + 1] < ids[ir]) {
        MIDL_SWAP(ids[l + 1], ids[ir]);
      }
      if (ids[l] < ids[l + 1]) {
        MIDL_SWAP(ids[l], ids[l + 1]);
      }
      i = l + 1;
      j = ir;
      a = ids[l + 1];
      /* Partition around the pivot a (descending order). */
      for (;;) {
        do
          i++;
        while (ids[i] > a);
        do
          j--;
        while (ids[j] < a);
        if (j < i)
          break;
        MIDL_SWAP(ids[i], ids[j]);
      }
      ids[l + 1] = ids[j];
      ids[j] = a;
      jstack += 2;
      /* Push the larger sub-range and continue with the smaller one. */
      if (ir - i + 1 >= j - l) {
        istack[jstack] = ir;
        istack[jstack - 1] = i;
        ir = j - 1;
      } else {
        istack[jstack] = j - 1;
        istack[jstack - 1] = l;
        l = i;
      }
    }
  }
}
ba68c63..c0fab52 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -827,7 +827,7 @@ main(int argc, char *argv[]) // Check if record file is specified if (record_file == NULL) { - recording = true;//TODO + recording = false; } else { // Check for existing file without force flag if (access(record_file, F_OK) == 0 && !force_flag) { @@ -925,13 +925,12 @@ main(int argc, char *argv[]) } while (amt - left > amt / 100); } - if (toss(1000) > 800) { + if (toss(10) > 8) { size_t new_offset, new_amt; pgno_t max; larger_please: new_amt = 1024 + (xorshift32() % 2048) + toss(1024); - new_offset = xorshift32() % 4096 + 1024; - max = sparsemap_get_ending_offset(handles[SM]); + new_offset = sparsemap_get_ending_offset(handles[SM]); // Build a new container to merge with the existing one. foreach(types) @@ -962,324 +961,3 @@ main(int argc, char *argv[]) return 0; } - -#if 0 -/* - * A "soak test" that tries to replicate behavior in LMDB for page allocation. - */ -int -main(void) -{ - size_t replenish = 0, iterations = 0; - - // disable buffering -#ifdef DEBUG - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); -#endif - - l_span_loc = td_new(100); - b_span_loc = td_new(100); - l_span_take = td_new(100); - b_span_take = td_new(100); - l_span_merge = td_new(100); - b_span_merge = td_new(100); - - stats_header(); - - sparsemap_idx_t amt = INITIAL_AMOUNT; - MDB_IDL list = mdb_midl_alloc(amt); - sparsemap_t *map = sparsemap(INITIAL_AMOUNT); - roaring_bitmap_t *rbm = roaring_bitmap_create(); - - // start with 2GiB of 4KiB free pages to track: - // - MDB_IDL requires one int for each free page - // - Sparsemap will compress the set bits using less memory - mdb_midl_need(&list, amt); - for (sparsemap_idx_t pg = 0; pg < amt; pg++) { - // We list every free (unallocated) page in the IDL, while... - mdb_midl_xappend(list, pg); - // ... true (unset in the bitmap) indicates free in the bitmap, ... 
- assert(_sparsemap_set(&map, pg, true) == pg); - assert(roaring_bitmap_add_checked(rbm, pg)); - } - mdb_midl_sort(list); - roaring_bitmap_run_optimize(rbm); - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - - double b, e; - while (1) { - unsigned mi; - pgno_t ml, sl, rl; - - // get an amount [1, 16] of pages to find preferring smaller sizes - unsigned n = toss(15) + 1; - - // find a set of pages using the MDB_IDL - { - b = nsts(); - /* Seek a big enough contiguous page range. Prefer - * pages at the tail, just truncating the list. - */ - int retry = 1; - unsigned i = 0; - pgno_t pgno = 0, *mop = list; - unsigned n2 = len, mop_len = mop[0]; - if (mop_len > n2) { - i = mop_len; - do { - pgno = mop[i]; - if (mop[i - n2] == pgno + n2) - goto search_done; - } while (--i > n2); - if (--retry < 0) - break; - } - search_done:; - ml = pgno; - mi = i; - e = nsts(); - td_add(l_span_loc, e - b, 1); - } - assert(verify_span_midl(list, ml, n)); - assert(verify_span_sparsemap(map, ml, n)); - assert(verify_span_roaring(rbm, ml, n)); - - // find a set of pages using the Sparsemap - { - b = nsts(); - pgno_t pgno = sparsemap_span(map, 0, n, true); - assert(SPARSEMAP_NOT_FOUND(pgno) == false); - sl = pgno; - e = nsts(); - td_add(b_span_loc, e - b, 1); - assert(verify_sm_is_first_available_span(map, pgno, n, true)); - } - assert(verify_span_midl(list, sl, n)); - assert(verify_span_sparsemap(map, sl, n)); - assert(verify_span_roaring(rbm, sl, n)); - - // find a set of pages using the Roaring Bitmap - { - b = nsts(); - uint64_t max = roaring_bitmap_maximum(rbm); - uint64_t offset = roaring_bitmap_minimum(rbm); - do { - if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) { - break; - } - offset++; - } while (offset <= max); - rl = offset; - e = nsts(); - } - /* - if (rl != sl) { - assert(verify_span_midl(list, rl, n)); - assert(verify_span_sparsemap(map, rl, n)); - assert(verify_span_roaring(rbm, rl, n)); - } - */ - assert(rl == sl); - 
- bool prefer_mdb_idl_loc = (bool)xorshift32() % 2; - - // acquire the set of pages within the list - if (prefer_mdb_idl_loc) { - b = nsts(); - unsigned j, num = n; - int i = mi; - pgno_t *mop = list; - unsigned mop_len = mop[0]; - - mop[0] = mop_len -= num; - /* Move any stragglers down */ - for (j = i - num; j < mop_len;) - mop[++j] = mop[++i]; - e = nsts(); - for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; - td_add(l_span_take, e - b, 1); - } else { - b = nsts(); - unsigned j, num = n; - int i = mdb_midl_search(list, sl) + num; - pgno_t *mop = list; - unsigned mop_len = mop[0]; - - mop[0] = mop_len -= num; - /* Move any stragglers down */ - for (j = i - num; j < mop_len;) - mop[++j] = mop[++i]; - e = nsts(); - for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; - td_add(l_span_take, e - b, 1); - } - - // acquire the set of pages within the sparsemap - if (prefer_mdb_idl_loc) { - b = nsts(); - for (pgno_t i = ml; i < ml + n; i++) { - assert(_sparsemap_set(&map, i, false) == i); - } - e = nsts(); - td_add(b_span_take, e - b, 1); - } else { - b = nsts(); - for (pgno_t i = sl; i <= sl + n; i++) { - assert(_sparsemap_set(&map, i, false) == i); - } - e = nsts(); - td_add(b_span_take, e - b, 1); - } - - // acquire the set of pages within the roaring bitmap - if (prefer_mdb_idl_loc) { - b = nsts(); - roaring_bitmap_remove_range(rbm, ml, ml + n); - e = nsts(); - } else { - b = nsts(); - roaring_bitmap_remove_range(rbm, sl, sl + n); - e = nsts(); - } - roaring_bitmap_run_optimize(rbm); - - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - - // Once we've used a tenth of the free list, let's replenish it a bit. 
- if (list[0] < amt / 10) { - do { - pgno_t pgno; - size_t len, retries = amt; - do { - len = toss(15) + 1; - pgno = sparsemap_span(map, 0, len, false); - assert(verify_sm_is_first_available_span(map, pgno, n, false)); - //__diag("%zu\t%zu,%zu\n", iterations, replenish, retries); - } while (SPARSEMAP_NOT_FOUND(pgno) && --retries); - if (retries == 0) { - goto larger_please; - } - if (SPARSEMAP_FOUND(pgno)) { - assert(verify_empty_midl(list, pgno, len)); - assert(verify_empty_sparsemap(map, pgno, len)); - assert(verify_empty_roaring(rbm, pgno, len)); - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - if (list[-1] - list[0] < len) { - mdb_midl_need(&list, list[-1] + len); - } - for (size_t i = pgno; i < pgno + len; i++) { - assert(verify_midl_contains(list, i) == false); - assert(sparsemap_is_set(map, i) == false); - mdb_midl_insert(list, i); - assert(verify_midl_contains(list, i) == true); - assert(_sparsemap_set(&map, i, true) == i); - assert(sparsemap_is_set(map, i) == true); - assert(roaring_bitmap_add_checked(rbm, i) == true); - } - mdb_midl_sort(list); - assert(verify_midl_nodups(list)); - assert(verify_span_midl(list, pgno, len)); - assert(verify_span_sparsemap(map, pgno, len)); - assert(verify_span_roaring(rbm, pgno, len)); - } - assert(verify_sm_eq_ml(map, list)); - assert(verify_sm_eq_rm(map, rbm)); - replenish++; - } while (list[0] < amt - 32); - } - replenish = 0; - - // every so often, either ... - if (iterations % 1000 == 0) { - larger_please:; - size_t COUNT = xorshift32() % 3586 + 513; - // ... add some amount of 4KiB pages, or - size_t len = COUNT; - // The largest page is at list[1] because this is a reverse sorted list. - pgno_t pg = list[0] ? list[1] + 1 : 0; - if (true) { // disable shrinking for now... 
(toss(6) + 1 < 7) - MDB_IDL new_list = mdb_midl_alloc(len); - sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT); - roaring_bitmap_t *new_rbm = roaring_bitmap_create(); - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - new_list[i + 1] = gp; - new_list[0]++; - assert(verify_midl_contains(new_list, gp) == true); - assert(_sparsemap_set(&new_map, gp, true) == gp); - assert(sparsemap_is_set(new_map, gp)); - assert(roaring_bitmap_add_checked(new_rbm, gp)); - assert(roaring_bitmap_contains(new_rbm, gp)); - } - assert(verify_sm_eq_ml(new_map, new_list)); - assert(verify_sm_eq_rm(new_map, new_rbm)); - { - b = nsts(); - mdb_midl_append_list(&list, new_list); - mdb_midl_sort(list); - e = nsts(); - td_add(l_span_merge, e - b, 1); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(verify_midl_contains(list, gp) == true); - } - { - b = nsts(); - _sparsemap_merge(&map, new_map); - e = nsts(); - td_add(b_span_merge, e - b, 1); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(sparsemap_is_set(map, gp)); - } - free(new_map); - { - b = nsts(); - roaring_bitmap_or_inplace(rbm, new_rbm); - e = nsts(); - } - for (size_t i = 0; i < len; i++) { - pgno_t gp = (pg + len) - i; - assert(roaring_bitmap_contains(rbm, gp)); - } - roaring_free(new_rbm); - } else { - if (list[-1] > INITIAL_AMOUNT) { - // ... a fraction of the time, remove COUNT / 2 of 4KiB pages. 
- { - pgno_t pg; - for (size_t i = 0; i < COUNT; i++) { - pg = list[list[0] - i]; - assert(sparsemap_is_set(map, pg) == true); - assert(_sparsemap_set(&map, pg, false) == pg); - } - } - { - roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]); - } - { - mdb_midl_shrink_to(&list, list[0] - COUNT); - } - assert(list[list[0]] != pg); - assert(verify_midl_nodups(list)); - verify_sm_eq_ml(map, list); - verify_sm_eq_rm(map, rbm); - } - } - } - stats(iterations, map, list); - // printf("\033[K%zu\r", iterations); - iterations++; - } - - return 0; -} -#endif