From 294fbccfc88211a79b48028167574b1c9d6e1c9e Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Thu, 25 Apr 2024 14:37:21 -0400 Subject: [PATCH 1/3] adding soak test --- .gitignore | 1 + Makefile | 12 +- examples/soak.c | 648 ++++++++++++++++++++++++++++++++++++++++++++ include/sparsemap.h | 2 +- src/sparsemap.c | 11 +- tests/common.c | 14 +- tests/common.h | 4 +- tests/test.c | 49 +++- 8 files changed, 713 insertions(+), 28 deletions(-) create mode 100644 examples/soak.c diff --git a/.gitignore b/.gitignore index eb5a7e9..7adaeca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ **/*.o tests/test examples/ex_? +examples/soak .cache hints.txt tmp/ diff --git a/Makefile b/Makefile index db623e9..076beec 100644 --- a/Makefile +++ b/Makefile @@ -5,16 +5,16 @@ SHARED_LIB = libsparsemap.so #CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC -CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC +CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC -TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC +TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC TESTS = tests/test TEST_OBJS = tests/test.o tests/munit.o tests/tdigest.o tests/common.o -EXAMPLES = examples/ex_1 
examples/ex_2 examples/ex_3 examples/ex_4 +EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4 examples/soak .PHONY: all shared static clean test examples mls @@ -50,7 +50,7 @@ clean: rm -f $(EXAMPLES) examples/*.o format: - clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/test.c tests/common.c tests/common.h + clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c examples/soak.c tests/test.c tests/common.c tests/common.h # clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c %.o: src/%.c @@ -77,7 +77,11 @@ examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB) examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB) $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) +examples/soak: examples/common.o examples/soak.o $(STATIC_LIB) + $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) + todo: rg -i 'todo|gsb|abort' # cp src/sparsemap.c /tmp && clang-tidy src/sparsemap.c -fix -fix-errors -checks="readability-braces-around-statements" -- -DDEBUG -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC +# clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test diff --git a/examples/soak.c b/examples/soak.c new file mode 100644 index 0000000..8c95d8d --- /dev/null +++ b/examples/soak.c @@ -0,0 +1,648 @@ +#include +#include +#include +#include +#include +#include + +#include "../include/sparsemap.h" +#include "../tests/common.h" + +/* midl.h ------------------------------------------------------------------ */ +/** @defgroup idls ID List Management + * @{ + */ +/** A generic unsigned ID number. These were entryIDs in back-bdb. + * Preferably it should have the same size as a pointer. + */ +typedef size_t MDB_ID; + +/** An IDL is an ID List, a sorted array of IDs. The first + * element of the array is a counter for how many actual + * IDs are in the list. 
In the original back-bdb code, IDLs are + * sorted in ascending order. For libmdb IDLs are sorted in + * descending order. + */ +typedef MDB_ID *MDB_IDL; + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN) +#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1)) + +#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1) +#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1) + +#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID)) +#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0) +#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src))) +#define MDB_IDL_FIRST(ids) ((ids)[1]) +#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]]) + +/** Current max length of an #mdb_midl_alloc()ed IDL */ +#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1]) + +/** Append ID to IDL. The IDL must be big enough. */ +#define mdb_midl_xappend(idl, id) \ + do { \ + MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \ + xidl[xlen] = (id); \ + } while (0) + +/** Search for an ID in an IDL. + * @param[in] ids The IDL to search. + * @param[in] id The ID to search for. + * @return The index of the first ID greater than or equal to \b id. + */ +unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id); + +/** Allocate an IDL. + * Allocates memory for an IDL of the given size. + * @return IDL on success, NULL on failure. + */ +MDB_IDL mdb_midl_alloc(int num); + +/** Free an IDL. + * @param[in] ids The IDL to free. + */ +void mdb_midl_free(MDB_IDL ids); + +/** Shrink an IDL. + * Return the IDL to the default size if it has grown larger. + * @param[in,out] idp Address of the IDL to shrink. + */ +void mdb_midl_shrink(MDB_IDL *idp); + +/** Make room for num additional elements in an IDL. + * @param[in,out] idp Address of the IDL. + * @param[in] num Number of elements to make room for. + * @return 0 on success, ENOMEM on failure. 
+ */ +int mdb_midl_need(MDB_IDL *idp, unsigned num); + +/** Append an ID onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The ID to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append(MDB_IDL *idp, MDB_ID id); + +/** Append an IDL onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] app The IDL to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app); + +/** Append an ID range onto an IDL. + * @param[in,out] idp Address of the IDL to append to. + * @param[in] id The lowest ID to append. + * @param[in] n Number of IDs to append. + * @return 0 on success, ENOMEM if the IDL is too large. + */ +int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n); + +/** Merge an IDL onto an IDL. The destination IDL must be big enough. + * @param[in] idl The IDL to merge into. + * @param[in] merge The IDL to merge. + */ +void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge); + +/** Sort an IDL. + * @param[in,out] ids The IDL to sort. + */ +void mdb_midl_sort(MDB_IDL ids); + +/* midl.c ------------------------------------------------------------------ */ +/** @defgroup idls ID List Management + * @{ + */ +#define CMP(x, y) ((x) < (y) ? 
-1 : (x) > (y)) + +unsigned +mdb_midl_search(MDB_IDL ids, MDB_ID id) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while (0 < n) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP(ids[cursor], id); + + if (val < 0) { + n = pivot; + + } else if (val > 0) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if (val > 0) { + ++cursor; + } + return cursor; +} + +#if 0 /* superseded by append/sort */ +int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) +{ + unsigned x, i; + + x = mdb_midl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + assert(0); + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + /* no room */ + --ids[0]; + return -2; + + } else { + /* insert id */ + for (i=ids[0]; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = id; + } + + return 0; +} +#endif + +MDB_IDL +mdb_midl_alloc(int num) +{ + MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID)); + if (ids) { + *ids++ = num; + *ids = 0; + } + return ids; +} + +void +mdb_midl_free(MDB_IDL ids) +{ + if (ids) + free(ids - 1); +} + +void +mdb_midl_shrink(MDB_IDL *idp) +{ + MDB_IDL ids = *idp; + if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) { + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + } +} + +static int +mdb_midl_grow(MDB_IDL *idp, int num) +{ + MDB_IDL idn = *idp - 1; + /* grow it */ + idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); + if (!idn) + return ENOMEM; + *idn++ += num; + *idp = idn; + return 0; +} + +int +mdb_midl_need(MDB_IDL *idp, unsigned num) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num / 4 + (256 + 2)) & -256; + if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = 
num - 2; + *idp = ids; + } + return 0; +} + +int +mdb_midl_append(MDB_IDL *idp, MDB_ID id) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] >= ids[-1]) { + if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0]++; + ids[ids[0]] = id; + return 0; +} + +int +mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] + app[0] >= ids[-1]) { + if (mdb_midl_grow(idp, app[0])) + return ENOMEM; + ids = *idp; + } + memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID)); + ids[0] += app[0]; + return 0; +} + +int +mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? */ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + +void +mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge) +{ + MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k; + idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ + old_id = idl[j]; + while (i) { + merge_id = merge[i--]; + for (; old_id < merge_id; old_id = idl[--j]) + idl[k--] = old_id; + idl[k--] = merge_id; + } + idl[0] = total; +} + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define MIDL_SWAP(a, b) \ + { \ + itmp = (a); \ + (a) = (b); \ + (b) = itmp; \ + } + +void +mdb_midl_sort(MDB_IDL ids) +{ + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int) * CHAR_BIT * 2]; + int i, j, k, l, ir, jstack; + MDB_ID a, itmp; + + ir = (int)ids[0]; + l = 1; + jstack = 0; + for (;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j = l + 1; j <= ir; j++) { + a = ids[j]; + for (i = j - 1; i >= 1; i--) { + if (ids[i] >= a) + break; + ids[i + 1] = ids[i]; + } + ids[i + 1] = a; + } + if (jstack == 0) + break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of 
left, center, right */ + MIDL_SWAP(ids[k], ids[l + 1]); + if (ids[l] < ids[ir]) { + MIDL_SWAP(ids[l], ids[ir]); + } + if (ids[l + 1] < ids[ir]) { + MIDL_SWAP(ids[l + 1], ids[ir]); + } + if (ids[l] < ids[l + 1]) { + MIDL_SWAP(ids[l], ids[l + 1]); + } + i = l + 1; + j = ir; + a = ids[l + 1]; + for (;;) { + do + i++; + while (ids[i] > a); + do + j--; + while (ids[j] < a); + if (j < i) + break; + MIDL_SWAP(ids[i], ids[j]); + } + ids[l + 1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir - i + 1 >= j - l) { + istack[jstack] = ir; + istack[jstack - 1] = i; + ir = j - 1; + } else { + istack[jstack] = j - 1; + istack[jstack - 1] = l; + l = i; + } + } + } +} +/* ------------------------------------------------------------------------- */ + +typedef MDB_ID pgno_t; + +char * +bytes_as(double bytes, char *s, size_t size) +{ + const char *units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; + size_t i = 0; + + while (bytes >= 1024 && i < sizeof(units) / sizeof(units[0]) - 1) { + bytes /= 1024; + i++; + } + + snprintf(s, size, "%.2f %s", bytes, units[i]); + return s; +} + +/** + * A "coin toss" function that is critical to the proper operation of the + * Skiplist. For example, when `max = 6` this function returns 0 with + * probability 0.5, 1 with 0.25, 2 with 0.125, etc. until 6 with 0.5^7. 
+ */ +static int +toss(size_t max) +{ + size_t level = 0; + double probability = 0.5; + + double random_value = (double)xorshift32() / RAND_MAX; + while (random_value < probability && level < max) { + level++; + probability *= 0.5; + } + return level; +} + +bool +verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) +{ + pgno_t f = 1; + if (pg + len > list[0]) + return false; + while (list[f] != pg && f <= list[0]) + f++; + if (len == 1) + return true; + for (pgno_t i = f; i < f + len; i++) { + if (list[i + 1] != list[i] + 1) + return false; + } + return true; +} + +bool +verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) +{ + for (pgno_t i = pg; i < pg + len; i++) { + pgno_t f = 1; + while (list[f] != pg && f <= list[0]) + f++; + if (f != list[0]) + return false; + } + return true; +} + +bool +verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) +{ + for (pgno_t i = pg; i < pg + len; i++) { + if (sparsemap_is_set(map, i) != true) { + return false; + } + } + return true; +} + +bool +verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) +{ + for (pgno_t i = pg; i < pg + len; i++) { + if (sparsemap_is_set(map, i) != false) { + return false; + } + } + return true; +} + +bool +verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) +{ + // ensure all items in the MDB_IDL are set to true in the map + for (unsigned j = 1; j <= list[0]; j++) { + if (sparsemap_is_set(map, list[j]) == false) + return false; + } + // ensure all items not in the MDB_IDL are set to false in the map + unsigned j = 1, last_pgno = list[list[0]]; + for (unsigned i = 0; i <= last_pgno; i++) { + if (list[j] > i) { + do { + if (sparsemap_is_set(map, i) == true) + return false; + } while (i++ < list[j]); + } else { + if (sparsemap_is_set(map, i) == false) + return false; + if (list[j] == i) + j++; + } + } + return true; +} + +void +print_sizes(sparsemap_t *map, MDB_IDL list) +{ + char m[1024], l[1024]; + __diag("idl: %s bytes\tsm: %s bytes\n", bytes_as(MDB_IDL_SIZEOF(list), m, 
1024), bytes_as(sparsemap_get_capacity(map), l, 1024)); +} + +/* + * A "soak test" that tries to replicate behavior in LMDB for page allocation. + */ +int +main() +{ + size_t iterations = 0; + bool prefer_mdb_idl_location = true; // xorshift32() % 2; + + // disable buffering + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + + __diag("starting...\n"); + + size_t amt = 1024 * 2; // 1024*1024 * 2; + MDB_IDL list = mdb_midl_alloc(amt); + sparsemap_t *map = sparsemap(3 * 1024); + + // start with 2GiB of 4KiB free pages to track: + // - MDB_IDL requires one int for each free page + // - Sparsemap will compress the set bits using less memory + mdb_midl_need(&list, amt); + for (size_t pg = 0; pg < amt; pg++) { + mdb_midl_xappend(list, pg); // listed page ids are free + sparsemap_set(map, pg, true); // true means free in our bitmap + } + print_sizes(map, list); + assert(verify_sm_eq_ml(map, list)); + + while (1) { + unsigned mi; + pgno_t ml = 0, sl = 0; + + // get an amount [1, 16] of pages to find prefering smaller sizes + unsigned n = toss(15) + 1; + + // find a set of pages using the MDB_IDL + { + /* Seek a big enough contiguous page range. Prefer + * pages at the tail, just truncating the list. + */ + int retry = 1; + unsigned i; + pgno_t pgno = 0, *mop = list; + unsigned n2 = n, mop_len = mop[0]; + if (mop_len > n2) { + i = mop_len; + if (n2 == 1) { + pgno = mop[mop_len]; + goto search_done; + } + do { + pgno = mop[i]; + if (mop[i - n2] == pgno - n2) + goto search_done; + } while (--i > n2); + if (--retry < 0) + break; + } + search_done:; + assert(pgno != 0); + ml = pgno - (n2 > 1 ? 
n2 - 1 : 0); + mi = i; + } + assert(verify_span_midl(list, ml, n)); + assert(verify_span_sparsemap(map, ml, n)); + + // find a set of pages using the Sparsemap + { + pgno_t pgno = sparsemap_span(map, 0, n, true); + assert(SPARSEMAP_NOT_FOUND(pgno) == false); + sl = pgno; + } + assert(verify_span_midl(list, sl, n)); + assert(verify_span_sparsemap(map, sl, n)); + + // acquire the set of pages within the list + if (prefer_mdb_idl_location) { + unsigned j, num = n; + int i = mi; + pgno_t *mop = list; + unsigned mop_len = mop[0]; + + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i - num; j < mop_len;) + mop[++j] = mop[++i]; + } else { + unsigned j, num = n; + int i = mdb_midl_search(list, sl) + num; + pgno_t *mop = list; + unsigned mop_len = mop[0]; + + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i - num; j < mop_len;) + mop[++j] = mop[++i]; + } + + // acquire the set of pages within the sparsemap + if (prefer_mdb_idl_location) { + for (pgno_t i = ml; i < ml + n; i++) { + sparsemap_set(map, i, false); + } + } else { + for (pgno_t i = sl; i <= sl + n; i++) { + sparsemap_set(map, i, false); + } + } + + assert(verify_sm_eq_ml(map, list)); + + // Once we've used half of the free list, let's replenish it a bit. 
+ if (list[0] < amt / 2) { + do { + size_t len = toss(15) + 1; + pgno_t l, s = (pgno_t)xorshift32() % amt - len; + do { + l = sparsemap_span(map, s--, len, false); + } while(SPARSEMAP_NOT_FOUND(l)); + assert(verify_empty_midl(list, l, len)); + assert(verify_empty_sparsemap(map, l, len)); + for (int i = l; i < l + len; i++) { + mdb_midl_xappend(list, i); + sparsemap_set(map, i, true); + } + assert(verify_span_midl(list, l, len)); + assert(verify_span_sparsemap(map, l, len)); + } while (list[0] > amt - 32); + } + iterations++; + } + + return 0; +} diff --git a/include/sparsemap.h b/include/sparsemap.h index 689980b..233e530 100644 --- a/include/sparsemap.h +++ b/include/sparsemap.h @@ -89,7 +89,7 @@ extern "C" { typedef struct sparsemap sparsemap_t; typedef long int sparsemap_idx_t; -#define SPARSEMAP_IDX_MAX ((1UL << (sizeof(long) * CHAR_BIT - 1)) - 1) +#define SPARSEMAP_IDX_MAX ((1UL << (sizeof(long int) * CHAR_BIT - 1)) - 1) #define SPARSEMAP_IDX_MIN (-(SPARSEMAP_IDX_MAX)-1) #define SPARSEMAP_NOT_FOUND(_x) ((_x) == SPARSEMAP_IDX_MAX || (_x) == SPARSEMAP_IDX_MIN) typedef uint32_t sm_idx_t; diff --git a/src/sparsemap.c b/src/sparsemap.c index c5d88c5..3c05a55 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -902,11 +902,11 @@ sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size) } void -sparsemap_open(sparsemap_t *map, uint8_t *data, size_t data_size) +sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size) { map->m_data = data; map->m_data_used = map->m_data_used > 0 ? map->m_data_used : 0; - map->m_capacity = data_size; + map->m_capacity = size; } /* @@ -1410,17 +1410,18 @@ sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value) size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) { - size_t count, nth = 0; + size_t count, nth; sm_bitvec_t vec = 0; sparsemap_idx_t offset; + nth = (idx > 0) ? 
sparsemap_rank(map, 0, idx - 1, value) : 0; offset = sparsemap_select(map, nth++, value); if (len == 1) { return offset; } do { - count = sparsemap_rank_vec(map, offset, offset + len, value, &vec); - if (count == len) { + count = sparsemap_rank_vec(map, offset, offset + len - 1, value, &vec); + if (count >= len) { return offset; } else { // TODO: what is nth when len > SM_BITS_PER_VECTOR? diff --git a/tests/common.c b/tests/common.c index 4450f4b..70514d9 100644 --- a/tests/common.c +++ b/tests/common.c @@ -354,9 +354,9 @@ print_bits(char *name, uint64_t value) } void -sm_bitmap_from_uint64(sparsemap_t *map, uint64_t number) +sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number) { - for (int i = 0; i < 64; i++) { + for (int i = offset; i < 64; i++) { bool bit = number & ((uint64_t)1 << i); sparsemap_set(map, i, bit); } @@ -384,15 +384,15 @@ sm_add_span(sparsemap_t *map, int map_size, int span_length) } void -sm_whats_set(sparsemap_t *map, int m) +sm_whats_set(sparsemap_t *map, int off, int len) { - logf("what's set in the range [0, %d): ", m); - for (int i = 0; i < m; i++) { + printf("what's set in the range [%d, %d): ", off, off + len); + for (int i = off; i < off + len; i++) { if (sparsemap_is_set(map, i)) { - logf("%d ", i); + printf("%d ", i); } } - logf("\n"); + printf("\n"); } bool diff --git a/tests/common.h b/tests/common.h index 87b75fc..02dda1a 100644 --- a/tests/common.h +++ b/tests/common.h @@ -46,11 +46,11 @@ sparsemap_idx_t sm_add_span(sparsemap_t *map, int map_size, int span_length); void print_bits(char *name, uint64_t value); void bitmap_from_uint32(sparsemap_t *map, uint32_t number); -void sm_bitmap_from_uint64(sparsemap_t *map, uint64_t number); +void sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number); uint32_t rank_uint64(uint64_t number, int n, int p); int whats_set_uint64(uint64_t number, int bitPositions[64]); -void sm_whats_set(sparsemap_t *map, int m); +void sm_whats_set(sparsemap_t *map, int off, int len); 
bool sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value); bool sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value); diff --git a/tests/test.c b/tests/test.c index f52d833..c3d4ef6 100644 --- a/tests/test.c +++ b/tests/test.c @@ -523,7 +523,7 @@ test_api_scan_setup(const MunitParameter params[], void *user_data) sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data); sparsemap_init(map, buf, 1024); - sm_bitmap_from_uint64(map, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); + sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); return (void *)map; } @@ -610,7 +610,7 @@ test_api_select_setup(const MunitParameter params[], void *user_data) sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data); sparsemap_init(map, buf, 1024); - sm_bitmap_from_uint64(map, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); + sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); return (void *)map; } @@ -648,7 +648,7 @@ test_api_select_false_setup(const MunitParameter params[], void *user_data) sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data); sparsemap_init(map, buf, 1024); - sm_bitmap_from_uint64(map, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); + sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); return (void *)map; } @@ -688,7 +688,7 @@ test_api_select_neg_setup(const MunitParameter params[], void *user_data) sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data); sparsemap_init(map, buf, 1024); - sm_bitmap_from_uint64(map, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); + sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee); return (void *)map; } @@ -974,6 +974,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data) return MUNIT_OK; } +#ifdef SCALE_ONDREJ static void * test_scale_ondrej_setup(const MunitParameter params[], void *user_data) { @@ -999,7 +1000,8 @@ test_scale_ondrej(const MunitParameter 
params[], void *data) assert_ptr_not_null(map); sparsemap_idx_t stride = 18; - sparsemap_idx_t top = 268435456; + // sparsemap_idx_t top = 268435456; + sparsemap_idx_t top = 2000; sparsemap_idx_t needle = munit_rand_int_range(1, top / stride); for (sparsemap_idx_t i = 0; i < top / stride; i += stride) { for (sparsemap_idx_t j = 0; j < stride; j++) { @@ -1010,7 +1012,7 @@ test_scale_ondrej(const MunitParameter params[], void *data) errno = 0; } } - assert_true(sm_is_span(map, i + ((i != needle) ? 10 : 9), (i != needle) ? 8 : 9, false)); + assert_true(sm_is_span(map, i + ((i != needle) ? 10 : 9), (i != needle) ? 8 : 9, true)); } sparsemap_idx_t a = sparsemap_span(map, 0, 9, false); sparsemap_idx_t l = a / stride; @@ -1018,6 +1020,31 @@ test_scale_ondrej(const MunitParameter params[], void *data) assert_true(l == needle); return MUNIT_OK; } +#endif // SCALE_ONDREJ + +static void * +test_scale_fuzz_setup(const MunitParameter params[], void *user_data) +{ + (void)params; + (void)user_data; + sparsemap_t *map = sparsemap(10 * 1024); + assert_ptr_not_null(map); + return (void *)map; +} +static void +test_scale_fuzz_tear_down(void *fixture) +{ + sparsemap_t *map = (sparsemap_t *)fixture; + assert_ptr_not_null(map); + munit_free(map); +} +static MunitResult +test_scale_fuzz(const MunitParameter params[], void *data) +{ + sparsemap_t *map = (sparsemap_t *)data; + (void)params; + return MUNIT_OK; +} static void * test_scale_spans_come_spans_go_setup(const MunitParameter params[], void *user_data) @@ -1038,7 +1065,7 @@ test_scale_spans_come_spans_go_tear_down(void *fixture) static MunitResult test_scale_spans_come_spans_go(const MunitParameter params[], void *data) { - size_t amt = 8192; // 268435456, ~5e7 interations due to 2e9 / avg(l) + size_t amt = 8192; // 268435456; // ~5e7 interations due to 2e9 / avg(l) sparsemap_t *map = (sparsemap_t *)data; (void)params; @@ -1048,7 +1075,8 @@ test_scale_spans_come_spans_go(const MunitParameter params[], void *data) int l = i % 31 + 
16; sm_add_span(map, amt, l); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024); + assert_ptr_not_null(map); errno = 0; } @@ -1275,7 +1303,10 @@ test_perf_span_tainted(const MunitParameter params[], void *data) // clang-format off static MunitTest scale_test_suite[] = { { (char *)"/lots-o-spans", test_scale_lots_o_spans, test_scale_lots_o_spans_setup, test_scale_lots_o_spans_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, - { (char *)"/ondrej", test_scale_ondrej, test_scale_ondrej_setup, test_scale_ondrej_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, +#ifdef SCALE_ONDREJ +{ (char *)"/ondrej", test_scale_ondrej, test_scale_ondrej_setup, test_scale_ondrej_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, +#endif + { (char *)"/fuzz", test_scale_fuzz, test_scale_fuzz_setup, test_scale_fuzz_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, { (char *)"/spans_come_spans_go", test_scale_spans_come_spans_go, test_scale_spans_come_spans_go_setup, test_scale_spans_come_spans_go_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, { (char *)"/best-case", test_scale_best_case, test_scale_best_case_setup, test_scale_best_case_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, { (char *)"/worst-case", test_scale_worst_case, test_scale_worst_case_setup, test_scale_worst_case_tear_down, MUNIT_TEST_OPTION_NONE, NULL }, -- 2.45.2 From 6630fc759314039cff4d84f730b40c7c52769d41 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 26 Apr 2024 11:46:26 -0400 Subject: [PATCH 2/3] soak seems to be working --- examples/soak.c | 112 ++++++++++---------- include/sparsemap.h | 246 +++++++++++++++++++++++++++++++++----------- src/sparsemap.c | 76 ++++++++------ tests/test.c | 14 ++- 4 files changed, 301 insertions(+), 147 deletions(-) diff --git a/examples/soak.c b/examples/soak.c index 8c95d8d..6827b85 100644 --- a/examples/soak.c +++ b/examples/soak.c @@ -157,7 +157,6 @@ mdb_midl_search(MDB_IDL ids, MDB_ID id) return 
cursor; } -#if 0 /* superseded by append/sort */ int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) { unsigned x, i; @@ -190,7 +189,6 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) return 0; } -#endif MDB_IDL mdb_midl_alloc(int num) @@ -425,20 +423,36 @@ toss(size_t max) return level; } +bool +verify_midl_contains(MDB_IDL list, pgno_t pg) +{ + unsigned index = mdb_midl_search(list, pg); + return index <= list[0] && list[index] == pg; +} + +bool +verify_midl_nodups(MDB_IDL list) +{ + pgno_t id = 1; + while (id < list[0]) { + if (list[id] == list[id + 1]) + return false; + id++; + } + return true; +} + bool verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) { - pgno_t f = 1; - if (pg + len > list[0]) + pgno_t f = mdb_midl_search(list, pg); + bool found = (list[f] == pg) && (f <= list[0]); + if (!found) return false; - while (list[f] != pg && f <= list[0]) - f++; if (len == 1) return true; - for (pgno_t i = f; i < f + len; i++) { - if (list[i + 1] != list[i] + 1) - return false; - } + if (list[len] + 1 != list[len - 1]) + return false; return true; } @@ -446,10 +460,9 @@ bool verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) { for (pgno_t i = pg; i < pg + len; i++) { - pgno_t f = 1; - while (list[f] != pg && f <= list[0]) - f++; - if (f != list[0]) + pgno_t f = mdb_midl_search(list, pg); + bool found = list[f] == pg && f <= list[0]; + if (found) return false; } return true; @@ -480,26 +493,12 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) bool verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) { - // ensure all items in the MDB_IDL are set to true in the map - for (unsigned j = 1; j <= list[0]; j++) { - if (sparsemap_is_set(map, list[j]) == false) + for (unsigned i = 0; i <= list[1]; i++) { + pgno_t f = mdb_midl_search(list, i); + bool found = list[f] == i && f <= list[0]; + if (sparsemap_is_set(map, i) != found) return false; } - // ensure all items not in the MDB_IDL are set to false in the map - unsigned j = 1, last_pgno = list[list[0]]; 
- for (unsigned i = 0; i <= last_pgno; i++) { - if (list[j] > i) { - do { - if (sparsemap_is_set(map, i) == true) - return false; - } while (i++ < list[j]); - } else { - if (sparsemap_is_set(map, i) == false) - return false; - if (list[j] == i) - j++; - } - } return true; } @@ -517,7 +516,7 @@ int main() { size_t iterations = 0; - bool prefer_mdb_idl_location = true; // xorshift32() % 2; + bool prefer_mdb_idl_location = (bool)xorshift32() % 2; // disable buffering setvbuf(stdout, NULL, _IONBF, 0); @@ -525,7 +524,7 @@ main() __diag("starting...\n"); - size_t amt = 1024 * 2; // 1024*1024 * 2; + size_t amt = 1024 * 2; // 1024 * 1024 * 2; MDB_IDL list = mdb_midl_alloc(amt); sparsemap_t *map = sparsemap(3 * 1024); @@ -537,6 +536,7 @@ main() mdb_midl_xappend(list, pg); // listed page ids are free sparsemap_set(map, pg, true); // true means free in our bitmap } + mdb_midl_sort(list); print_sizes(map, list); assert(verify_sm_eq_ml(map, list)); @@ -558,21 +558,16 @@ main() unsigned n2 = n, mop_len = mop[0]; if (mop_len > n2) { i = mop_len; - if (n2 == 1) { - pgno = mop[mop_len]; - goto search_done; - } do { pgno = mop[i]; - if (mop[i - n2] == pgno - n2) + if (mop[i - n2] == pgno + n2) goto search_done; } while (--i > n2); if (--retry < 0) break; } search_done:; - assert(pgno != 0); - ml = pgno - (n2 > 1 ? n2 - 1 : 0); + ml = pgno; mi = i; } assert(verify_span_midl(list, ml, n)); @@ -598,6 +593,8 @@ main() /* Move any stragglers down */ for (j = i - num; j < mop_len;) mop[++j] = mop[++i]; + for (j = mop_len + 1; j <= mop[-1]; j++) + mop[j] = 0; } else { unsigned j, num = n; int i = mdb_midl_search(list, sl) + num; @@ -626,21 +623,30 @@ main() // Once we've used half of the free list, let's replenish it a bit. 
if (list[0] < amt / 2) { do { - size_t len = toss(15) + 1; - pgno_t l, s = (pgno_t)xorshift32() % amt - len; + pgno_t pg; + size_t len, retries = amt; do { - l = sparsemap_span(map, s--, len, false); - } while(SPARSEMAP_NOT_FOUND(l)); - assert(verify_empty_midl(list, l, len)); - assert(verify_empty_sparsemap(map, l, len)); - for (int i = l; i < l + len; i++) { - mdb_midl_xappend(list, i); - sparsemap_set(map, i, true); + len = toss(15) + 1; + pg = sparsemap_span(map, 0, len, false); + } while (SPARSEMAP_NOT_FOUND(pg) && --retries); + if (SPARSEMAP_FOUND(pg)) { + assert(verify_empty_midl(list, pg, len)); + assert(verify_empty_sparsemap(map, pg, len)); + for (int i = pg; i < pg + len; i++) { + if (pg + len > list[-1]) + mdb_midl_need(&list, pg + len); + assert(verify_midl_contains(list, i) == false); + mdb_midl_insert(list, i); + sparsemap_set(map, i, true); + } + mdb_midl_sort(list); + assert(verify_midl_nodups(list)); + assert(verify_span_midl(list, pg, len)); + assert(verify_span_sparsemap(map, pg, len)); } - assert(verify_span_midl(list, l, len)); - assert(verify_span_sparsemap(map, l, len)); - } while (list[0] > amt - 32); + } while (list[0] < amt - 32); } + print_sizes(map, list); iterations++; } diff --git a/include/sparsemap.h b/include/sparsemap.h index 233e530..b58450c 100644 --- a/include/sparsemap.h +++ b/include/sparsemap.h @@ -89,113 +89,239 @@ extern "C" { typedef struct sparsemap sparsemap_t; typedef long int sparsemap_idx_t; -#define SPARSEMAP_IDX_MAX ((1UL << (sizeof(long int) * CHAR_BIT - 1)) - 1) -#define SPARSEMAP_IDX_MIN (-(SPARSEMAP_IDX_MAX)-1) -#define SPARSEMAP_NOT_FOUND(_x) ((_x) == SPARSEMAP_IDX_MAX || (_x) == SPARSEMAP_IDX_MIN) +#define SPARSEMAP_IDX_MAX LONG_MAX +#define SPARSEMAP_IDX_MIN LONG_MIN +#define SPARSEMAP_FOUND(x) ((x) < SPARSEMAP_IDX_MAX || (x) > SPARSEMAP_IDX_MIN) +#define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX || (x) == SPARSEMAP_IDX_MIN) typedef uint32_t sm_idx_t; typedef uint64_t sm_bitvec_t; -/** - * Create a 
new, empty sparsemap_t with a buffer of |size|. - * Default when set to 0 is 1024. +/** @brief Allocate a new, empty sparsemap_t with a buffer of \b size on the + * heap to use for storage of bitmap data. + * + * The buffer used for the bitmap is allocated in the same heap allocation as + * the structure, this means that you only need to call free() on the returned + * object to free all resources. Using this method it is allowable to grow the + * buffer size by calling #sparsemap_set_data_size(). This function calls + * #sparsemap_init(). + * + * @param[in] size The starting size of the buffer used for the bitmap, default + * is 1024 bytes. + * @returns The newly allocated sparsemap reference. */ sparsemap_t *sparsemap(size_t size); -/** - * Allocate on a sparsemap_t on the heap to wrap the provided fixed-size - * buffer (heap or stack allocated). +/** @brief Allocate a new, empty sparsemap_t that references (wraps) the buffer + * \b data of \b size bytes to use for storage of bitmap data. + * + * This function allocates a new sparsemap_t but not the buffer which is + * provided by the caller as \b data which can be allocated on the stack or + * heap. Caller is responsible for calling free() on the returned heap object + * and releasing the memory used for \b data. Resizing the buffer is not + * directly supported, you may attempt to resize by calling + * #sparsemap_set_data_size() with the potentially relocated address of \b data. + * This function calls #sparsemap_init(). + * + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap. + * @returns The newly allocated sparsemap reference. */ sparsemap_t *sparsemap_wrap(uint8_t *data, size_t size); -/** - * Initialize a (possibly stack allocated) sparsemap_t with data (potentially - * also on the stack). 
+/** @brief Initialize an existing sparsemap_t by assigning \b data of \b size + * bytes for storage of bitmap data. + * + * Given the address of an existing \b map allocated on the stack or heap this + * function will initialize the datastructure and use the provided \b data of + * \b size for bitmap data. Caller is responsible for all memory management. + * Resizing the buffer is not directly supported; you + * may resize it yourself and then call #sparsemap_set_data_size(), passing the + * new address of \b data should the buffer have moved, so that the map's + * m_data field is updated. + * + * @param[in] map The sparsemap reference. + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap. */ void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size); -/** - * Opens an existing sparsemap contained within the specified buffer. +/** @brief Opens, without initializing, an existing sparsemap contained within + * the specified buffer. + * + * Given the address of an existing \b map this function will assign to the + * provided datastructure \b data of \b size for bitmap data. Caller is + * responsible for all memory management. Use this as a way to + * "deserialize" bytes and make them ready for use as a bitmap. + * + * @param[in] map The sparsemap reference. + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap. */ -void sparsemap_open(sparsemap_t *, uint8_t *data, size_t data_size); +void sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size); -/** - * Resets values and empties the buffer making it ready to accept new data. +/** @brief Resets values and empties the buffer making it ready to accept new + * data. + * + * @param[in] map The sparsemap reference.
*/ void sparsemap_clear(sparsemap_t *map); -/** - * Resizes the data range within the limits of the provided buffer, the map may - * move to a new address returned iff the map was created with the sparsemap() API. - * Take care to use the new reference (think: realloc()). NOTE: If the returned - * value equals NULL then the map was not resized. +/** @brief Update the size of the buffer \b data used for storing the bitmap. + * + * When called with \b data NULL on a \b map that was created with #sparsemap() + * this function will reallocate the storage for both the map and data possibly + * changing the address of the map itself so it is important for the caller to + * update all references to this map to the address returned in this scenario. + * Access to stale references will result in memory violations and program + * termination. Caller is not required to free() the old address, only the new + * one should it have changed. This uses #realloc() under the covers, all + * caveats apply here as well. + * + * When called referencing a \b map that was allocated by the caller this + * function will only update the values within the datastructure. + * + * @param[in] map The sparsemap reference. + * @param[in] size The desired size of the buffer \b data used for the bitmap. + * @returns The -- potentially changed -- sparsemap reference, or NULL should a + * #realloc() fail (\b ENOMEM) + * @note The resizing of caller supplied allocated objects is not yet fully + * supported. */ -sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, size_t data_size); +sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, size_t size, uint8_t *data); -/** - * Calculate remaining capacity, approaches 0 when full. +/** @brief Calculate remaining capacity, approaches 0 when full. + * + * Provides an estimate in the range [0.0, 100.0] of the remaining capacity of + * the buffer storing bitmap data.
This can change up or down as more data + * is added/removed due to the method for compressed representation, do not + * expect a smooth progression in either direction. This is a rough estimate only + * and may also jump in value after seemingly indiscriminate changes to the map. + * + * @param[in] map The sparsemap reference. + * @returns an estimate for remaining capacity that approaches 0.0 when full or + * 100.0 when empty */ double sparsemap_capacity_remaining(sparsemap_t *map); -/** - * Returns the capacity of the underlying byte array. +/** @brief Returns the capacity of the underlying byte array in bytes. + * + * Specifically, this returns the byte \b size provided for the underlying + * buffer used to store bitmap data. + * + * @param[in] map The sparsemap reference. + * @returns byte size of the buffer used for storing bitmap data */ size_t sparsemap_get_capacity(sparsemap_t *map); -/** - * Returns the value of a bit at index |idx|, either on/true/1 or off/false/0. +/** @brief Returns the value of a bit at index \b idx, either true for "set" (1) + * or \b false for "unset" (0). + * * When |idx| is negative it is an error. + * + * @param[in] map The sparsemap reference. + * @param[in] idx The 0-based offset into the bitmap index to examine. + * @returns either true or false; a negative idx is an error and always returns + * false + * @todo Support for negative relative offset in \b idx. */ bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx); -/** - * Sets the bit at index |idx| to true or false, depending on |value|. - * When |idx| is negative is it an error. Returns the |idx| supplied or - * SPARSEMAP_IDX_MAX on error with |errno| set to ENOSP when the map is full. +/** @brief Sets the bit at index \b idx to \b value. + * + * A sparsemap has a fixed size buffer with a capacity that can be exhausted + * when calling this function. In such cases the return value is not equal to + * the provided \b idx and errno is set to ENOSPC.
In such situations it is + * possible to grow the data size and retry the set() operation under certain + * circumstances (see #sparsemap() and #sparsemap_set_data_size()). + * + * @param[in] map The sparsemap reference. + * @param[in] idx The 0-based offset into the bitmap index to modify. + * @returns the \b idx supplied on success or SPARSEMAP_IDX_MIN/MAX on error + * with \b errno set to ENOSPC when the map is full; a negative idx is an error + * and always returns SPARSEMAP_IDX_MIN. */ sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value); -/** - * Returns the offset of the very first/last bit in the map. - */ -sm_idx_t sparsemap_get_starting_offset(sparsemap_t *map); - -/** - * Returns the used size in the data buffer in bytes. +/** @brief Returns the byte size of the data buffer that has been used thus far. + * + * @param[in] map The sparsemap reference. + * @returns the byte size of the data buffer that has been used thus far */ size_t sparsemap_get_size(sparsemap_t *map); -/** - * Decompresses the whole bitmap; calls scanner for all bits with a set of - * |n| vectors |vec| each a sm_bitmap_t which can be masked and read using - * bit operators to read the values for each position in the bitmap index. - * Setting |skip| will start the scan after "skip" bits. +/** @brief Provides a method for a callback function to examine every bit set in + * the index. + * + * This decompresses the whole bitmap and invokes #scanner() passing a 64bit + * "vector" of bits in order from 0 index to the end of the map. Using standard + * bit masking techniques it is possible to read each bit from LSB to MSB in + * these vectors to read the entire content of the bitmap index (see + * examples/ex_4.c). + * + * @param[in] map The sparsemap reference. + * @param[in] skip Start the scan after "skip" bits. 
*/ void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n), size_t skip); -/** - * Appends all chunk maps from |map| starting at |offset| to |other|, then - * reduces the chunk map-count appropriately. +/** @brief Splits the bitmap by assigning all bits starting at \b offset to the + * \b other bitmap while removing them from \b map. + * + * @param[in] map The sparsemap reference. + * @param[in] offset The index at which to split; bits from here on move to \b other. */ void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other); -/** - * Finds the offset of the n'th bit either set (|value| is true) or unset - * (|value| is false) from the start (positive |n|), or end (negative |n|), - * of the bitmap and returns that (uses a 0-based index). Returns -inf or +inf - * if not found (where "inf" is SPARSEMAP_IDX_MAX and "-inf" is SPARSEMAP_IDX_MIN). +/** @brief Finds the index of the \b n'th bit set to \b value. + * + * Locates the \b n'th bit either set, \b value is true, or unset, \b value is + * false, from the start, positive \b n, or end, negative \b n, of the bitmap. + * So, if your bit pattern is: ```1101 1110 1010 1101 1011 1110 1110 1111``` and + * you request the first set bit the result is `0` (meaning the 1st bit in the + * map which is index 0 because this is 0-based indexing). The first unset bit + * is `2` (or the third bit in the pattern). When n is 3 and value is true the + * result would be `3` (the fourth bit, or the third set bit which is at index + * 3 when 0-based). + * + * @param[in] map The sparsemap reference. + * @param[in] n Specifies how many bits to ignore (when n=3 return the position + * of the third matching bit). + * @param[in] value Determines if the search is to examine set (true) or unset + * (false) bits in the bitmap index. + * @returns the 0-based index of the located bit position within the map; when + * not found either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN.
*/ sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value); -/** - * Counts the set (|value| is true) or unset (|value| is false) bits starting - * at |x| bits (0-based) in the range [x, y] (inclusive on either end). +/** @brief Counts the bits matching \b value in the provided range, [\b x, \b + * y]. + * + * Counts the set, \b value is true, or unset, \b value is false, bits starting + * at the \b x'th bit (0-based) in the range [\b x, \b y] (inclusive on either + * end). If range is [0, 0] this examines 1 bit, the first one in the map, and + * returns 1 if value is true and the bit was set. + * + * @param[in] map The sparsemap reference. + * @param[in] x 0-based start of the inclusive range to examine. + * @param[in] y 0-based end of the inclusive range to examine. + * @param[in] value Determines if the scan is to count the set (true) or unset + * (false) bits in the range. + * @returns the count of bits found within the range that match the \b value */ size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value); -/** - * Finds the first span (i.e. a contiguous set of bits), in the bitmap that - * are set (|value| is true) or unset (|value| is false) and returns the - * starting offset for the span (0-based). +/** @brief Locates the first contiguous set of bits of \b len starting at \b idx + * matching \b value in the bitmap. + * + * @param[in] map The sparsemap reference. + * @param[in] idx 0-based start of search within the bitmap. + * @param[in] len The length of contiguous bits we're seeking. + * @param[in] value Determines if the scan is to find all set (true) or unset + * (false) bits of \b len. + * @returns the index of the first bit matching the criteria; when not found, + * either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN.
*/ size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value); diff --git a/src/sparsemap.c b/src/sparsemap.c index 3c05a55..bc97738 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -395,19 +395,19 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos, sm_bi } /** - * Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the - * n'th bit was found in this __sm_chunk_t, or to the new, reduced - * value of |n|. + * Returns the index of the offset'th set bit; sets |*pnew_n| to 0 if the + * offset'th bit was found in this __sm_chunk_t, or to the new, reduced + * value of |offset|. */ static size_t -__sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) +__sm_chunk_map_select(__sm_chunk_t *map, size_t offset, ssize_t *pnew_n, bool value) { size_t ret = 0; register uint8_t *p; p = (uint8_t *)map->m_data; for (size_t i = 0; i < sizeof(sm_bitvec_t); i++, p++) { - if (*p == 0) { + if (*p == 0 && value) { ret += (size_t)SM_FLAGS_PER_INDEX_BYTE * SM_BITS_PER_VECTOR; continue; } @@ -418,28 +418,28 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) continue; } if (flags == SM_PAYLOAD_ZEROS) { - if (value) { + if (value == true) { ret += SM_BITS_PER_VECTOR; continue; } else { - if (n > SM_BITS_PER_VECTOR) { - n -= SM_BITS_PER_VECTOR; + if (offset > SM_BITS_PER_VECTOR) { + offset -= SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR; continue; } *pnew_n = -1; - return ret + n; + return ret + offset; } } if (flags == SM_PAYLOAD_ONES) { if (value) { - if (n > SM_BITS_PER_VECTOR) { - n -= SM_BITS_PER_VECTOR; + if (offset > SM_BITS_PER_VECTOR) { + offset -= SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR; continue; } *pnew_n = -1; - return ret + n; + return ret + offset; } else { ret += SM_BITS_PER_VECTOR; continue; @@ -450,20 +450,20 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) for (int k = 0; k < SM_BITS_PER_VECTOR; k++) { if (value) 
{ if (w & ((sm_bitvec_t)1 << k)) { - if (n == 0) { + if (offset == 0) { *pnew_n = -1; return ret; } - n--; + offset--; } ret++; } else { if (!(w & ((sm_bitvec_t)1 << k))) { - if (n == 0) { + if (offset == 0) { *pnew_n = -1; return ret; } - n--; + offset--; } ret++; } @@ -471,7 +471,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) } } } - *pnew_n = (ssize_t)n; + *pnew_n = (ssize_t)offset; return ret; } @@ -914,11 +914,13 @@ sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size) * data_size is not exceeding the size of the underlying buffer. */ sparsemap_t * -sparsemap_set_data_size(sparsemap_t *map, size_t size) +sparsemap_set_data_size(sparsemap_t *map, size_t size, uint8_t *data) { - if ((uintptr_t)map->m_data == (uintptr_t)map + sizeof(sparsemap_t) && size > map->m_capacity) { - /* This sparsemap was allocated by the sparsemap() API, we can resize it. */ - size_t data_size = (size * sizeof(uint8_t)); + size_t data_size = (size * sizeof(uint8_t)); + + /* If this sparsemap was allocated by the sparsemap() API and we're not handed + a new data, it's up to us to resize it. */ + if (data == NULL && (uintptr_t)map->m_data == (uintptr_t)map + sizeof(sparsemap_t) && size > map->m_capacity) { /* Ensure that m_data is 8-byte aligned. 
*/ size_t total_size = sizeof(sparsemap_t) + data_size; @@ -934,6 +936,9 @@ sparsemap_set_data_size(sparsemap_t *map, size_t size) m->m_data = (uint8_t *)(((uintptr_t)m + sizeof(sparsemap_t)) & ~(uintptr_t)7); __sm_when_diag({ __sm_assert(IS_8_BYTE_ALIGNED(m->m_data)); }) return m; } else { + if (data != NULL && data_size > sparsemap_get_capacity(map) && data != map->m_data) { + map->m_data = data; + } map->m_capacity = size; return map; } @@ -1131,7 +1136,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value) return idx; } -sm_idx_t +sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map) { size_t count = __sm_get_chunk_map_count(map); @@ -1139,7 +1144,7 @@ sparsemap_get_starting_offset(sparsemap_t *map) return 0; } sm_idx_t *chunk = (sm_idx_t *)__sm_get_chunk_map_data(map, 0); - return *chunk; + return (sparsemap_idx_t)*chunk; } /** @@ -1304,13 +1309,19 @@ sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) { assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); - size_t result; + sm_idx_t start; size_t count = __sm_get_chunk_map_count(map); + if (n >= 0) { uint8_t *p = __sm_get_chunk_map_data(map, 0); for (size_t i = 0; i < count; i++) { - result = *(sm_idx_t *)p; + start = *(sm_idx_t *)p; + /* Start of this chunk is greater than n meaning there are a set of 0s + before the first 1 sufficient to consume n. */ + if (value == false && i == 0 && start > n) { + return n; + } p += sizeof(sm_idx_t); __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); @@ -1318,15 +1329,20 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) ssize_t new_n = (ssize_t)n; size_t index = __sm_chunk_map_select(&chunk, n, &new_n, value); if (new_n == -1) { - return result + index; + return start + index; } n = new_n; p += __sm_chunk_map_get_size(&chunk); } - return SPARSEMAP_IDX_MAX; // TODO... shouldn't be here? 
+ if (value) { + return SPARSEMAP_IDX_MAX; + } else { + return count * SM_CHUNK_MAX_CAPACITY + 1; + } } else { - return SPARSEMAP_IDX_MIN; // TODO... sparsemap_select(map, -n, value); seek from end, not start + // TODO... sparsemap_select(map, -n, value); seek from end, not start + return SPARSEMAP_IDX_MIN; } } @@ -1416,7 +1432,9 @@ sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) nth = (idx > 0) ? sparsemap_rank(map, 0, idx - 1, value) : 0; offset = sparsemap_select(map, nth++, value); - if (len == 1) { + if (SPARSEMAP_NOT_FOUND(offset)) + offset = 0; + else if (len == 1) { return offset; } do { diff --git a/tests/test.c b/tests/test.c index c3d4ef6..bf4220b 100644 --- a/tests/test.c +++ b/tests/test.c @@ -103,7 +103,7 @@ test_api_new_realloc(const MunitParameter params[], void *data) assert_true(map->m_capacity == 1024); assert_true(map->m_data_used == sizeof(uint32_t)); - map = sparsemap_set_data_size(map, 2048); + map = sparsemap_set_data_size(map, 2048, NULL); assert_true(map->m_capacity == 2048); assert_true(map->m_data_used == sizeof(uint32_t)); @@ -276,7 +276,7 @@ test_api_set_data_size(const MunitParameter params[], void *data) assert_ptr_not_null(map); assert_true(map->m_capacity == 1024); assert_true(map->m_capacity == sparsemap_get_capacity(map)); - sparsemap_set_data_size(map, 512); + sparsemap_set_data_size(map, 512, NULL); assert_true(map->m_capacity == 512); assert_true(map->m_capacity == sparsemap_get_capacity(map)); return MUNIT_OK; @@ -445,6 +445,9 @@ test_api_set(const MunitParameter params[], void *data) return MUNIT_OK; } +// TODO remove? not public API anymore... 
+extern sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map); + static void * test_api_get_starting_offset_setup(const MunitParameter params[], void *user_data) { @@ -962,7 +965,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data) // TODO: sm_add_span(map, amt, l); sm_add_span(map, 10000, l); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL); errno = 0; } i += l; @@ -1008,7 +1011,7 @@ test_scale_ondrej(const MunitParameter params[], void *data) bool set = (i != needle) ? (j < 10) : (j < 9); sparsemap_set(map, i, set); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL); errno = 0; } } @@ -1043,6 +1046,7 @@ test_scale_fuzz(const MunitParameter params[], void *data) { sparsemap_t *map = (sparsemap_t *)data; (void)params; + (void)map; //TODO... return MUNIT_OK; } @@ -1075,7 +1079,7 @@ test_scale_spans_come_spans_go(const MunitParameter params[], void *data) int l = i % 31 + 16; sm_add_span(map, amt, l); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024, NULL); assert_ptr_not_null(map); errno = 0; } -- 2.45.2 From 834fe6d5883a1d5036764a51f541d255dcdeee65 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 26 Apr 2024 16:24:20 -0400 Subject: [PATCH 3/3] fixes --- examples/ex_2.c | 2 +- examples/soak.c | 119 ++++++++++++++++++++++++++++++++++++++---------- src/sparsemap.c | 2 +- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/examples/ex_2.c b/examples/ex_2.c index d2250b6..d897be4 100644 --- a/examples/ex_2.c +++ b/examples/ex_2.c @@ -38,7 +38,7 @@ main(void) } } // On 1024 KiB of buffer with every other bit set the map holds 7744 bits - // and then runs out of space. 
This next _set() call will fail/abort. + // and then runs out of space. This next _set() call will fail. sparsemap_set(map, ++i, true); assert(sparsemap_is_set(map, i) == true); return 0; diff --git a/examples/soak.c b/examples/soak.c index 6827b85..b90ad58 100644 --- a/examples/soak.c +++ b/examples/soak.c @@ -75,6 +75,13 @@ void mdb_midl_free(MDB_IDL ids); */ void mdb_midl_shrink(MDB_IDL *idp); +/** Shrink an IDL to a specific size. + * Resize the IDL to \b size if it is larger. + * @param[in,out] idp Address of the IDL to shrink. + * @param[in] size Capacity to have once resized. + */ +void mdb_midl_shrink(MDB_IDL *idp); + /** Make room for num additional elements in an IDL. * @param[in,out] idp Address of the IDL. * @param[in] num Number of elements to make room for. @@ -218,6 +225,17 @@ mdb_midl_shrink(MDB_IDL *idp) } } +void +mdb_midl_shrink_to(MDB_IDL *idp, size_t size) +{ + MDB_IDL ids = *idp; + if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) { + *ids++ = size; + *idp = ids; + *idp[0] = *idp[0] > size ? 
size : *idp[0]; + } +} + static int mdb_midl_grow(MDB_IDL *idp, int num) { @@ -426,8 +444,8 @@ toss(size_t max) bool verify_midl_contains(MDB_IDL list, pgno_t pg) { - unsigned index = mdb_midl_search(list, pg); - return index <= list[0] && list[index] == pg; + unsigned idx = mdb_midl_search(list, pg); + return idx <= list[0] && list[idx] == pg; } bool @@ -445,8 +463,8 @@ verify_midl_nodups(MDB_IDL list) bool verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) { - pgno_t f = mdb_midl_search(list, pg); - bool found = (list[f] == pg) && (f <= list[0]); + pgno_t idx = mdb_midl_search(list, pg); + bool found = idx <= list[0] && list[idx] == pg; if (!found) return false; if (len == 1) @@ -460,8 +478,8 @@ bool verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) { for (pgno_t i = pg; i < pg + len; i++) { - pgno_t f = mdb_midl_search(list, pg); - bool found = list[f] == pg && f <= list[0]; + pgno_t idx = mdb_midl_search(list, pg); + bool found = idx <= list[0] && list[idx] == pg; if (found) return false; } @@ -493,22 +511,41 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) bool verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) { - for (unsigned i = 0; i <= list[1]; i++) { - pgno_t f = mdb_midl_search(list, i); - bool found = list[f] == i && f <= list[0]; - if (sparsemap_is_set(map, i) != found) + for (int i = 1; i <= list[0]; i++) { + pgno_t pg = list[i]; + unsigned skipped = i == 1 ? 
0 : list[i-1] - list[i] - 1; + for (int j = 0; j < skipped; j++) { + if (sparsemap_is_set(map, pg - j) != false) + return false; + } + if (sparsemap_is_set(map, pg) != true) return false; } return true; } void -print_sizes(sparsemap_t *map, MDB_IDL list) +stats(size_t iterations, sparsemap_t *map, MDB_IDL list) { char m[1024], l[1024]; - __diag("idl: %s bytes\tsm: %s bytes\n", bytes_as(MDB_IDL_SIZEOF(list), m, 1024), bytes_as(sparsemap_get_capacity(map), l, 1024)); + __diag("%zu\tidl[%zu/%zu]: %s\tsm: %s\n", iterations, list[-1], list[0], bytes_as(MDB_IDL_SIZEOF(list), m, 1024), bytes_as(sparsemap_get_capacity(map), l, 1024)); } +sparsemap_idx_t +_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +{ + sparsemap_idx_t l = sparsemap_set(*map, idx, value); + if (errno == ENOSPC) { + *map = sparsemap_set_data_size(*map, sparsemap_get_capacity(*map) + 64, NULL); + assert(*map != NULL); + errno = 0; + } + return l; +} + + +#define INITIAL_AMOUNT 1024 * 2 + /* * A "soak test" that tries to replicate behavior in LMDB for page allocation. */ @@ -524,7 +561,7 @@ main() __diag("starting...\n"); - size_t amt = 1024 * 2; // 1024 * 1024 * 2; + size_t amt = INITIAL_AMOUNT; MDB_IDL list = mdb_midl_alloc(amt); sparsemap_t *map = sparsemap(3 * 1024); @@ -533,18 +570,20 @@ main() // - Sparsemap will compress the set bits using less memory mdb_midl_need(&list, amt); for (size_t pg = 0; pg < amt; pg++) { - mdb_midl_xappend(list, pg); // listed page ids are free - sparsemap_set(map, pg, true); // true means free in our bitmap + // We list every free (unallocated) page in the IDL, while... + mdb_midl_xappend(list, pg); + // ... true (unset in the bitmap) indicates free in the bitmap. 
+ assert(_sparsemap_set(&map, pg, true) == pg); } mdb_midl_sort(list); - print_sizes(map, list); + stats(0, map, list); assert(verify_sm_eq_ml(map, list)); while (1) { unsigned mi; pgno_t ml = 0, sl = 0; - // get an amount [1, 16] of pages to find prefering smaller sizes + // get an amount [1, 16] of pages to find preferring smaller sizes unsigned n = toss(15) + 1; // find a set of pages using the MDB_IDL @@ -610,11 +649,11 @@ main() // acquire the set of pages within the sparsemap if (prefer_mdb_idl_location) { for (pgno_t i = ml; i < ml + n; i++) { - sparsemap_set(map, i, false); + assert(_sparsemap_set(&map, i, false) == i); } } else { for (pgno_t i = sl; i <= sl + n; i++) { - sparsemap_set(map, i, false); + assert(_sparsemap_set(&map, i, false) == i); } } @@ -632,12 +671,12 @@ main() if (SPARSEMAP_FOUND(pg)) { assert(verify_empty_midl(list, pg, len)); assert(verify_empty_sparsemap(map, pg, len)); + if (list[-1] - list[0] < len) + mdb_midl_need(&list, list[-1] + len); for (int i = pg; i < pg + len; i++) { - if (pg + len > list[-1]) - mdb_midl_need(&list, pg + len); assert(verify_midl_contains(list, i) == false); mdb_midl_insert(list, i); - sparsemap_set(map, i, true); + assert(_sparsemap_set(&map, i, true) == i); } mdb_midl_sort(list); assert(verify_midl_nodups(list)); @@ -646,7 +685,41 @@ main() } } while (list[0] < amt - 32); } - print_sizes(map, list); + stats(iterations, map, list); + + // every 100 iterations, either ... + if (iterations % 100 == 0) { + const int COUNT = 1024; + if (toss(6) + 1 < 7) { + // ... add a MiB of 4KiB pages, or + int len = COUNT; + // The largest page is at list[1] because this is a reverse sorted list. 
+ int pg = list[1] + 1; + if (list[0] + COUNT > list[-1]) + mdb_midl_grow(&list, list[0] + len); + for (int i = pg; i < pg + len; i++) { + assert(verify_midl_contains(list, i) == false); + assert(sparsemap_is_set(map, i) == false); + mdb_midl_insert(list, i); + assert(_sparsemap_set(&map, i, true) == i); + } + mdb_midl_sort(list); + assert(verify_midl_nodups(list)); + verify_sm_eq_ml(map, list); + } else { + if (list[-1] > INITIAL_AMOUNT) { + // ... a fraction of the time, remove a MiB of 4KiB pages. + for (int i = 0; i < COUNT; i++) { + pgno_t pg = list[list[0] - i]; + assert(sparsemap_is_set(map, pg) == true); + assert(_sparsemap_set(&map, pg, false) == pg) ; + } + mdb_midl_shrink_to(&list, list[0] - COUNT); + assert(verify_midl_nodups(list)); + verify_sm_eq_ml(map, list); + } + } + } iterations++; } diff --git a/src/sparsemap.c b/src/sparsemap.c index bc97738..109ae65 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -1338,7 +1338,7 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) if (value) { return SPARSEMAP_IDX_MAX; } else { - return count * SM_CHUNK_MAX_CAPACITY + 1; + return count * SM_CHUNK_MAX_CAPACITY; } } else { // TODO... sparsemap_select(map, -n, value); seek from end, not start -- 2.45.2