From dad1d74c2cd67c75668acd6d9ba350d604676fe2 Mon Sep 17 00:00:00 2001
From: Greg Burd
Date: Fri, 5 Apr 2024 19:16:18 -0400
Subject: [PATCH] WIP: select/rank

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

 * This patch had been collapsed onto a handful of lines; the line structure
   below was rebuilt and each hunk's line counts were checked against its
   "@@" header.  Indentation width (two spaces) is a reconstruction.
 * Every "<...>" token was missing from the captured text.  The author
   address on the "From:" line and the header names in the first
   src/sparsemap.c hunk could not be recovered, so those "#include" lines are
   left bare and that hunk will not apply until they are filled in.  The
   position of the two blank context lines in that hunk is inferred from the
   hunk counts only.
 * examples/ex_4.c: the include list was rebuilt from the identifiers the
   file uses (one more line than before, hence 128 lines / 210 insertions).
   The span verification loop started at |l| but tested |l + i|, i.e. the
   range [2l, 2l + 8); it now tests [l, l + 8).  size_t values are printed
   with %zu instead of %lu.
 * NOTE(review): in sparsemap_rank() the new "continue" is taken before the
   "p += __sm_chunk_map_get_size(&chunk)" at the bottom of the loop, and
   "loc -= capacity" is unsigned and can wrap when loc < capacity.
   sparsemap_select() received the same guard.  Neither function is fully
   visible in this patch, so both are left untouched -- please confirm.
 * sparsemap_span() still uses a hard-coded size of 1024; its doc comment now
   states that this is the "not found" return value.

 examples/ex_1.c     |   6 +--
 examples/ex_4.c     | 128 ++++++++++++++++++++++++++++++++++++++++++++
 include/sparsemap.h |   5 +-
 src/sparsemap.c     | 104 ++++++++++++++++++++++++++----------
 4 files changed, 210 insertions(+), 33 deletions(-)
 create mode 100644 examples/ex_4.c

diff --git a/examples/ex_1.c b/examples/ex_1.c
index ebc8f69..064e746 100644
--- a/examples/ex_1.c
+++ b/examples/ex_1.c
@@ -130,7 +130,7 @@ main()
     sparsemap_set(map, i, true);
   }
   for (int i = 0; i < 100000; i++) {
-    assert(sparsemap_select(map, i) == (unsigned)i);
+    assert(sparsemap_select(map, 0, i) == (unsigned)i);
   }
 
   sparsemap_clear(map);
@@ -140,7 +140,7 @@ main()
     sparsemap_set(map, i, true);
   }
   for (int i = 1; i < 513; i++) {
-    assert(sparsemap_select(map, i - 1) == (unsigned)i);
+    assert(sparsemap_select(map, 0, i - 1) == (unsigned)i);
   }
 
   sparsemap_clear(map);
@@ -150,7 +150,7 @@ main()
     sparsemap_set(map, i * 10, true);
   }
   for (size_t i = 0; i < 8; i++) {
-    assert(sparsemap_select(map, i) == i * 10);
+    assert(sparsemap_select(map, 0, i) == i * 10);
   }
 
   // split and move, aligned to MiniMap capacity
diff --git a/examples/ex_4.c b/examples/ex_4.c
new file mode 100644
index 0000000..da0f8a2
--- /dev/null
+++ b/examples/ex_4.c
@@ -0,0 +1,128 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../include/sparsemap.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvariadic-macros"
+#define __diag(...)                                                \
+  do {                                                             \
+    fprintf(stderr, "%s:%d:%s(): ", __FILE__, __LINE__, __func__); \
+    fprintf(stderr, __VA_ARGS__);                                  \
+  } while (0)
+#pragma GCC diagnostic pop
+
+#define SEED
+
+/* https://burtleburtle.net/bob/rand/smallprng.html */
+typedef struct rnd_ctx {
+  uint32_t a;
+  uint32_t b;
+  uint32_t c;
+  uint32_t d;
+} rnd_ctx_t;
+#define __rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+uint32_t
+__random(rnd_ctx_t *x)
+{
+  uint32_t e = x->a - __rot(x->b, 27);
+  x->a = x->b ^ __rot(x->c, 17);
+  x->b = x->c + x->d;
+  x->c = x->d + e;
+  x->d = e + x->a;
+  return x->d;
+}
+
+void
+__random_seed(rnd_ctx_t *x, uint32_t seed)
+{
+  uint32_t i;
+  x->a = 0xf1ea5eed, x->b = x->c = x->d = seed;
+  for (i = 0; i < 20; ++i) {
+    (void)__random(x);
+  }
+}
+
+void
+shuffle(rnd_ctx_t *prng, int *array, size_t n)
+{
+  size_t i, j;
+
+  if (n > 1) {
+    for (i = n - 1; i > 0; i--) {
+      j = (unsigned int)(__random(prng) % (i + 1));
+      // XOR swap algorithm
+      if (i != j) { // avoid self-swap leading to zero-ing the element
+        array[i] = array[i] ^ array[j];
+        array[j] = array[i] ^ array[j];
+        array[i] = array[i] ^ array[j];
+      }
+    }
+  }
+}
+
+bool
+was_set(size_t bit, int array[])
+{
+  for (int i = 0; i < 1024; i++) {
+    if ((size_t)array[i] == bit)
+      return true;
+  }
+  return false;
+}
+
+int
+main(void)
+{
+  int i = 0;
+  rnd_ctx_t prng;
+  int array[1024];
+
+  // disable buffering
+  setbuf(stderr, 0);
+
+  // seed the PRNG
+#ifdef SEED
+  __random_seed(&prng, 8675309);
+#else
+  __random_seed(&prng, (unsigned int)time(NULL) ^ getpid());
+#endif
+
+  for (i = 0; i < 1024; i++) {
+    array[i] = (int)__random(&prng) % 7000 + 1;
+    if (array[i] < 0)
+      i--;
+  }
+  // randomize setting the bits on
+  shuffle(&prng, array, 1024);
+
+  // start with a 1KiB (1024 byte) buffer
+  uint8_t *buf = calloc(1024, sizeof(uint8_t));
+
+  // create the sparse bitmap
+  sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024, 0);
+
+  // set all the bits on in a random order
+  for (i = 0; i < 1024; i++) {
+    //__diag("set %d\n", array[i]);
+    sparsemap_set(map, array[i], true);
+    assert(sparsemap_is_set(map, array[i]) == true);
+  }
+
+  size_t l = sparsemap_span(map, 0, 8);
+  __diag("found span of 8 at %zu starting from 0\n", l);
+  for (size_t bit = l; bit < l + 8; bit++) {
+    bool set = sparsemap_is_set(map, bit);
+    if (set)
+      __diag("verified %zu was set\n", bit);
+    else
+      __diag("darn, %zu was not really set, %s\n", bit, was_set(bit, array) ? "but we thought it was" : "because it wasn't");
+  }
+
+  return 0;
+}
diff --git a/include/sparsemap.h b/include/sparsemap.h
index a1f76be..d56180f 100644
--- a/include/sparsemap.h
+++ b/include/sparsemap.h
@@ -118,12 +118,13 @@ void sparsemap_combine(sparsemap_t *map, size_t sstart, sparsemap_t *other);
 #endif
 
 /* Returns the index of the n'th set bit; uses a 0-based index. */
-size_t sparsemap_select(sparsemap_t *map, size_t n);
+size_t sparsemap_select(sparsemap_t *map, size_t offset, size_t n);
 
 /* Counts the set bits in the range [offset, idx]. */
 size_t sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx);
 
-/* Returns the 0-based index of a span of the first set bits of at least |len| starting after |offset|. */
+/* Returns the 0-based index of a span of the first set bits of at least |len|
+ * starting after |offset|. */
 size_t sparsemap_span(sparsemap_t *map, size_t offset, size_t len);
 
 #endif
diff --git a/src/sparsemap.c b/src/sparsemap.c
index 61154fc..28617cb 100644
--- a/src/sparsemap.c
+++ b/src/sparsemap.c
@@ -17,13 +17,12 @@
 
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 
-#include
-#include
-
 #ifdef SPARSEMAP_DIAGNOSTIC
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
@@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO {
   /* metadata overhead: 4 bytes for __sm_chunk_t count */
   SM_SIZEOF_OVERHEAD = sizeof(uint32_t),
 
-  /* number of bits that can be stored in a BitVector */
+  /* number of bits that can be stored in a sm_bitvec_t */
   SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8),
 
   /* number of flags that can be stored in a single index byte */
@@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv)
 /**
  * Initialize __sm_chunk_t with provided data.
  */
-static void
+static inline void
 __sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data)
 {
   map->m_data = (sm_bitvec_t *)data;
@@ -381,8 +380,8 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
 }
 
 /**
- * Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the
- * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|
+ * Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the
+ * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|.
  */
 static size_t
 __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
@@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
 }
 
 /**
- * Counts the set bits in the range [0, idx].
+ * Counts the set bits in the range [start, idx].
  */
 static size_t
-__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
+__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
 {
   size_t ret = 0;
 
@@ -454,22 +453,39 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
       }
       if (flags == SM_PAYLOAD_ZEROS) {
         if (idx > SM_BITS_PER_VECTOR) {
-          idx -= SM_BITS_PER_VECTOR;
+          if (start > SM_BITS_PER_VECTOR) {
+            start -= SM_BITS_PER_VECTOR;
+          } else {
+            idx -= SM_BITS_PER_VECTOR - start;
+            start = 0;
+          }
         } else {
           return (ret);
         }
       } else if (flags == SM_PAYLOAD_ONES) {
         if (idx > SM_BITS_PER_VECTOR) {
-          idx -= SM_BITS_PER_VECTOR;
-          ret += SM_BITS_PER_VECTOR;
+          if (start > SM_BITS_PER_VECTOR) {
+            start -= SM_BITS_PER_VECTOR;
+          } else {
+            idx -= SM_BITS_PER_VECTOR - start;
+            if (start == 0)
+              ret += SM_BITS_PER_VECTOR;
+            start = 0;
+          }
         } else {
           return (ret + idx);
         }
       } else if (flags == SM_PAYLOAD_MIXED) {
         if (idx > SM_BITS_PER_VECTOR) {
-          idx -= SM_BITS_PER_VECTOR;
-          ret += popcountll((uint64_t)map->m_data[1 +
-            __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
+          if (start > SM_BITS_PER_VECTOR) {
+            start -= SM_BITS_PER_VECTOR;
+          } else {
+            idx -= SM_BITS_PER_VECTOR - start;
+            if (start == 0)
+              ret += popcountll((uint64_t)map->m_data[1 +
+                __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
+            start = 0;
+          }
         } else {
           sm_bitvec_t w = map->m_data[1 +
             __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
@@ -718,8 +734,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
 }
 
 /**
-* Clears the whole buffer
-*/
+ * Clears the whole buffer
+ */
 void
 sparsemap_clear(sparsemap_t *map)
 {
@@ -1124,7 +1140,7 @@ sparsemap_split(sparsemap_t *map, size_t sstart, sparsemap_t *other)
  * i.e. n == 0 for the first bit which is set, n == 1 for the second bit etc.
  */
 size_t
-sparsemap_select(sparsemap_t *map, size_t n)
+sparsemap_select(sparsemap_t *map, size_t loc, size_t n)
 {
   assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
   size_t result = 0;
@@ -1138,6 +1154,14 @@ sparsemap_select(sparsemap_t *map, size_t n)
     __sm_chunk_t chunk;
     __sm_chunk_map_init(&chunk, p);
 
+    /* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
+       move to the next chunk. */
+    size_t capacity = __sm_chunk_map_get_capacity(&chunk);
+    if (loc < result || loc - result >= capacity) {
+      loc -= capacity;
+      continue;
+    }
+
     ssize_t new_n = n;
     size_t index = __sm_chunk_map_select(&chunk, n, &new_n);
     if (new_n == -1) {
@@ -1152,16 +1176,16 @@ sparsemap_select(sparsemap_t *map, size_t n)
 }
 
 /**
- * Counts the set bits in the range [offset, idx].
+ * Counts the set bits in the range [loc, idx].
  */
 size_t
-sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
+sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
 {
   assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
   size_t result = 0;
   size_t count = __sm_get_chunk_map_count(map);
 
-  uint8_t *p = __sm_get_chunk_map_data(map, offset);
+  uint8_t *p = __sm_get_chunk_map_data(map, 0);
 
   for (size_t i = 0; i < count; i++) {
     sm_idx_t start = *(sm_idx_t *)p;
@@ -1172,18 +1196,42 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
     __sm_chunk_t chunk;
     __sm_chunk_map_init(&chunk, p);
 
-    result += __sm_chunk_map_rank(&chunk, idx - start);
+    /* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
+       move to the next chunk. */
+    size_t capacity = __sm_chunk_map_get_capacity(&chunk);
+    if (loc < start || loc - start >= capacity) {
+      loc -= capacity;
+      continue;
+    }
+
+    result += __sm_chunk_map_rank(&chunk, loc, idx - start);
     p += __sm_chunk_map_get_size(&chunk);
   }
   return (result);
 }
 
 /**
- * Finds a span of set bits of at least |len| after |offset|.
+ * Finds a span of set bits of at least |len| after |loc|. Returns the index of
+ * the n'th set bit that starts a span of at least |len| bits set to true.
+ * Returns |size| (for now hard-coded to 1024) when no such span was found.
  */
-size_t sparsemap_span(sparsemap_t *map, size_t offset, size_t len) {
-  ((void)map);
-  ((void)offset);
-  ((void)len);
-  return 0; // TODO
+size_t
+sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
+{
+  size_t size = 1024;
+//  size_t size = sparsemap_get_size(map);
+//  assert(size >= SM_SIZEOF_OVERHEAD);
+//  if (loc + 1 > size - len || len < size) {
+//    return size;
+//  }
+
+  do {
+    size_t nth = sparsemap_select(map, loc, len);
+    size_t count = sparsemap_rank(map, nth - len, nth);
+    if (count == len) {
+      return nth - len;
+    }
+  } while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len);
+
+  return size;
 }