From fa7d07044a3080dc20298613c1f7f0cdad546066 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 5 Apr 2024 21:42:46 -0400 Subject: [PATCH] rank --- examples/ex_4.c | 46 ++++++++++++++-------------- src/sparsemap.c | 81 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 48 deletions(-) diff --git a/examples/ex_4.c b/examples/ex_4.c index f8e97d9..26cf1c8 100644 --- a/examples/ex_4.c +++ b/examples/ex_4.c @@ -1,9 +1,6 @@ #include -#include #include #include -#include -#include #include "../include/sparsemap.h" @@ -66,11 +63,12 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n) } bool -was_set(size_t bit, int array[]) +was_set(size_t bit, const int array[]) { for (int i = 0; i < 1024; i++) { - if (array[i] == bit) + if (array[i] == (int)bit) { return true; + } } return false; } @@ -94,38 +92,38 @@ main(void) // create the sparse bitmap sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0); - for (i = 0; i < 8; i++) - sparsemap_set(map, i, true); + for (i = 0; i < 8; i++) sparsemap_set(map, i, true); rank = sparsemap_rank(map, 0, 8); __diag("rank was %lu at offset 0\n", rank); assert(rank == 8); for (i = 0; i < 8; i++) { bool set = sparsemap_is_set(map, i); - if (set) + if (set) { __diag("verified %d was set, %s\n", i, was_set(i, array) ? "but we thought it was" : "because it wasn't"); - else + } else { __diag("darn, %d was not really set, %s\n", i, was_set(i, array) ? "but we thought it was" : "because it wasn't"); + } } __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); rank = sparsemap_span(map, 0, 8); __diag("span was found at %lu\n", rank); sparsemap_clear(map); - for (i = 1; i < 9; i++) - sparsemap_set(map, i, true); + for (i = 2; i < 7; i++) sparsemap_set(map, i, true); rank = sparsemap_rank(map, 0, 10); - __diag("rank was %lu at offset 1\n", rank); - assert(rank == 8); + __diag("rank was %lu between [0, 10]\n", rank); + assert(rank == 5); for (i = 100; i < 108; i++) { bool set = sparsemap_is_set(map, i); - if (set) + if (set) { __diag("verified %d was set, %s\n", i, was_set(i, array) ? "but knew that" : "because it wasn't"); - else + } else { __diag("darn, %d was not set, %s\n", i, was_set(i, array) ? "and yet we did set it" : "because it wasn't"); + } } __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); rank = sparsemap_rank(map, 109, 8); @@ -134,19 +132,19 @@ main(void) __diag("span was found at %lu\n", rank); sparsemap_clear(map); - for (i = 2049; i < 2057; i++) - sparsemap_set(map, i, true); + for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true); rank = sparsemap_rank(map, 2048, 8); __diag("rank was %lu at offset 108\n", rank); assert(rank == 8); for (i = 100; i < 108; i++) { bool set = sparsemap_is_set(map, i); - if (set) + if (set) { __diag("verified %d was set, %s\n", i, was_set(i, array) ? "but we thought it was" : "because it wasn't"); - else + } else { __diag("darn, %d was not really set, %s\n", i, was_set(i, array) ? "but we thought it was" : "because it wasn't"); + } } __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); rank = sparsemap_rank(map, 2048, 2060); @@ -164,8 +162,9 @@ main(void) for (i = 0; i < TEST_ARRAY_SIZE; i++) { array[i] = (int)__random(&prng) % 7000 + 1; - if (array[i] < 0) + if (array[i] < 0) { i--; + } } // randomize setting the bits on shuffle(&prng, array, TEST_ARRAY_SIZE); @@ -179,13 +178,14 @@ main(void) size_t l = sparsemap_span(map, 0, 8); __diag("found span of 8 at %lu starting from 0\n", l); - for (i = l; i < l + 8; i++) { + for (i = (int)l; i < l + 8; i++) { bool set = sparsemap_is_set(map, l + i); - if (set) + if (set) { __diag("verified %lu was set\n", l + i); - else + } else { __diag("darn, %lu was not really set, %s\n", l + i, was_set(l + i, array) ? "but we thought it was" : "because it wasn't"); + } } return 0; diff --git a/src/sparsemap.c b/src/sparsemap.c index 40692e8..a0faa5d 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -17,13 +17,12 @@ #include #include +#include +#include #include #include #include -#include -#include - #ifdef SPARSEMAP_DIAGNOSTIC #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" @@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO { /* metadata overhead: 4 bytes for __sm_chunk_t count */ SM_SIZEOF_OVERHEAD = sizeof(uint32_t), - /* number of bits that can be stored in a BitVector */ + /* number of bits that can be stored in a sm_bitvec_t */ SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8), /* number of flags that can be stored in a single index byte */ @@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv) /** * Initialize __sm_chunk_t with provided data. */ -static void +static inline void __sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data) { map->m_data = (sm_bitvec_t *)data; @@ -381,7 +380,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos, } /** - * Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the + * Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n| */ static size_t @@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n) } /** - * Counts the set bits in the range [0, idx]. + * Counts the set bits in the range [start, idx]. */ static size_t -__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx) +__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx) { size_t ret = 0; @@ -454,25 +453,46 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx) } if (flags == SM_PAYLOAD_ZEROS) { if (idx > SM_BITS_PER_VECTOR) { - idx -= SM_BITS_PER_VECTOR; + if (start > SM_BITS_PER_VECTOR) { + start -= SM_BITS_PER_VECTOR; + } else { + idx -= SM_BITS_PER_VECTOR - start; + start = 0; + } } else { return (ret); } } else if (flags == SM_PAYLOAD_ONES) { if (idx > SM_BITS_PER_VECTOR) { - idx -= SM_BITS_PER_VECTOR; - ret += SM_BITS_PER_VECTOR; + if (start > SM_BITS_PER_VECTOR) { + start -= SM_BITS_PER_VECTOR; + } else { + idx -= SM_BITS_PER_VECTOR - start; + if (start == 0) { + ret += SM_BITS_PER_VECTOR; + } + start = 0; + } } else { return (ret + idx); } } else if (flags == SM_PAYLOAD_MIXED) { if (idx > SM_BITS_PER_VECTOR) { - idx -= SM_BITS_PER_VECTOR; - ret += popcountll((uint64_t)map->m_data[1 + - __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]); + if (start > SM_BITS_PER_VECTOR) { + start -= SM_BITS_PER_VECTOR; + } else { + idx -= SM_BITS_PER_VECTOR - start; + if (start == 0) { + ret += popcountll((uint64_t)map->m_data[1 + + __sm_chunk_map_get_position(map, + i * SM_FLAGS_PER_INDEX_BYTE + j)]); + } + start = 0; + } } else { sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; + idx -= start; for (size_t k = 0; k < idx; k++) { if (w & ((sm_bitvec_t)1 << k)) { ret++; @@ -703,6 +723,7 @@ __sm_insert_data(sparsemap_t *map, size_t offset, uint8_t *buffer, memmove(p + buffer_size, p, map->m_data_used - offset); memcpy(p, buffer, buffer_size); map->m_data_used += buffer_size; + return 0; } /** @@ -718,8 +739,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size) } /** -* Clears the whole buffer -*/ + * Clears the whole buffer + */ void sparsemap_clear(sparsemap_t *map) { @@ -1152,16 +1173,16 @@ sparsemap_select(sparsemap_t *map, size_t n) } /** - * Counts the set bits in the range [offset, idx]. + * Counts the set bits in the range [loc, idx]. */ size_t -sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx) +sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx) { assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); size_t result = 0; size_t count = __sm_get_chunk_map_count(map); - uint8_t *p = __sm_get_chunk_map_data(map, offset); + uint8_t *p = __sm_get_chunk_map_data(map, 0); for (size_t i = 0; i < count; i++) { sm_idx_t start = *(sm_idx_t *)p; @@ -1172,7 +1193,15 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx) __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); - result += __sm_chunk_map_rank(&chunk, idx - start); + /* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then + move to the next chunk. */ + size_t capacity = __sm_chunk_map_get_capacity(&chunk); + if (loc < start || loc - start >= capacity) { + loc -= capacity; + continue; + } + + result += __sm_chunk_map_rank(&chunk, loc, idx - start); p += __sm_chunk_map_get_size(&chunk); } return (result); @@ -1186,15 +1215,17 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx) size_t sparsemap_span(sparsemap_t *map, size_t loc, size_t len) { - size_t size = 1024; + size_t nth, count, size = 1024; do { - size_t nth = sparsemap_select(map, len); - size_t count = sparsemap_rank(map, nth - len, nth); + nth = sparsemap_select(map, nth); + count = sparsemap_rank(map, nth, nth + len); if (count == len) { - return nth - len; + return nth; + } else { + nth += count; } - } while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len); + } while (1); // TODO... ? until what? return size; }