WIP: select/rank

This commit is contained in:
Gregory Burd 2024-04-05 19:16:18 -04:00
parent 3ecaf1d521
commit dad1d74c2c
4 changed files with 209 additions and 33 deletions

View file

@ -130,7 +130,7 @@ main()
sparsemap_set(map, i, true);
}
for (int i = 0; i < 100000; i++) {
assert(sparsemap_select(map, i) == (unsigned)i);
assert(sparsemap_select(map, 0, i) == (unsigned)i);
}
sparsemap_clear(map);
@ -140,7 +140,7 @@ main()
sparsemap_set(map, i, true);
}
for (int i = 1; i < 513; i++) {
assert(sparsemap_select(map, i - 1) == (unsigned)i);
assert(sparsemap_select(map, 0, i - 1) == (unsigned)i);
}
sparsemap_clear(map);
@ -150,7 +150,7 @@ main()
sparsemap_set(map, i * 10, true);
}
for (size_t i = 0; i < 8; i++) {
assert(sparsemap_select(map, i) == i * 10);
assert(sparsemap_select(map, 0, i) == i * 10);
}
// split and move, aligned to MiniMap capacity

127
examples/ex_4.c Normal file
View file

@ -0,0 +1,127 @@
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "../include/sparsemap.h"
/*
 * __diag(fmt, ...): diagnostic print helper.
 *
 * Writes a "file:line:function(): " prefix to stderr and then the caller's
 * printf-style message, using two separate fprintf() calls.  The body is
 * wrapped in do { } while (0) so an invocation behaves as a single statement.
 * The surrounding pragmas suppress GCC's -Wvariadic-macros diagnostic for this
 * definition only (push before, pop after).
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wvariadic-macros"
#define __diag(...) \
do { \
fprintf(stderr, "%s:%d:%s(): ", __FILE__, __LINE__, __func__); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
#pragma GCC diagnostic pop
/* When SEED is defined, main() seeds the PRNG with a fixed constant so runs are
 * repeatable; when it is not defined, the seed is taken from time() ^ getpid(). */
#define SEED
/*
 * State of the small PRNG described at
 * https://burtleburtle.net/bob/rand/smallprng.html
 *
 * Four 32-bit words; initialised by __random_seed() and advanced by
 * __random().
 */
struct rnd_ctx {
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;
};

typedef struct rnd_ctx rnd_ctx_t;
/* Rotate the 32-bit value |x| left by |k| bits. */
#define __rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))

/*
 * Advances the generator state |x| by one step and returns the next 32-bit
 * pseudo-random value (the new value of x->d).
 */
uint32_t
__random(rnd_ctx_t *x)
{
    /* Compute every new word from the current state before storing any. */
    const uint32_t mixed = x->a - __rot(x->b, 27);
    const uint32_t next_a = x->b ^ __rot(x->c, 17);
    const uint32_t next_b = x->c + x->d;
    const uint32_t next_c = x->d + mixed;
    const uint32_t next_d = mixed + next_a;

    x->a = next_a;
    x->b = next_b;
    x->c = next_c;
    x->d = next_d;

    return next_d;
}
/*
 * Initialises the generator state |x| from |seed|: a fixed constant goes into
 * word a, the seed into words b, c and d, and the generator is then stepped
 * 20 times with the outputs discarded.
 */
void
__random_seed(rnd_ctx_t *x, uint32_t seed)
{
    x->a = 0xf1ea5eed;
    x->b = seed;
    x->c = seed;
    x->d = seed;

    for (int round = 0; round < 20; round++) {
        (void)__random(x);
    }
}
/*
 * Shuffles the |n| elements of |array| in place, walking from the last index
 * down to 1 and exchanging each element with one at a position in [0, index]
 * chosen by __random(prng).  Arrays with fewer than two elements are left
 * untouched.
 */
void
shuffle(rnd_ctx_t *prng, int *array, size_t n)
{
    if (n <= 1) {
        return;
    }

    for (size_t hi = n - 1; hi > 0; hi--) {
        size_t pick = (unsigned int)(__random(prng) % (hi + 1));

        if (pick == hi) {
            continue; /* element stays where it is */
        }

        int held = array[hi];
        array[hi] = array[pick];
        array[pick] = held;
    }
}
/*
 * Reports whether |bit| appears among the first 1024 entries of |array|.
 *
 * |array| must have at least 1024 elements (the scan length is fixed).
 * Returns true when some entry equals |bit|, false otherwise.
 */
bool
was_set(size_t bit, int array[])
{
    for (size_t i = 0; i < 1024; i++) {
        /* A negative entry can never name a bit.  Check the sign first so the
         * int is not converted to a huge size_t that could spuriously equal
         * |bit| (e.g. -1 comparing equal to SIZE_MAX). */
        if (array[i] >= 0 && (size_t)array[i] == bit) {
            return true;
        }
    }
    return false;
}
/*
 * Example driver: fills an array with 1024 pseudo-random bit positions, sets
 * those bits in a sparsemap backed by a 1024-byte buffer, asks
 * sparsemap_span() for a run of 8 set bits starting from 0 and then reports,
 * for each of the 8 positions of that run, whether the bit is really set.
 *
 * Returns 0 on success, EXIT_FAILURE when the buffer or the map could not be
 * created.
 */
int
main(void)
{
    rnd_ctx_t prng;
    int array[1024]; /* was_set() scans exactly 1024 entries */

    // disable buffering
    setbuf(stderr, NULL);

    // seed the PRNG
#ifdef SEED
    __random_seed(&prng, 8675309);
#else
    __random_seed(&prng, (unsigned int)time(NULL) ^ getpid());
#endif

    // draw bit positions, re-drawing whenever the result is negative
    // (accepted values fall in 0..7000; duplicates are possible)
    for (size_t i = 0; i < 1024; i++) {
        do {
            array[i] = (int)__random(&prng) % 7000 + 1;
        } while (array[i] < 0);
    }

    // randomize setting the bits on
    shuffle(&prng, array, 1024);

    // start with a 1KiB (1024-byte) zeroed buffer
    uint8_t *buf = calloc(1024, sizeof(uint8_t));
    if (buf == NULL) {
        __diag("unable to allocate the backing buffer\n");
        return EXIT_FAILURE;
    }

    // create the sparse bitmap
    sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024, 0);
    if (map == NULL) {
        __diag("unable to create the sparsemap\n");
        free(buf);
        return EXIT_FAILURE;
    }

    // set all the bits on in a random order
    for (size_t i = 0; i < 1024; i++) {
        sparsemap_set(map, array[i], true);
        assert(sparsemap_is_set(map, array[i]) == true);
    }

    size_t l = sparsemap_span(map, 0, 8);
    __diag("found span of 8 at %zu starting from 0\n", l);

    // verify the 8 positions l .. l + 7; the offset runs from 0 so that
    // l is only added once
    for (size_t k = 0; k < 8; k++) {
        size_t bit = l + k;

        if (sparsemap_is_set(map, bit)) {
            __diag("verified %zu was set\n", bit);
        } else {
            __diag("darn, %zu was not really set, %s\n", bit, was_set(bit, array) ? "but we thought it was" : "because it wasn't");
        }
    }

    // NOTE(review): how |map| itself is released is not visible from this
    // file; only the buffer allocated here is freed.
    free(buf);
    return 0;
}

View file

@ -118,12 +118,13 @@ void sparsemap_combine(sparsemap_t *map, size_t sstart, sparsemap_t *other);
#endif
/* Returns the index of the n'th set bit; uses a 0-based index. */
size_t sparsemap_select(sparsemap_t *map, size_t n);
size_t sparsemap_select(sparsemap_t *map, size_t offset, size_t n);
/* Counts the set bits in the range [offset, idx]. */
size_t sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx);
/* Returns the 0-based index of a span of the first set bits of at least |len| starting after |offset|. */
/* Returns the 0-based index of a span of the first set bits of at least |len|
* starting after |offset|. */
size_t sparsemap_span(sparsemap_t *map, size_t offset, size_t len);
#endif

View file

@ -17,13 +17,12 @@
#include <assert.h>
#include <errno.h>
#include <popcount.h>
#include <sparsemap.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <popcount.h>
#include <sparsemap.h>
#ifdef SPARSEMAP_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO {
/* metadata overhead: 4 bytes for __sm_chunk_t count */
SM_SIZEOF_OVERHEAD = sizeof(uint32_t),
/* number of bits that can be stored in a BitVector */
/* number of bits that can be stored in a sm_bitvec_t */
SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8),
/* number of flags that can be stored in a single index byte */
@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv)
/**
* Initialize __sm_chunk_t with provided data.
*/
static void
static inline void
__sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data)
{
map->m_data = (sm_bitvec_t *)data;
@ -381,8 +380,8 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
}
/**
* Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|
* Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|.
*/
static size_t
__sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
}
/**
* Counts the set bits in the range [0, idx].
* Counts the set bits in the range [start, idx].
*/
static size_t
__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
{
size_t ret = 0;
@ -454,22 +453,39 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
}
if (flags == SM_PAYLOAD_ZEROS) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
start = 0;
}
} else {
return (ret);
}
} else if (flags == SM_PAYLOAD_ONES) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
ret += SM_BITS_PER_VECTOR;
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0)
ret += SM_BITS_PER_VECTOR;
start = 0;
}
} else {
return (ret + idx);
}
} else if (flags == SM_PAYLOAD_MIXED) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0)
ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
start = 0;
}
} else {
sm_bitvec_t w = map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
@ -718,8 +734,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
}
/**
* Clears the whole buffer
*/
* Clears the whole buffer
*/
void
sparsemap_clear(sparsemap_t *map)
{
@ -1124,7 +1140,7 @@ sparsemap_split(sparsemap_t *map, size_t sstart, sparsemap_t *other)
* i.e. n == 0 for the first bit which is set, n == 1 for the second bit etc.
*/
size_t
sparsemap_select(sparsemap_t *map, size_t n)
sparsemap_select(sparsemap_t *map, size_t loc, size_t n)
{
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0;
@ -1138,6 +1154,14 @@ sparsemap_select(sparsemap_t *map, size_t n)
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
/* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
move to the next chunk. */
size_t capacity = __sm_chunk_map_get_capacity(&chunk);
if (loc < result || loc - result >= capacity) {
loc -= capacity;
continue;
}
ssize_t new_n = n;
size_t index = __sm_chunk_map_select(&chunk, n, &new_n);
if (new_n == -1) {
@ -1152,16 +1176,16 @@ sparsemap_select(sparsemap_t *map, size_t n)
}
/**
* Counts the set bits in the range [offset, idx].
* Counts the set bits in the range [loc, idx].
*/
size_t
sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
{
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0;
size_t count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, offset);
uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)p;
@ -1172,18 +1196,42 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
result += __sm_chunk_map_rank(&chunk, idx - start);
/* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
move to the next chunk. */
size_t capacity = __sm_chunk_map_get_capacity(&chunk);
if (loc < start || loc - start >= capacity) {
loc -= capacity;
continue;
}
result += __sm_chunk_map_rank(&chunk, loc, idx - start);
p += __sm_chunk_map_get_size(&chunk);
}
return (result);
}
/**
* Finds a span of set bits of at least |len| after |offset|.
* Finds a span of set bits of at least |len| after |loc|. Returns the index of
* the n'th set bit that starts a span of at least |len| bits set to true.
* Returns ???TODO??? when a span of suitable length was not found.
*/
size_t sparsemap_span(sparsemap_t *map, size_t offset, size_t len) {
((void)map);
((void)offset);
((void)len);
return 0; // TODO
size_t
sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
{
/* NOTE(review): hard-coded limit; it is used both as the loop bound below and
 * as the value returned when no span was found.  The commented-out lines
 * suggest it was meant to come from the map -- confirm before relying on it. */
size_t size = 1024;
// size_t size = sparsemap_get_size(map);
// assert(size >= SM_SIZEOF_OVERHEAD);
// if (loc + 1 > size - len || len < size) {
// return size;
// }
do {
/* Candidate position: the result of select starting at |loc| with n == len. */
size_t nth = sparsemap_select(map, loc, len);
/* Count the set bits in [nth - len, nth].
 * NOTE(review): nth - len wraps around (size_t) whenever nth < len -- confirm
 * that cannot happen, or guard it. */
size_t count = sparsemap_rank(map, nth - len, nth);
if (count == len) {
return nth - len;
}
/* Advance |loc| to select(map, loc + 1, 1) and retry while it stays below
 * size - len.  NOTE(review): size - len also wraps when len > size. */
} while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len);
/* No candidate passed the check: return the limit value. */
return size;
}