This commit is contained in:
Gregory Burd 2024-04-05 21:42:46 -04:00
parent e9128a8422
commit fa7d07044a
2 changed files with 79 additions and 48 deletions

View file

@ -1,9 +1,6 @@
#include <assert.h> #include <assert.h>
#include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "../include/sparsemap.h" #include "../include/sparsemap.h"
@ -66,11 +63,12 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n)
} }
bool bool
was_set(size_t bit, int array[]) was_set(size_t bit, const int array[])
{ {
for (int i = 0; i < 1024; i++) { for (int i = 0; i < 1024; i++) {
if (array[i] == bit) if (array[i] == (int)bit) {
return true; return true;
}
} }
return false; return false;
} }
@ -94,38 +92,38 @@ main(void)
// create the sparse bitmap // create the sparse bitmap
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0); sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++) sparsemap_set(map, i, true);
sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 0, 8); rank = sparsemap_rank(map, 0, 8);
__diag("rank was %lu at offset 0\n", rank); __diag("rank was %lu at offset 0\n", rank);
assert(rank == 8); assert(rank == 8);
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
bool set = sparsemap_is_set(map, i); bool set = sparsemap_is_set(map, i);
if (set) if (set) {
__diag("verified %d was set, %s\n", i, __diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't"); was_set(i, array) ? "but we thought it was" : "because it wasn't");
else } else {
__diag("darn, %d was not really set, %s\n", i, __diag("darn, %d was not really set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't"); was_set(i, array) ? "but we thought it was" : "because it wasn't");
}
} }
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_span(map, 0, 8); rank = sparsemap_span(map, 0, 8);
__diag("span was found at %lu\n", rank); __diag("span was found at %lu\n", rank);
sparsemap_clear(map); sparsemap_clear(map);
for (i = 1; i < 9; i++) for (i = 2; i < 7; i++) sparsemap_set(map, i, true);
sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 0, 10); rank = sparsemap_rank(map, 0, 10);
__diag("rank was %lu at offset 1\n", rank); __diag("rank was %lu between [0, 10]\n", rank);
assert(rank == 8); assert(rank == 5);
for (i = 100; i < 108; i++) { for (i = 100; i < 108; i++) {
bool set = sparsemap_is_set(map, i); bool set = sparsemap_is_set(map, i);
if (set) if (set) {
__diag("verified %d was set, %s\n", i, __diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but knew that" : "because it wasn't"); was_set(i, array) ? "but knew that" : "because it wasn't");
else } else {
__diag("darn, %d was not set, %s\n", i, __diag("darn, %d was not set, %s\n", i,
was_set(i, array) ? "and yet we did set it" : "because it wasn't"); was_set(i, array) ? "and yet we did set it" : "because it wasn't");
}
} }
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_rank(map, 109, 8); rank = sparsemap_rank(map, 109, 8);
@ -134,19 +132,19 @@ main(void)
__diag("span was found at %lu\n", rank); __diag("span was found at %lu\n", rank);
sparsemap_clear(map); sparsemap_clear(map);
for (i = 2049; i < 2057; i++) for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true);
sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 2048, 8); rank = sparsemap_rank(map, 2048, 8);
__diag("rank was %lu at offset 108\n", rank); __diag("rank was %lu at offset 108\n", rank);
assert(rank == 8); assert(rank == 8);
for (i = 100; i < 108; i++) { for (i = 100; i < 108; i++) {
bool set = sparsemap_is_set(map, i); bool set = sparsemap_is_set(map, i);
if (set) if (set) {
__diag("verified %d was set, %s\n", i, __diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't"); was_set(i, array) ? "but we thought it was" : "because it wasn't");
else } else {
__diag("darn, %d was not really set, %s\n", i, __diag("darn, %d was not really set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't"); was_set(i, array) ? "but we thought it was" : "because it wasn't");
}
} }
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_rank(map, 2048, 2060); rank = sparsemap_rank(map, 2048, 2060);
@ -164,8 +162,9 @@ main(void)
for (i = 0; i < TEST_ARRAY_SIZE; i++) { for (i = 0; i < TEST_ARRAY_SIZE; i++) {
array[i] = (int)__random(&prng) % 7000 + 1; array[i] = (int)__random(&prng) % 7000 + 1;
if (array[i] < 0) if (array[i] < 0) {
i--; i--;
}
} }
// randomize setting the bits on // randomize setting the bits on
shuffle(&prng, array, TEST_ARRAY_SIZE); shuffle(&prng, array, TEST_ARRAY_SIZE);
@ -179,13 +178,14 @@ main(void)
size_t l = sparsemap_span(map, 0, 8); size_t l = sparsemap_span(map, 0, 8);
__diag("found span of 8 at %lu starting from 0\n", l); __diag("found span of 8 at %lu starting from 0\n", l);
for (i = l; i < l + 8; i++) { for (i = (int)l; i < l + 8; i++) {
bool set = sparsemap_is_set(map, l + i); bool set = sparsemap_is_set(map, l + i);
if (set) if (set) {
__diag("verified %lu was set\n", l + i); __diag("verified %lu was set\n", l + i);
else } else {
__diag("darn, %lu was not really set, %s\n", l + i, __diag("darn, %lu was not really set, %s\n", l + i,
was_set(l + i, array) ? "but we thought it was" : "because it wasn't"); was_set(l + i, array) ? "but we thought it was" : "because it wasn't");
}
} }
return 0; return 0;

View file

@ -17,13 +17,12 @@
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <popcount.h>
#include <sparsemap.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <popcount.h>
#include <sparsemap.h>
#ifdef SPARSEMAP_DIAGNOSTIC #ifdef SPARSEMAP_DIAGNOSTIC
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic" #pragma GCC diagnostic ignored "-Wpedantic"
@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO {
/* metadata overhead: 4 bytes for __sm_chunk_t count */ /* metadata overhead: 4 bytes for __sm_chunk_t count */
SM_SIZEOF_OVERHEAD = sizeof(uint32_t), SM_SIZEOF_OVERHEAD = sizeof(uint32_t),
/* number of bits that can be stored in a BitVector */ /* number of bits that can be stored in a sm_bitvec_t */
SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8), SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8),
/* number of flags that can be stored in a single index byte */ /* number of flags that can be stored in a single index byte */
@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv)
/** /**
* Initialize __sm_chunk_t with provided data. * Initialize __sm_chunk_t with provided data.
*/ */
static void static inline void
__sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data) __sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data)
{ {
map->m_data = (sm_bitvec_t *)data; map->m_data = (sm_bitvec_t *)data;
@ -381,7 +380,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
} }
/** /**
* Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the * Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n| * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|
*/ */
static size_t static size_t
@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
} }
/** /**
* Counts the set bits in the range [0, idx]. * Counts the set bits in the range [start, idx].
*/ */
static size_t static size_t
__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx) __sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
{ {
size_t ret = 0; size_t ret = 0;
@ -454,25 +453,46 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
} }
if (flags == SM_PAYLOAD_ZEROS) { if (flags == SM_PAYLOAD_ZEROS) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR; if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
start = 0;
}
} else { } else {
return (ret); return (ret);
} }
} else if (flags == SM_PAYLOAD_ONES) { } else if (flags == SM_PAYLOAD_ONES) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR; if (start > SM_BITS_PER_VECTOR) {
ret += SM_BITS_PER_VECTOR; start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0) {
ret += SM_BITS_PER_VECTOR;
}
start = 0;
}
} else { } else {
return (ret + idx); return (ret + idx);
} }
} else if (flags == SM_PAYLOAD_MIXED) { } else if (flags == SM_PAYLOAD_MIXED) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR; if (start > SM_BITS_PER_VECTOR) {
ret += popcountll((uint64_t)map->m_data[1 + start -= SM_BITS_PER_VECTOR;
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]); } else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0) {
ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map,
i * SM_FLAGS_PER_INDEX_BYTE + j)]);
}
start = 0;
}
} else { } else {
sm_bitvec_t w = map->m_data[1 + sm_bitvec_t w = map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
idx -= start;
for (size_t k = 0; k < idx; k++) { for (size_t k = 0; k < idx; k++) {
if (w & ((sm_bitvec_t)1 << k)) { if (w & ((sm_bitvec_t)1 << k)) {
ret++; ret++;
@ -703,6 +723,7 @@ __sm_insert_data(sparsemap_t *map, size_t offset, uint8_t *buffer,
memmove(p + buffer_size, p, map->m_data_used - offset); memmove(p + buffer_size, p, map->m_data_used - offset);
memcpy(p, buffer, buffer_size); memcpy(p, buffer, buffer_size);
map->m_data_used += buffer_size; map->m_data_used += buffer_size;
return 0;
} }
/** /**
@ -718,8 +739,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
} }
/** /**
* Clears the whole buffer * Clears the whole buffer
*/ */
void void
sparsemap_clear(sparsemap_t *map) sparsemap_clear(sparsemap_t *map)
{ {
@ -1152,16 +1173,16 @@ sparsemap_select(sparsemap_t *map, size_t n)
} }
/** /**
* Counts the set bits in the range [offset, idx]. * Counts the set bits in the range [loc, idx].
*/ */
size_t size_t
sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx) sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
{ {
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0; size_t result = 0;
size_t count = __sm_get_chunk_map_count(map); size_t count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, offset); uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)p; sm_idx_t start = *(sm_idx_t *)p;
@ -1172,7 +1193,15 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
__sm_chunk_t chunk; __sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p); __sm_chunk_map_init(&chunk, p);
result += __sm_chunk_map_rank(&chunk, idx - start); /* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
move to the next chunk. */
size_t capacity = __sm_chunk_map_get_capacity(&chunk);
if (loc < start || loc - start >= capacity) {
loc -= capacity;
continue;
}
result += __sm_chunk_map_rank(&chunk, loc, idx - start);
p += __sm_chunk_map_get_size(&chunk); p += __sm_chunk_map_get_size(&chunk);
} }
return (result); return (result);
@ -1186,15 +1215,17 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
size_t size_t
sparsemap_span(sparsemap_t *map, size_t loc, size_t len) sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
{ {
size_t size = 1024; size_t nth, count, size = 1024;
do { do {
size_t nth = sparsemap_select(map, len); nth = sparsemap_select(map, nth);
size_t count = sparsemap_rank(map, nth - len, nth); count = sparsemap_rank(map, nth, nth + len);
if (count == len) { if (count == len) {
return nth - len; return nth;
} else {
nth += count;
} }
} while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len); } while (1); // TODO... ? until what?
return size; return size;
} }