This commit is contained in:
Gregory Burd 2024-04-05 21:42:46 -04:00
parent e9128a8422
commit fa7d07044a
2 changed files with 79 additions and 48 deletions

View file

@ -1,9 +1,6 @@
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "../include/sparsemap.h"
@ -66,11 +63,12 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n)
}
/*
 * Reports whether `bit` appears among the first 1024 entries of `array`.
 *
 * Returns true on the first entry equal to `bit`, false when none of the
 * 1024 entries match.
 *
 * NOTE(review): this is diff text -- the parameter list and the comparison
 * each appear twice, the superseded line followed by its replacement.
 * NOTE(review): the scan length is the literal 1024, not a length passed by
 * the caller; presumably it matches the size of the test array -- confirm.
 */
bool
was_set(size_t bit, int array[])
was_set(size_t bit, const int array[])
{
for (int i = 0; i < 1024; i++) {
/* Superseded form: compares an int element against a size_t directly. */
if (array[i] == bit)
/* Replacement form: `bit` is converted to int before the comparison. */
if (array[i] == (int)bit) {
return true;
}
}
return false;
}
@ -94,38 +92,38 @@ main(void)
// create the sparse bitmap
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0);
for (i = 0; i < 8; i++)
sparsemap_set(map, i, true);
for (i = 0; i < 8; i++) sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 0, 8);
__diag("rank was %lu at offset 0\n", rank);
assert(rank == 8);
for (i = 0; i < 8; i++) {
bool set = sparsemap_is_set(map, i);
if (set)
if (set) {
__diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't");
else
} else {
__diag("darn, %d was not really set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't");
}
}
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_span(map, 0, 8);
__diag("span was found at %lu\n", rank);
sparsemap_clear(map);
for (i = 1; i < 9; i++)
sparsemap_set(map, i, true);
for (i = 2; i < 7; i++) sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 0, 10);
__diag("rank was %lu at offset 1\n", rank);
assert(rank == 8);
__diag("rank was %lu between [0, 10]\n", rank);
assert(rank == 5);
for (i = 100; i < 108; i++) {
bool set = sparsemap_is_set(map, i);
if (set)
if (set) {
__diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but knew that" : "because it wasn't");
else
} else {
__diag("darn, %d was not set, %s\n", i,
was_set(i, array) ? "and yet we did set it" : "because it wasn't");
}
}
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_rank(map, 109, 8);
@ -134,19 +132,19 @@ main(void)
__diag("span was found at %lu\n", rank);
sparsemap_clear(map);
for (i = 2049; i < 2057; i++)
sparsemap_set(map, i, true);
for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 2048, 8);
__diag("rank was %lu at offset 108\n", rank);
assert(rank == 8);
for (i = 100; i < 108; i++) {
bool set = sparsemap_is_set(map, i);
if (set)
if (set) {
__diag("verified %d was set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't");
else
} else {
__diag("darn, %d was not really set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't");
}
}
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
rank = sparsemap_rank(map, 2048, 2060);
@ -164,8 +162,9 @@ main(void)
for (i = 0; i < TEST_ARRAY_SIZE; i++) {
array[i] = (int)__random(&prng) % 7000 + 1;
if (array[i] < 0)
if (array[i] < 0) {
i--;
}
}
// randomize setting the bits on
shuffle(&prng, array, TEST_ARRAY_SIZE);
@ -179,13 +178,14 @@ main(void)
size_t l = sparsemap_span(map, 0, 8);
__diag("found span of 8 at %lu starting from 0\n", l);
for (i = l; i < l + 8; i++) {
for (i = (int)l; i < l + 8; i++) {
bool set = sparsemap_is_set(map, l + i);
if (set)
if (set) {
__diag("verified %lu was set\n", l + i);
else
} else {
__diag("darn, %lu was not really set, %s\n", l + i,
was_set(l + i, array) ? "but we thought it was" : "because it wasn't");
}
}
return 0;

View file

@ -17,13 +17,12 @@
#include <assert.h>
#include <errno.h>
#include <popcount.h>
#include <sparsemap.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <popcount.h>
#include <sparsemap.h>
#ifdef SPARSEMAP_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO {
/* metadata overhead: 4 bytes for __sm_chunk_t count */
SM_SIZEOF_OVERHEAD = sizeof(uint32_t),
/* number of bits that can be stored in a BitVector */
/* number of bits that can be stored in a sm_bitvec_t */
SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8),
/* number of flags that can be stored in a single index byte */
@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv)
/**
* Initialize __sm_chunk_t with provided data.
*/
static void
static inline void
__sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data)
{
map->m_data = (sm_bitvec_t *)data;
@ -381,7 +380,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
}
/**
* Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the
* Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|
*/
static size_t
@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
}
/**
* Counts the set bits in the range [0, idx].
* Counts the set bits in the range [start, idx].
*/
static size_t
__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
{
size_t ret = 0;
@ -454,25 +453,46 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
}
if (flags == SM_PAYLOAD_ZEROS) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
start = 0;
}
} else {
return (ret);
}
} else if (flags == SM_PAYLOAD_ONES) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
ret += SM_BITS_PER_VECTOR;
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0) {
ret += SM_BITS_PER_VECTOR;
}
start = 0;
}
} else {
return (ret + idx);
}
} else if (flags == SM_PAYLOAD_MIXED) {
if (idx > SM_BITS_PER_VECTOR) {
idx -= SM_BITS_PER_VECTOR;
ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
if (start > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR;
} else {
idx -= SM_BITS_PER_VECTOR - start;
if (start == 0) {
ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map,
i * SM_FLAGS_PER_INDEX_BYTE + j)]);
}
start = 0;
}
} else {
sm_bitvec_t w = map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
idx -= start;
for (size_t k = 0; k < idx; k++) {
if (w & ((sm_bitvec_t)1 << k)) {
ret++;
@ -703,6 +723,7 @@ __sm_insert_data(sparsemap_t *map, size_t offset, uint8_t *buffer,
memmove(p + buffer_size, p, map->m_data_used - offset);
memcpy(p, buffer, buffer_size);
map->m_data_used += buffer_size;
return 0;
}
/**
@ -718,8 +739,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
}
/**
* Clears the whole buffer
*/
* Clears the whole buffer
*/
void
sparsemap_clear(sparsemap_t *map)
{
@ -1152,16 +1173,16 @@ sparsemap_select(sparsemap_t *map, size_t n)
}
/**
* Counts the set bits in the range [offset, idx].
* Counts the set bits in the range [loc, idx].
*/
size_t
sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
{
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0;
size_t count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, offset);
uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)p;
@ -1172,7 +1193,15 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
result += __sm_chunk_map_rank(&chunk, idx - start);
/* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
move to the next chunk. */
size_t capacity = __sm_chunk_map_get_capacity(&chunk);
if (loc < start || loc - start >= capacity) {
loc -= capacity;
continue;
}
result += __sm_chunk_map_rank(&chunk, loc, idx - start);
p += __sm_chunk_map_get_size(&chunk);
}
return (result);
@ -1186,15 +1215,17 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
/*
 * NOTE(review): this is diff text -- superseded lines are immediately
 * followed by their replacements, so several statements appear twice.
 *
 * What the replacement lines do: repeatedly call sparsemap_select() to get a
 * position `nth`, call sparsemap_rank() over [nth, nth + len], and return
 * `nth` as soon as that rank equals `len`. Presumably this is a search for a
 * run of `len` consecutive set bits -- confirm against the function's header
 * comment, which is outside this view.
 *
 * Open issues visible in the replacement lines:
 *  - `nth` is declared without an initializer and is passed to
 *    sparsemap_select() on the first iteration before anything is assigned
 *    to it.
 *  - `loc` is no longer referenced; its only use is in the superseded loop
 *    condition.
 *  - the loop condition is the constant 1, so the only way out of the loop
 *    is the `count == len` return and the final `return size` is never
 *    reached.
 */
size_t
sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
{
size_t size = 1024;
size_t nth, count, size = 1024;
do {
/* Superseded pair: select the len'th set bit, then rank the window ending at it. */
size_t nth = sparsemap_select(map, len);
size_t count = sparsemap_rank(map, nth - len, nth);
/* Replacement pair: select using the current `nth`, then rank the window starting at it. */
nth = sparsemap_select(map, nth);
count = sparsemap_rank(map, nth, nth + len);
if (count == len) {
/* Superseded return reported the window start as nth - len. */
return nth - len;
return nth;
} else {
/* Advance the selector argument by the number of set bits just counted. */
nth += count;
}
/* NOTE(review): the superseded condition passes three arguments to
   sparsemap_select(), while every other call in this function passes two. */
} while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len);
} while (1); // TODO... ? until what?
return size;
}