rank
This commit is contained in:
parent
e9128a8422
commit
fa7d07044a
2 changed files with 79 additions and 48 deletions
|
@ -1,9 +1,6 @@
|
|||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/sparsemap.h"
|
||||
|
||||
|
@ -66,11 +63,12 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n)
|
|||
}
|
||||
|
||||
/**
 * Reports whether |bit| equals one of the first 1024 entries of |array|.
 *
 * |array| must hold at least 1024 readable elements; exactly that many are
 * examined, in order, and the search stops at the first match.
 *
 * Returns true when a matching entry exists, false otherwise.
 */
bool
was_set(size_t bit, const int array[])
{
  for (size_t i = 0; i < 1024; i++) {
    /* Compare in size_t rather than narrowing |bit| to int: a narrowing cast
       would let a large |bit| alias a small entry, and a plain mixed
       comparison would let a negative entry alias a huge |bit|. */
    if (array[i] >= 0 && (size_t)array[i] == bit) {
      return true;
    }
  }
  return false;
}
|
||||
|
@ -94,38 +92,38 @@ main(void)
|
|||
// create the sparse bitmap
|
||||
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
sparsemap_set(map, i, true);
|
||||
for (i = 0; i < 8; i++) sparsemap_set(map, i, true);
|
||||
rank = sparsemap_rank(map, 0, 8);
|
||||
__diag("rank was %lu at offset 0\n", rank);
|
||||
assert(rank == 8);
|
||||
for (i = 0; i < 8; i++) {
|
||||
bool set = sparsemap_is_set(map, i);
|
||||
if (set)
|
||||
if (set) {
|
||||
__diag("verified %d was set, %s\n", i,
|
||||
was_set(i, array) ? "but we thought it was" : "because it wasn't");
|
||||
else
|
||||
} else {
|
||||
__diag("darn, %d was not really set, %s\n", i,
|
||||
was_set(i, array) ? "but we thought it was" : "because it wasn't");
|
||||
}
|
||||
}
|
||||
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
|
||||
rank = sparsemap_span(map, 0, 8);
|
||||
__diag("span was found at %lu\n", rank);
|
||||
sparsemap_clear(map);
|
||||
|
||||
for (i = 1; i < 9; i++)
|
||||
sparsemap_set(map, i, true);
|
||||
for (i = 2; i < 7; i++) sparsemap_set(map, i, true);
|
||||
rank = sparsemap_rank(map, 0, 10);
|
||||
__diag("rank was %lu at offset 1\n", rank);
|
||||
assert(rank == 8);
|
||||
__diag("rank was %lu between [0, 10]\n", rank);
|
||||
assert(rank == 5);
|
||||
for (i = 100; i < 108; i++) {
|
||||
bool set = sparsemap_is_set(map, i);
|
||||
if (set)
|
||||
if (set) {
|
||||
__diag("verified %d was set, %s\n", i,
|
||||
was_set(i, array) ? "but knew that" : "because it wasn't");
|
||||
else
|
||||
} else {
|
||||
__diag("darn, %d was not set, %s\n", i,
|
||||
was_set(i, array) ? "and yet we did set it" : "because it wasn't");
|
||||
}
|
||||
}
|
||||
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
|
||||
rank = sparsemap_rank(map, 109, 8);
|
||||
|
@ -134,19 +132,19 @@ main(void)
|
|||
__diag("span was found at %lu\n", rank);
|
||||
sparsemap_clear(map);
|
||||
|
||||
for (i = 2049; i < 2057; i++)
|
||||
sparsemap_set(map, i, true);
|
||||
for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true);
|
||||
rank = sparsemap_rank(map, 2048, 8);
|
||||
__diag("rank was %lu at offset 108\n", rank);
|
||||
assert(rank == 8);
|
||||
for (i = 100; i < 108; i++) {
|
||||
bool set = sparsemap_is_set(map, i);
|
||||
if (set)
|
||||
if (set) {
|
||||
__diag("verified %d was set, %s\n", i,
|
||||
was_set(i, array) ? "but we thought it was" : "because it wasn't");
|
||||
else
|
||||
} else {
|
||||
__diag("darn, %d was not really set, %s\n", i,
|
||||
was_set(i, array) ? "but we thought it was" : "because it wasn't");
|
||||
}
|
||||
}
|
||||
__diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set");
|
||||
rank = sparsemap_rank(map, 2048, 2060);
|
||||
|
@ -164,8 +162,9 @@ main(void)
|
|||
|
||||
for (i = 0; i < TEST_ARRAY_SIZE; i++) {
|
||||
array[i] = (int)__random(&prng) % 7000 + 1;
|
||||
if (array[i] < 0)
|
||||
if (array[i] < 0) {
|
||||
i--;
|
||||
}
|
||||
}
|
||||
// randomize setting the bits on
|
||||
shuffle(&prng, array, TEST_ARRAY_SIZE);
|
||||
|
@ -179,13 +178,14 @@ main(void)
|
|||
|
||||
size_t l = sparsemap_span(map, 0, 8);
|
||||
__diag("found span of 8 at %lu starting from 0\n", l);
|
||||
for (i = l; i < l + 8; i++) {
|
||||
for (i = (int)l; i < l + 8; i++) {
|
||||
bool set = sparsemap_is_set(map, l + i);
|
||||
if (set)
|
||||
if (set) {
|
||||
__diag("verified %lu was set\n", l + i);
|
||||
else
|
||||
} else {
|
||||
__diag("darn, %lu was not really set, %s\n", l + i,
|
||||
was_set(l + i, array) ? "but we thought it was" : "because it wasn't");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -17,13 +17,12 @@
|
|||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <popcount.h>
|
||||
#include <sparsemap.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <popcount.h>
|
||||
#include <sparsemap.h>
|
||||
|
||||
#ifdef SPARSEMAP_DIAGNOSTIC
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wpedantic"
|
||||
|
@ -59,7 +58,7 @@ enum __SM_CHUNK_INFO {
|
|||
/* metadata overhead: 4 bytes for __sm_chunk_t count */
|
||||
SM_SIZEOF_OVERHEAD = sizeof(uint32_t),
|
||||
|
||||
/* number of bits that can be stored in a BitVector */
|
||||
/* number of bits that can be stored in a sm_bitvec_t */
|
||||
SM_BITS_PER_VECTOR = (sizeof(sm_bitvec_t) * 8),
|
||||
|
||||
/* number of flags that can be stored in a single index byte */
|
||||
|
@ -161,7 +160,7 @@ __sm_chunk_map_get_position(__sm_chunk_t *map, size_t bv)
|
|||
/**
|
||||
* Initialize __sm_chunk_t with provided data.
|
||||
*/
|
||||
static void
|
||||
static inline void
|
||||
__sm_chunk_map_init(__sm_chunk_t *map, uint8_t *data)
|
||||
{
|
||||
map->m_data = (sm_bitvec_t *)data;
|
||||
|
@ -381,7 +380,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the 'nth' set bit; sets |*pnew_n| to 0 if the
|
||||
* Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the
|
||||
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n| if it was not.
|
||||
*/
|
||||
static size_t
|
||||
|
@ -438,10 +437,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
|
|||
}
|
||||
|
||||
/**
|
||||
* Counts the set bits in the range [0, idx].
|
||||
* Counts the set bits in the range [start, idx].
|
||||
*/
|
||||
static size_t
|
||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
|
||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
|
||||
{
|
||||
size_t ret = 0;
|
||||
|
||||
|
@ -454,25 +453,46 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx)
|
|||
}
|
||||
if (flags == SM_PAYLOAD_ZEROS) {
|
||||
if (idx > SM_BITS_PER_VECTOR) {
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
if (start > SM_BITS_PER_VECTOR) {
|
||||
start -= SM_BITS_PER_VECTOR;
|
||||
} else {
|
||||
idx -= SM_BITS_PER_VECTOR - start;
|
||||
start = 0;
|
||||
}
|
||||
} else {
|
||||
return (ret);
|
||||
}
|
||||
} else if (flags == SM_PAYLOAD_ONES) {
|
||||
if (idx > SM_BITS_PER_VECTOR) {
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
ret += SM_BITS_PER_VECTOR;
|
||||
if (start > SM_BITS_PER_VECTOR) {
|
||||
start -= SM_BITS_PER_VECTOR;
|
||||
} else {
|
||||
idx -= SM_BITS_PER_VECTOR - start;
|
||||
if (start == 0) {
|
||||
ret += SM_BITS_PER_VECTOR;
|
||||
}
|
||||
start = 0;
|
||||
}
|
||||
} else {
|
||||
return (ret + idx);
|
||||
}
|
||||
} else if (flags == SM_PAYLOAD_MIXED) {
|
||||
if (idx > SM_BITS_PER_VECTOR) {
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
ret += popcountll((uint64_t)map->m_data[1 +
|
||||
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]);
|
||||
if (start > SM_BITS_PER_VECTOR) {
|
||||
start -= SM_BITS_PER_VECTOR;
|
||||
} else {
|
||||
idx -= SM_BITS_PER_VECTOR - start;
|
||||
if (start == 0) {
|
||||
ret += popcountll((uint64_t)map->m_data[1 +
|
||||
__sm_chunk_map_get_position(map,
|
||||
i * SM_FLAGS_PER_INDEX_BYTE + j)]);
|
||||
}
|
||||
start = 0;
|
||||
}
|
||||
} else {
|
||||
sm_bitvec_t w = map->m_data[1 +
|
||||
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
|
||||
idx -= start;
|
||||
for (size_t k = 0; k < idx; k++) {
|
||||
if (w & ((sm_bitvec_t)1 << k)) {
|
||||
ret++;
|
||||
|
@ -703,6 +723,7 @@ __sm_insert_data(sparsemap_t *map, size_t offset, uint8_t *buffer,
|
|||
memmove(p + buffer_size, p, map->m_data_used - offset);
|
||||
memcpy(p, buffer, buffer_size);
|
||||
map->m_data_used += buffer_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -718,8 +739,8 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
|
|||
}
|
||||
|
||||
/**
|
||||
* Clears the whole buffer
|
||||
*/
|
||||
* Clears the whole buffer
|
||||
*/
|
||||
void
|
||||
sparsemap_clear(sparsemap_t *map)
|
||||
{
|
||||
|
@ -1152,16 +1173,16 @@ sparsemap_select(sparsemap_t *map, size_t n)
|
|||
}
|
||||
|
||||
/**
|
||||
* Counts the set bits in the range [offset, idx].
|
||||
* Counts the set bits in the range [loc, idx].
|
||||
*/
|
||||
size_t
|
||||
sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
|
||||
sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
|
||||
{
|
||||
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
||||
size_t result = 0;
|
||||
size_t count = __sm_get_chunk_map_count(map);
|
||||
|
||||
uint8_t *p = __sm_get_chunk_map_data(map, offset);
|
||||
uint8_t *p = __sm_get_chunk_map_data(map, 0);
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
sm_idx_t start = *(sm_idx_t *)p;
|
||||
|
@ -1172,7 +1193,15 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
|
|||
__sm_chunk_t chunk;
|
||||
__sm_chunk_map_init(&chunk, p);
|
||||
|
||||
result += __sm_chunk_map_rank(&chunk, idx - start);
|
||||
/* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
|
||||
move to the next chunk. */
|
||||
size_t capacity = __sm_chunk_map_get_capacity(&chunk);
|
||||
if (loc < start || loc - start >= capacity) {
|
||||
loc -= capacity;
|
||||
continue;
|
||||
}
|
||||
|
||||
result += __sm_chunk_map_rank(&chunk, loc, idx - start);
|
||||
p += __sm_chunk_map_get_size(&chunk);
|
||||
}
|
||||
return (result);
|
||||
|
@ -1186,15 +1215,17 @@ sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
|
|||
size_t
|
||||
sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
|
||||
{
|
||||
size_t size = 1024;
|
||||
size_t nth, count, size = 1024;
|
||||
|
||||
do {
|
||||
size_t nth = sparsemap_select(map, len);
|
||||
size_t count = sparsemap_rank(map, nth - len, nth);
|
||||
nth = sparsemap_select(map, nth);
|
||||
count = sparsemap_rank(map, nth, nth + len);
|
||||
if (count == len) {
|
||||
return nth - len;
|
||||
return nth;
|
||||
} else {
|
||||
nth += count;
|
||||
}
|
||||
} while ((loc = sparsemap_select(map, loc + 1, 1)) < size - len);
|
||||
} while (1); // TODO... ? until what?
|
||||
|
||||
return size;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue