This commit is contained in:
Gregory Burd 2024-04-28 12:26:31 -04:00
parent 0297757856
commit c742185b73
6 changed files with 305 additions and 195 deletions

View file

@ -5,11 +5,13 @@ SHARED_LIB = libsparsemap.so
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
TESTS = tests/test
@ -77,11 +79,14 @@ examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB)
examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/soak: examples/common.o examples/soak.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/soak: examples/common.o tests/tdigest.o examples/soak.o $(STATIC_LIB)
$(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS)
todo:
rg -i 'todo|gsb|abort'
# cp src/sparsemap.c /tmp && clang-tidy src/sparsemap.c -fix -fix-errors -checks="readability-braces-around-statements" -- -DDEBUG -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
# clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
# clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./examples/soak

View file

@ -1,5 +1,6 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@ -7,6 +8,7 @@
#include "../include/sparsemap.h"
#include "../tests/common.h"
#include "../tests/tdigest.h"
/* midl.h ------------------------------------------------------------------ */
/** @defgroup idls ID List Management
@ -164,37 +166,60 @@ mdb_midl_search(MDB_IDL ids, MDB_ID id)
return cursor;
}
int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
int
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
{
unsigned x, i;
unsigned x, i;
x = mdb_midl_search( ids, id );
assert( x > 0 );
x = mdb_midl_search(ids, id);
assert(x > 0);
if( x < 1 ) {
/* internal error */
return -2;
}
if (x < 1) {
/* internal error */
return -2;
}
if ( x <= ids[0] && ids[x] == id ) {
/* duplicate */
assert(0);
return -1;
}
if (x <= ids[0] && ids[x] == id) {
/* duplicate */
assert(0);
return -1;
}
if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
/* no room */
--ids[0];
return -2;
if (++ids[0] >= MDB_IDL_DB_MAX) {
/* no room */
--ids[0];
return -2;
} else {
/* insert id */
for (i=ids[0]; i>x; i--)
ids[i] = ids[i-1];
ids[x] = id;
}
} else {
/* insert id */
for (i = ids[0]; i > x; i--)
ids[i] = ids[i - 1];
ids[x] = id;
}
return 0;
return 0;
}
/**
 * Drops the last n entries of an ID list by reducing its element count.
 *
 * ids[0] holds the number of elements in the list; the entries themselves
 * are left untouched in memory, only the count changes.
 *
 * Defined as a plain external function rather than a bare `inline` one:
 * in C99/C11 an `inline` definition without `static`/`extern` does not
 * provide an external definition, so a call that is not inlined (e.g. at
 * -O0) may fail to link.
 *
 * @param[in,out] ids The ID list to truncate.
 * @param[in] n The number of trailing entries to drop; must not exceed the
 * current element count.
 */
void
mdb_midl_popn(MDB_IDL ids, unsigned n)
{
  /* Popping more than we hold would wrap the unsigned count. */
  assert(n <= ids[0]);
  ids[0] = ids[0] - n;
}
/**
 * Removes the entry at 1-based position idx from an ID list.
 *
 * ids[0] holds the element count and the entries live in ids[1..ids[0]].
 * Every entry after idx is moved down one slot, including the last one,
 * and the count is then decremented. A position outside [1, ids[0]] is
 * ignored, which also covers the empty list.
 *
 * @param[in,out] ids The ID list to modify.
 * @param[in] idx The 1-based position of the entry to remove.
 */
void
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
{
  if (idx < 1 || idx > ids[0]) {
    return;
  }
  /* Close the gap: the bound is the full count so the final entry moves too. */
  for (unsigned i = idx; i < ids[0]; i++) {
    ids[i] = ids[i + 1];
  }
  ids[0] = ids[0] - 1;
}
/**
 * Removes id from an ID list when it is present; otherwise does nothing.
 *
 * The position is obtained from mdb_midl_search() and is only acted upon
 * when it lies within the list (<= ids[0]) and the entry stored there is
 * exactly id.
 *
 * @param[in,out] ids The ID list to modify.
 * @param[in] id The ID to remove.
 */
void
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
{
  const unsigned pos = mdb_midl_search(ids, id);

  /* Not in the list: nothing to remove. */
  if (pos > ids[0] || ids[pos] != id) {
    return;
  }
  mdb_midl_remove_at(ids, pos);
}
MDB_IDL
@ -500,8 +525,8 @@ verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
bool
verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
{
for (pgno_t i = pg; i < pg + len; i++) {
if (sparsemap_is_set(map, i) != false) {
for (pgno_t i = 0; i < len; i++) {
if (sparsemap_is_set(map, pg + i) != false) {
return false;
}
}
@ -511,26 +536,25 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
bool
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
{
for (int i = 1; i <= list[0]; i++) {
for (MDB_ID i = 1; i <= list[0]; i++) {
pgno_t pg = list[i];
unsigned skipped = i == 1 ? 0 : list[i-1] - list[i] - 1;
for (int j = 0; j < skipped; j++) {
if (sparsemap_is_set(map, pg - j) != false)
return false;
unsigned skipped = i == 1 ? 0 : list[i - 1] - list[i] - 1;
if (skipped) {
for (MDB_ID j = list[i - 1]; j > list[i]; j--) {
if (sparsemap_is_set(map, pg - j) != false) {
__diag("%zu\n", pg - j);
return false;
}
}
}
if (sparsemap_is_set(map, pg) != true)
if (sparsemap_is_set(map, pg) != true) {
__diag("%zu\n", pg);
return false;
}
}
return true;
}
/*
 * Emits a single diagnostic line through __diag() containing the iteration
 * count, list[-1] and list[0] of the ID list, and two strings produced by
 * bytes_as(): one for MDB_IDL_SIZEOF(list) and one for the sparsemap's
 * capacity.
 * NOTE(review): bytes_as() presumably renders a byte count in human-readable
 * form into the supplied buffer -- confirm against its definition.
 */
void
stats(size_t iterations, sparsemap_t *map, MDB_IDL list)
{
/* Scratch buffers handed to bytes_as() for the two formatted sizes. */
char m[1024], l[1024];
__diag("%zu\tidl[%zu/%zu]: %s\tsm: %s\n", iterations, list[-1], list[0], bytes_as(MDB_IDL_SIZEOF(list), m, 1024), bytes_as(sparsemap_get_capacity(map), l, 1024));
}
sparsemap_idx_t
_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
{
@ -543,6 +567,47 @@ _sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
return l;
}
/* Histograms of the elapsed times (difference of two nsts() readings) that
   main() records with td_add(); stats() reports their quantiles. */
/* Time spent locating a span of pages by scanning the MDB_IDL. */
td_histogram_t *l_span_loc;
/* Time spent locating a span of pages with sparsemap_span(). */
td_histogram_t *b_span_loc;
/* Time spent removing the chosen span from the MDB_IDL. */
td_histogram_t *l_span_take;
/* Time spent clearing the chosen span's bits in the sparsemap. */
td_histogram_t *b_span_take;
/**
 * Prints the CSV header line matching the rows written by stats().
 *
 * stats() emits 27 values per row, the first being the nsts() reading, so
 * the header starts with a "timestamp" column to keep names and values
 * aligned. The remaining columns are the iteration count, three MDB_IDL
 * figures, two sparsemap figures, and five quantiles (p50, p75, p90, p99,
 * p99.9) for each of the four timing histograms.
 */
void
stats_header(void)
{
  printf("timestamp,iterations,idl_cap,idl_used,idl_bytes,sm_cap,sm_used,"
         "idl_loc_p50,idl_loc_p75,idl_loc_p90,idl_loc_p99,idl_loc_p999,"
         "sm_loc_p50,sm_loc_p75,sm_loc_p90,sm_loc_p99,sm_loc_p999,"
         "idl_take_p50,idl_take_p75,idl_take_p90,idl_take_p99,idl_take_p999,"
         "sm_take_p50,sm_take_p75,sm_take_p90,sm_take_p99,sm_take_p999\n");
}
/**
 * Writes one CSV row of statistics to stdout.
 *
 * The row holds, in order: the current nsts() reading, the iteration count,
 * list[-1], list[0] and MDB_IDL_SIZEOF(list) for the ID list, the
 * sparsemap's capacity and size, and then the .5/.75/.90/.99/.999 quantiles
 * of the l_span_loc, b_span_loc, l_span_take and b_span_take histograms.
 * Each histogram is passed through td_compress() before being queried.
 *
 * @param[in] iterations The number of iterations completed so far; nothing
 * is printed while this is below 10.
 * @param[in] map The sparsemap whose capacity and size are reported.
 * @param[in] list The ID list whose list[-1], list[0] and byte size are
 * reported.
 */
void
stats(size_t iterations, sparsemap_t *map, MDB_IDL list)
{
  /* NOTE(review): presumably skipped so the histograms hold a few samples
     before quantiles are reported -- confirm the intent. */
  if (iterations < 10) {
    return;
  }

  td_compress(l_span_loc);
  td_compress(b_span_loc);
  td_compress(l_span_take);
  td_compress(b_span_take);

  printf("%f,%zu,%zu,%zu,%zu,%zu,%zu,"
         "%.10f,%.10f,%.10f,%.10f,%.10f,"
         "%.10f,%.10f,%.10f,%.10f,%.10f,"
         "%.10f,%.10f,%.10f,%.10f,%.10f,"
         "%.10f,%.10f,%.10f,%.10f,%.10f\n",
    nsts(), iterations, list[-1], list[0], MDB_IDL_SIZEOF(list), sparsemap_get_capacity(map), sparsemap_get_size(map),
    td_quantile(l_span_loc, .5), td_quantile(l_span_loc, .75), td_quantile(l_span_loc, .90), td_quantile(l_span_loc, .99), td_quantile(l_span_loc, .999),
    td_quantile(b_span_loc, .5), td_quantile(b_span_loc, .75), td_quantile(b_span_loc, .90), td_quantile(b_span_loc, .99), td_quantile(b_span_loc, .999),
    td_quantile(l_span_take, .5), td_quantile(l_span_take, .75), td_quantile(l_span_take, .90), td_quantile(l_span_take, .99), td_quantile(l_span_take, .999),
    td_quantile(b_span_take, .5), td_quantile(b_span_take, .75), td_quantile(b_span_take, .90), td_quantile(b_span_take, .99), td_quantile(b_span_take, .999));
}
/* Number of pages tracked at start-up. Parenthesized so the macro expands
   as a single value inside larger expressions (e.g. x / INITIAL_AMOUNT). */
#define INITIAL_AMOUNT (1024 * 2)
@ -552,24 +617,29 @@ _sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
int
main()
{
size_t iterations = 0;
size_t replenish = 0, iterations = 0;
bool prefer_mdb_idl_location = (bool)xorshift32() % 2;
// disable buffering
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
__diag("starting...\n");
l_span_loc = td_new(100);
b_span_loc = td_new(100);
l_span_take = td_new(100);
b_span_take = td_new(100);
size_t amt = INITIAL_AMOUNT;
stats_header();
sparsemap_idx_t amt = INITIAL_AMOUNT;
MDB_IDL list = mdb_midl_alloc(amt);
sparsemap_t *map = sparsemap(3 * 1024);
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
// start with 2GiB of 4KiB free pages to track:
// - MDB_IDL requires one int for each free page
// - Sparsemap will compress the set bits using less memory
mdb_midl_need(&list, amt);
for (size_t pg = 0; pg < amt; pg++) {
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
// We list every free (unallocated) page in the IDL, while...
mdb_midl_xappend(list, pg);
// ... true (unset in the bitmap) indicates free in the bitmap.
@ -579,20 +649,22 @@ main()
stats(0, map, list);
assert(verify_sm_eq_ml(map, list));
double b, e;
while (1) {
unsigned mi;
pgno_t ml = 0, sl = 0;
pgno_t ml, sl;
// get an amount [1, 16] of pages to find preferring smaller sizes
unsigned n = toss(15) + 1;
// find a set of pages using the MDB_IDL
{
b = nsts();
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
int retry = 1;
unsigned i;
unsigned i = 0;
pgno_t pgno = 0, *mop = list;
unsigned n2 = n, mop_len = mop[0];
if (mop_len > n2) {
@ -608,21 +680,27 @@ main()
search_done:;
ml = pgno;
mi = i;
e = nsts();
td_add(l_span_loc, e - b, 1);
}
assert(verify_span_midl(list, ml, n));
assert(verify_span_sparsemap(map, ml, n));
// find a set of pages using the Sparsemap
{
b = nsts();
pgno_t pgno = sparsemap_span(map, 0, n, true);
assert(SPARSEMAP_NOT_FOUND(pgno) == false);
sl = pgno;
e = nsts();
td_add(b_span_loc, e - b, 1);
}
assert(verify_span_midl(list, sl, n));
assert(verify_span_sparsemap(map, sl, n));
// acquire the set of pages within the list
if (prefer_mdb_idl_location) {
b = nsts();
unsigned j, num = n;
int i = mi;
pgno_t *mop = list;
@ -632,9 +710,12 @@ main()
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
td_add(l_span_take, e - b, 1);
} else {
b = nsts();
unsigned j, num = n;
int i = mdb_midl_search(list, sl) + num;
pgno_t *mop = list;
@ -644,17 +725,27 @@ main()
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
td_add(l_span_take, e - b, 1);
}
// acquire the set of pages within the sparsemap
if (prefer_mdb_idl_location) {
b = nsts();
for (pgno_t i = ml; i < ml + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
e = nsts();
td_add(b_span_take, e - b, 1);
} else {
b = nsts();
for (pgno_t i = sl; i <= sl + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
e = nsts();
td_add(b_span_take, e - b, 1);
}
assert(verify_sm_eq_ml(map, list));
@ -667,37 +758,50 @@ main()
do {
len = toss(15) + 1;
pg = sparsemap_span(map, 0, len, false);
//__diag("%zu\t%zu,%zu\n", iterations, replenish, retries);
} while (SPARSEMAP_NOT_FOUND(pg) && --retries);
if (retries == 0) {
goto larger_please;
}
if (SPARSEMAP_FOUND(pg)) {
assert(verify_empty_midl(list, pg, len));
assert(verify_empty_sparsemap(map, pg, len));
if (list[-1] - list[0] < len)
assert(verify_sm_eq_ml(map, list));
if (list[-1] - list[0] < len) {
mdb_midl_need(&list, list[-1] + len);
for (int i = pg; i < pg + len; i++) {
}
for (size_t i = pg; i < pg + len; i++) {
assert(verify_midl_contains(list, i) == false);
assert(sparsemap_is_set(map, i) == false);
mdb_midl_insert(list, i);
assert(verify_midl_contains(list, i) == true);
assert(_sparsemap_set(&map, i, true) == i);
assert(sparsemap_is_set(map, i) == true);
}
mdb_midl_sort(list);
assert(verify_midl_nodups(list));
assert(verify_span_midl(list, pg, len));
assert(verify_span_sparsemap(map, pg, len));
}
assert(verify_sm_eq_ml(map, list));
replenish++;
} while (list[0] < amt - 32);
}
stats(iterations, map, list);
replenish = 0;
// every 100 iterations, either ...
if (iterations % 100 == 0) {
// every so often, either ...
if (iterations % 1000 == 0) {
larger_please:;
const int COUNT = 1024;
if (toss(6) + 1 < 7) {
// ... add a MiB of 4KiB pages, or
// ... add COUNT 4KiB pages, or
int len = COUNT;
// The largest page is at list[1] because this is a reverse sorted list.
int pg = list[1] + 1;
if (list[0] + COUNT > list[-1])
pgno_t pg = list[1] + 1;
if (list[0] + COUNT > list[-1]) {
mdb_midl_grow(&list, list[0] + len);
for (int i = pg; i < pg + len; i++) {
}
for (size_t i = pg; i < pg + len; i++) {
assert(verify_midl_contains(list, i) == false);
assert(sparsemap_is_set(map, i) == false);
mdb_midl_insert(list, i);
@ -706,21 +810,25 @@ main()
mdb_midl_sort(list);
assert(verify_midl_nodups(list));
verify_sm_eq_ml(map, list);
amt += COUNT;
} else {
if (list[-1] > INITIAL_AMOUNT) {
// ... a fraction of the time, remove a MiB of 4KiB pages.
// ... a fraction of the time, remove COUNT / 2 of 4KiB pages.
pgno_t pg;
for (int i = 0; i < COUNT; i++) {
pgno_t pg = list[list[0] - i];
pg = list[list[0] - i];
assert(sparsemap_is_set(map, pg) == true);
assert(_sparsemap_set(&map, pg, false) == pg) ;
assert(_sparsemap_set(&map, pg, false) == pg);
}
mdb_midl_shrink_to(&list, list[0] - COUNT);
assert(list[list[0]] != pg);
assert(verify_midl_nodups(list));
verify_sm_eq_ml(map, list);
}
}
}
iterations++;
stats(iterations, map, list);
}
return 0;

View file

@ -88,11 +88,10 @@ extern "C" {
*/
typedef struct sparsemap sparsemap_t;
typedef long int sparsemap_idx_t;
#define SPARSEMAP_IDX_MAX LONG_MAX
#define SPARSEMAP_IDX_MIN LONG_MIN
#define SPARSEMAP_FOUND(x) ((x) < SPARSEMAP_IDX_MAX || (x) > SPARSEMAP_IDX_MIN)
#define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX || (x) == SPARSEMAP_IDX_MIN)
typedef size_t sparsemap_idx_t;
#define SPARSEMAP_IDX_MAX SIZE_MAX
#define SPARSEMAP_FOUND(x) ((x) != SPARSEMAP_IDX_MAX)
#define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX)
typedef uint32_t sm_idx_t;
typedef uint64_t sm_bitvec_t;
@ -219,13 +218,9 @@ size_t sparsemap_get_capacity(sparsemap_t *map);
/** @brief Returns the value of a bit at index \b idx, either true for "set" (1)
* or \b false for "unset" (0).
*
* When |idx| is negative it is an error.
*
* @param[in] map The sparsemap reference.
* @param[in] idx The 0-based offset into the bitmap index to examine.
* @returns either true or false; a negative idx is an error and always returns
* false
* @todo Support for negative relative offset in \idx.
* @returns either true or false
*/
bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx);
@ -239,9 +234,8 @@ bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx);
*
* @param[in] map The sparsemap reference.
* @param[in] idx The 0-based offset into the bitmap index to modify.
* @returns the \b idx supplied on success or SPARSEMAP_IDX_MIN/MAX on error
* with \b errno set to ENOSPC when the map is full; a negative idx is an error
* and always returns SPARSEMAP_IDX_MIN.
* @returns the \b idx supplied on success or SPARSEMAP_IDX_MAX on error
* with \b errno set to ENOSPC when the map is full.
*/
sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value);
@ -277,7 +271,7 @@ void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *othe
/** @brief Finds the index of the \b n'th bit set to \b value.
*
* Locates the \b n'th bit either set, \b value is true, or unset, \b value is
* false, from the start, positive \b n, or end, negative \b n, of the bitmap.
* false, from the start of the bitmap.
* So, if your bit pattern is: ```1101 1110 1010 1101 1011 1110 1110 1111``` and
* you request the first set bit the result is `0` (meaning the 1st bit in the
* map which is index 0 because this is 0-based indexing). The first unset bit
@ -291,7 +285,7 @@ void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *othe
* @param[in] value Determines if the search is to examine set (true) or unset
* (false) bits in the bitmap index.
* @returns the 0-based index of the located bit position within the map; when
* not found either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN.
* not found SPARSEMAP_IDX_MAX.
*/
sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value);
@ -320,8 +314,8 @@ size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value);
* @param[in] len The length of contiguous bits we're seeking.
* @param[in] value Determines if the scan is to find all set (true) or unset
* (false) bits of \b len.
* @returns the index of the first bit matching the criteria; when not found not
* found either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN.
* @returns the index of the first bit matching the criteria; when not found
* SPARSEMAP_IDX_MAX.
*/
size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value);

View file

@ -432,7 +432,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t offset, ssize_t *pnew_n, bool va
}
}
if (flags == SM_PAYLOAD_ONES) {
if (value) {
if (value == true) {
if (offset > SM_BITS_PER_VECTOR) {
offset -= SM_BITS_PER_VECTOR;
ret += SM_BITS_PER_VECTOR;
@ -584,16 +584,15 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, size_t *pos,
to count then call popcount(). So, let's create a mask for the range between
offset and idx inclusive [*offset, idx]. */
mask = idx_mask & offset_mask;
if (value == true) {
if (value) {
mw = w & mask;
} else {
mw = ~w & mask;
}
int pc = popcountll(mw);
ret += pc;
*vec = mw >> *offset;
*offset = *offset > idx ? *offset - idx + 1 : 0;
*vec = mw;
(*vec) <<= *offset;
return ret;
}
}
@ -748,46 +747,26 @@ __sm_get_aligned_offset(size_t idx)
static ssize_t
__sm_get_chunk_map_offset(sparsemap_t *map, sparsemap_idx_t idx)
{
int count;
count = __sm_get_chunk_map_count(map);
size_t count = __sm_get_chunk_map_count(map);
if (count == 0) {
return -1;
}
if (idx > 0 || idx == 0) {
uint8_t *start = __sm_get_chunk_map_data(map, 0);
uint8_t *p = start;
uint8_t *start = __sm_get_chunk_map_data(map, 0);
uint8_t *p = start;
for (sparsemap_idx_t i = 0; i < count - 1; i++) {
sm_idx_t s = *(sm_idx_t *)p;
__sm_assert(s == __sm_get_aligned_offset(s));
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p + sizeof(sm_idx_t));
if (s >= idx || (unsigned long)idx < s + __sm_chunk_map_get_capacity(&chunk)) {
break;
}
p += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&chunk);
for (sparsemap_idx_t i = 0; i < count - 1; i++) {
sm_idx_t s = *(sm_idx_t *)p;
__sm_assert(s == __sm_get_aligned_offset(s));
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p + sizeof(sm_idx_t));
if (s >= idx || (unsigned long)idx < s + __sm_chunk_map_get_capacity(&chunk)) {
break;
}
return (ssize_t)(p - start);
} else {
uint8_t *end = __sm_get_chunk_map_data(map, count - 1);
uint8_t *p = end;
for (sparsemap_idx_t i = count - 1; i >= 0; i--) {
sm_idx_t e = *(sm_idx_t *)p;
__sm_assert(e == __sm_get_aligned_offset(e));
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p + sizeof(sm_idx_t));
if (e >= idx || (unsigned long)idx < e + __sm_chunk_map_get_capacity(&chunk)) {
break;
}
p += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&chunk);
}
return (ssize_t)(p - end);
p += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&chunk);
}
return (ssize_t)(p - start);
}
/**
@ -967,10 +946,6 @@ sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx)
{
__sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
if (idx < 0) {
return false;
}
/* Get the __sm_chunk_t which manages this index */
ssize_t offset = __sm_get_chunk_map_offset(map, idx);
@ -1109,10 +1084,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
offset += (ssize_t)(sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t));
__sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t));
}
__sm_when_diag({
code = __sm_chunk_map_set(&chunk, idx - start, value, &position, &fill, true);
__sm_assert(code == SM_OK);
});
__sm_chunk_map_set(&chunk, idx - start, value, &position, &fill, true);
break;
case SM_NEEDS_TO_SHRINK:
/* If the __sm_chunk_t is empty then remove it. */
@ -1191,9 +1163,6 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
{
assert(offset % SM_BITS_PER_VECTOR == 0);
if (offset < 0)
return;
/* |dst| points to the destination buffer */
uint8_t *dst = __sm_get_chunk_map_end(other);
@ -1312,45 +1281,36 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value)
sm_idx_t start;
size_t count = __sm_get_chunk_map_count(map);
if (n >= 0) {
uint8_t *p = __sm_get_chunk_map_data(map, 0);
uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) {
start = *(sm_idx_t *)p;
/* Start of this chunk is greater than n meaning there are a set of 0s
before the first 1 sufficient to consume n. */
if (value == false && i == 0 && start > n) {
return n;
}
p += sizeof(sm_idx_t);
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
ssize_t new_n = (ssize_t)n;
size_t index = __sm_chunk_map_select(&chunk, n, &new_n, value);
if (new_n == -1) {
return start + index;
}
n = new_n;
p += __sm_chunk_map_get_size(&chunk);
for (size_t i = 0; i < count; i++) {
start = *(sm_idx_t *)p;
/* Start of this chunk is greater than n meaning there are a set of 0s
before the first 1 sufficient to consume n. */
if (value == false && i == 0 && start > n) {
return n;
}
if (value) {
return SPARSEMAP_IDX_MAX;
} else {
return count * SM_CHUNK_MAX_CAPACITY;
p += sizeof(sm_idx_t);
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
ssize_t new_n = (ssize_t)n;
size_t index = __sm_chunk_map_select(&chunk, n, &new_n, value);
if (new_n == -1) {
return start + index;
}
} else {
// TODO... sparsemap_select(map, -n, value); seek from end, not start
return SPARSEMAP_IDX_MIN;
n = new_n;
p += __sm_chunk_map_get_size(&chunk);
}
return SPARSEMAP_IDX_MAX;
}
size_t
sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec)
{
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t amt, gap, pos = 0, result = 0, prev = 0, count;
size_t amt, gap, pos = 0, result = 0, prev = 0, count, len = y - x + 1;
uint8_t *p;
if (x > y) {
@ -1362,7 +1322,7 @@ sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t
if (count == 0) {
if (value == false) {
/* The count/rank of unset bits in an empty map is inf, so what you requested is the answer. */
return y - x + 1;
return len;
}
}
@ -1371,14 +1331,43 @@ sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t
for (size_t i = 0; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)p;
/* [prev, start + pos), prev is the last bit examined 0-based. */
gap = start - (prev + pos);
if (i == 0) {
gap = start;
} else {
if (prev + SM_CHUNK_MAX_CAPACITY == start) {
gap = 0;
} else {
gap = start - (prev + pos);
}
}
/* Start of this chunk is greater than the end of the desired range. */
if (start > y) {
/* This chunk starts after our range [x, y]. */
if (value == true) {
/* We're counting set bits and this chunk starts after the range
[x, y], we're done. */
return result;
} else {
return result + (y - x) + 1;
if (i == 0) {
/* We're counting unset bits and the first chunk starts after the
range meaning everything proceeding this chunk was zero and should
be counted, also we're done. */
result += (y - x) + 1;
return result;
} else {
/* We're counting unset bits and some chunk starts after the range, so
we've counted enough, we're done. */
if (pos > y) {
return result;
} else {
if (y - pos < gap) {
result += y - pos;
return result;
} else {
result += gap;
return result;
}
}
}
}
} else {
/* The range and this chunk overlap. */
@ -1426,34 +1415,43 @@ sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value)
size_t
sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
{
size_t count, nth;
size_t rank, nth;
sm_bitvec_t vec = 0;
sparsemap_idx_t offset;
sparsemap_idx_t offset = 0;
nth = (idx > 0) ? sparsemap_rank(map, 0, idx - 1, value) : 0;
offset = sparsemap_select(map, nth++, value);
if (SPARSEMAP_NOT_FOUND(offset))
offset = 0;
else if (len == 1) {
return offset;
}
/* When skipping forward to `idx` offset in the map we can determine how
many selects we can avoid by taking the rank of the range and starting
at that bit. */
nth = (idx < 1) ? 0 : sparsemap_rank(map, 0, idx - 1, value);
/* Find the first bit that matches value, then... */
offset = sparsemap_select(map, nth, value);
do {
count = sparsemap_rank_vec(map, offset, offset + len - 1, value, &vec);
if (count >= len) {
return offset;
/* See if the rank of the bits in the range starting at offset is equal
to the desired amount. */
rank = len == 1 ? 1 : sparsemap_rank_vec(map, offset, offset + len - 1, value, &vec);
if (rank >= len) {
/* We've found what we're looking for, return the index of the first
bit in the range. */
break;
}
/* Now we try to jump forward as much as possible before we look for a
new match. We do this by counting the remaining bits in the returned
vec from the call to rank_vec(). */
int amt = 0;
if (vec == 0) {
/* The returned vec had no set bits, let's move forward in the map. */
amt = (rank == 0) ? len : 1;
} else {
// TODO: what is nth when len > SM_BITS_PER_VECTOR?
int c = len > SM_BITS_PER_VECTOR ? SM_BITS_PER_VECTOR : len;
for (int b = 0; b < c && (vec & 1 << b); b++) {
nth++;
/* We might be able to jump forward up to 64 bit positions saving us repeated
calls to select()/rank(). */
int max = len > SM_BITS_PER_VECTOR ? SM_BITS_PER_VECTOR : len;
while (amt < max && (vec & 1 << amt)) {
amt++;
}
}
if (count) {
nth++;
}
/* Use select to potentially jump very far forward in the map. */
nth += amt;
offset = sparsemap_select(map, nth, value);
} while (offset != SPARSEMAP_IDX_MAX);
} while (SPARSEMAP_FOUND(offset));
return idx >= 0 ? SPARSEMAP_IDX_MAX : SPARSEMAP_IDX_MIN;
return offset;
}

View file

@ -4,6 +4,8 @@
#include <assert.h>
#include <pthread.h> // If using threads
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -57,8 +59,9 @@ uint32_t
xorshift32()
{
uint32_t x = __xorshift32_state;
if (x == 0)
if (x == 0) {
x = 123456789;
}
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
@ -138,7 +141,8 @@ ensure_sequential_set(int a[], int l, int r)
// Generate a random value between min_value and max_value
int value = random_uint32() % (max_value - min_value - r + 1);
// Generate a random location between 0 and l - r
offset = random_uint32() % (l - r - 1);
int d = l - r - 1;
offset = d == 0 ? 0 : random_uint32() % d;
// Adjust the array to include a sequential set of 'r' integers at the random offset
for (int i = 0; i < r; ++i) {
@ -255,7 +259,7 @@ bool
is_set(const int array[], int bit)
{
for (int i = 0; i < 1024; i++) {
if (array[i] == (int)bit) {
if (array[i] == bit) {
return true;
}
}
@ -290,8 +294,9 @@ whats_set_uint64(uint64_t number, int pos[64])
void
setup_test_array(int a[], int l, int max_value)
{
if (a == NULL || max_value < 0)
if (a == NULL || max_value < 0) {
return; // Basic error handling and validation
}
for (int i = 0; i < l; ++i) {
int candidate;

View file

@ -671,8 +671,8 @@ test_api_select_false(const MunitParameter params[], void *data)
assert_ptr_not_null(map);
/* First few 0/off/unset-bits in ((uint64_t)0xfeedface << 32) | 0xbadc0ffee) expressed as an array of offsets. */
int off[] = { 0, 4, 16, 17, 18, 19, 20, 21, 25, 28, 30, 36, 37, 40, 42, 49, 52, 56, 64, 65 };
for (int i = 0; i < 20; i++) {
size_t off[] = { 0, 4, 16, 17, 18, 19, 20, 21, 25, 28, 30, 36, 37, 40, 42, 49, 52, 56, 64, 65 };
for (size_t i = 0; i < 20; i++) {
sparsemap_idx_t f = sparsemap_select(map, i, false);
assert_true(f == off[i]);
assert_true(sparsemap_is_set(map, f) == false);
@ -776,10 +776,10 @@ test_api_rank_true(const MunitParameter params[], void *data)
}
sparsemap_idx_t hole = 4999;
sparsemap_set(map, hole, false);
for (int i = 0; i < 10000; i++) {
for (int j = i; j < 10000; j++) {
int amt = (i > j) ? 0 : j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
int r = sparsemap_rank(map, i, j, true);
for (size_t i = 0; i < 10000; i++) {
for (size_t j = i; j < 10000; j++) {
size_t amt = (i > j) ? 0 : j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
size_t r = sparsemap_rank(map, i, j, true);
assert_true(r == amt);
}
}
@ -826,8 +826,8 @@ test_api_rank_false(const MunitParameter params[], void *data)
// One chunk means not so empty now!
sparsemap_idx_t hole = 4999;
sparsemap_set(map, hole, true);
for (int i = 0; i < 10000; i++) {
for (int j = i; j < 10000; j++) {
for (size_t i = 0; i < 10000; i++) {
for (size_t j = i; j < 10000; j++) {
int amt = (i > j) ? 0 : j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
r = sparsemap_rank(map, i, j, false);
assert_true(r == amt);
@ -1046,7 +1046,7 @@ test_scale_fuzz(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
(void)map; //TODO...
(void)map; // TODO...
return MUNIT_OK;
}