compare against roaring bitmaps

This commit is contained in:
Gregory Burd 2024-05-03 15:15:39 -04:00
parent 57a8f99a32
commit b9612f12cc
14 changed files with 28989 additions and 84 deletions

3
.gitignore vendored
View file

@ -3,7 +3,7 @@
**/*.o **/*.o
tests/test tests/test
examples/ex_? examples/ex_?
examples/soak tests/soak
.cache .cache
hints.txt hints.txt
tmp/ tmp/
@ -28,6 +28,7 @@ compile_commands.json
*.dat *.dat
*.fsm *.fsm
*.db *.db
.vscode/
# Created by https://www.gitignore.io/api/jetbrains # Created by https://www.gitignore.io/api/jetbrains
# Edit at https://www.gitignore.io/?templates=jetbrains # Edit at https://www.gitignore.io/?templates=jetbrains

View file

@ -3,23 +3,25 @@ OBJS = sparsemap.o
STATIC_LIB = libsparsemap.a STATIC_LIB = libsparsemap.a
SHARED_LIB = libsparsemap.so SHARED_LIB = libsparsemap.so
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC LIBS = -lm
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC $(LIBS)
CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC $(LIBS)
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC $(LIBS)
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC $(LIBS)
TESTS = tests/test TESTS = tests/test tests/soak
TEST_OBJS = tests/test.o tests/munit.o tests/tdigest.o tests/common.o TEST_OBJS = tests/test.o lib/munit.o lib/tdigest.o lib/common.o
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4 examples/soak LIB_OBJS = lib/munit.o lib/tdigest.o lib/common.o lib/roaring.o
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4
.PHONY: all shared static clean test examples mls .PHONY: all shared static clean test examples mls
@ -35,10 +37,10 @@ $(STATIC_LIB): $(OBJS)
$(SHARED_LIB): $(OBJS) $(SHARED_LIB): $(OBJS)
$(CC) $(CFLAGS) -o $@ $? -shared $(CC) $(CFLAGS) -o $@ $? -shared
examples: $(STATIC_LIB) $(EXAMPLES) examples/common.o examples: $(STATIC_LIB) $(EXAMPLES) $(TEST_OBJS)
soak: examples/soak.c soak: tests/soak.c
examples/soak tests/soak
mls: examples/mls mls: examples/mls
@ -47,7 +49,7 @@ test: $(TESTS)
check: test check: test
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
tests/test: $(TEST_OBJS) $(STATIC_LIB) tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
$(CC) $^ -lm -o $@ $(TEST_FLAGS) $(CC) $^ -lm -o $@ $(TEST_FLAGS)
clean: clean:
@ -58,34 +60,34 @@ clean:
rm -f $(EXAMPLES) examples/*.o rm -f $(EXAMPLES) examples/*.o
format: format:
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c examples/soak.c tests/test.c tests/common.c tests/common.h clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c lib/common.c include/common.h
# clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c # clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
%.o: src/%.c %.o: src/%.c
$(CC) $(CFLAGS) -c -o $@ $^ $(CC) $(CFLAGS) -c -o $@ $^
# Compile a helper object under lib/ from the same-named C source in tests/.
lib/%.o: tests/%.c
	$(CC) $(CFLAGS) -c $< -o $@
tests/%.o: tests/%.c tests/%.o: tests/%.c
$(CC) $(CFLAGS) -c -o $@ $^ $(CC) $(CFLAGS) -c -o $@ $^
examples/%.o: examples/%.c examples/%.o: examples/%.c
$(CC) $(CFLAGS) -c -o $@ $^ $(CC) $(CFLAGS) -c -o $@ $^
examples/common.o: tests/common.c examples/ex_1: $(LIB_OBJS) examples/ex_1.o $(STATIC_LIB)
$(CC) $(CFLAGS) -c -o $@ $^
examples/ex_1: examples/common.o examples/ex_1.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_2: examples/common.o examples/ex_2.o $(STATIC_LIB) examples/ex_2: $(LIB_OBJS) examples/ex_2.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB) examples/ex_3: $(LIB_OBJS) examples/ex_3.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB) examples/ex_4: $(LIB_OBJS) examples/ex_4.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS) $(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/soak: examples/common.o tests/tdigest.o examples/soak.o $(STATIC_LIB) tests/soak: $(LIB_OBJS) tests/soak.o $(STATIC_LIB)
$(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS) $(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS)
todo: todo:

View file

@ -1,5 +1,9 @@
# Sparsemap # Sparsemap
Bitsets, also called bitmaps, are commonly used as fast data structures.
Unfortunately, they can use too much memory. To compensate, we often use
compressed bitmaps.
`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048 `sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
requires additional 8 bytes of overhead. requires additional 8 bytes of overhead.
@ -14,7 +18,7 @@ On the lowest level stores bits in sm_bitvec_t's (a uint32_t or uint64_t).
Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
to each sm_bitvec_t describes its condition. The descriptor word and the to each sm_bitvec_t describes its condition. The descriptor word and the
sm_bitvec_t's have the same size.) The descriptor of a sm_bitvec_t sm_bitvec_t's have the same size. The descriptor of a sm_bitvec_t
specifies whether the sm_bitvec_t consists only of set bits ("1"), unset specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
bits ("0") or has a mixed payload. In the first and second case the bits ("0") or has a mixed payload. In the first and second case the
sm_bitvec_t is not stored. sm_bitvec_t is not stored.
@ -45,7 +49,8 @@ offset 0, the second starts at offset 8192).
## Usage instructions ## Usage instructions
The file `examples/ex_1.c` has example code. Copy the files `src/sparsemap.c` and `include/sparsemap.h` into your project.
Review the `examples/*` and `tests/*` code.
## Final words ## Final words
@ -58,7 +63,10 @@ However, if the sequence is not consecutive and has gaps, it's possible that
the compression is inefficient, and the size (in the worst case) is identical the compression is inefficient, and the size (in the worst case) is identical
to an uncompressed bit vector (sometimes higher due to the bytes required for to an uncompressed bit vector (sometimes higher due to the bytes required for
metadata). In such cases, other compression schemes are more efficient (i.e. metadata). In such cases, other compression schemes are more efficient (i.e.
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/). http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/). We
include in `lib` the amalgamated source (git `2dc8070`) of the well-known
[Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring/tree/master)
library and use it in the soak test to verify that our results match theirs.
This library was originally created for [hamsterdb](http://hamsterdb.com) in This library was originally created for [hamsterdb](http://hamsterdb.com) in
C++ and then translated to C and further improved by Greg Burd <greg@burd.me> C++ and then translated to C and further improved by Greg Burd <greg@burd.me>

View file

@ -1,11 +1,10 @@
#include <assert.h> #include <assert.h>
#include <common.h>
#include <sparsemap.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "../include/sparsemap.h"
#include "../tests/common.h"
int int
main(void) main(void)
{ {

View file

@ -1,11 +1,10 @@
#include <assert.h> #include <assert.h>
#include <common.h>
#include <sparsemap.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include "../include/sparsemap.h"
#include "../tests/common.h"
#define TEST_ARRAY_SIZE 1024 #define TEST_ARRAY_SIZE 1024
int int

View file

@ -5,7 +5,6 @@
# nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; # nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/23.11"; nixpkgs.url = "github:NixOS/nixpkgs/23.11";
utils.url = "github:numtide/flake-utils"; utils.url = "github:numtide/flake-utils";
utils.inputs.nixpkgs.follows = "nixpkgs";
}; };
outputs = { self, nixpkgs, ... } outputs = { self, nixpkgs, ... }
@ -18,38 +17,39 @@
config.allowUnfree = true; config.allowUnfree = true;
}; };
in { in {
devShell = pkgs.mkShell rec { flake-utils.inputs.systems.follows = "system";
name = "sparsemap"; devShell = pkgs.mkShell rec {
packages = with pkgs; [ name = "sparsemap";
act packages = with pkgs; [
autoconf act
clang autoconf
ed clang
gcc ed
gdb gcc
gettext gdb
graphviz-nox gettext
libtool graphviz-nox
m4 libtool
perl m4
pkg-config perl
python3 pkg-config
ripgrep python3
valgrind ripgrep
]; valgrind
];
buildInputs = with pkgs; [ buildInputs = with pkgs; [
libbacktrace libbacktrace
glibc.out glibc.out
glibc.static glibc.static
]; ];
shellHook = let shellHook = let
icon = "f121"; icon = "f121";
in '' in ''
export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]" export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
''; '';
}; };
DOCKER_BUILDKIT = 1; DOCKER_BUILDKIT = 1;
}); });
} }

2908
include/roaring.h Normal file

File diff suppressed because it is too large Load diff

25883
lib/roaring.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -1270,6 +1270,7 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other)
uint8_t *src, *dst; uint8_t *src, *dst;
size_t src_count = __sm_get_chunk_map_count(other), dst_count = __sm_get_chunk_map_count(map), max_chunk_count = src_count + dst_count; size_t src_count = __sm_get_chunk_map_count(other), dst_count = __sm_get_chunk_map_count(map), max_chunk_count = src_count + dst_count;
// TODO: ensure there is space, or ENOSPC
dst = __sm_get_chunk_map_data(map, 0); dst = __sm_get_chunk_map_data(map, 0);
src = __sm_get_chunk_map_data(other, 0); src = __sm_get_chunk_map_data(other, 0);
for (size_t i = 0; i < max_chunk_count && src_count; i++) { for (size_t i = 0; i < max_chunk_count && src_count; i++) {

View file

@ -6,9 +6,10 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "../include/common.h"
#include "../include/roaring.h"
#include "../include/sparsemap.h" #include "../include/sparsemap.h"
#include "../tests/common.h" #include "../include/tdigest.h"
#include "../tests/tdigest.h"
/* midl.h ------------------------------------------------------------------ */ /* midl.h ------------------------------------------------------------------ */
/** @defgroup idls ID List Management /** @defgroup idls ID List Management
@ -511,6 +512,17 @@ verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len)
return true; return true;
} }
/*
 * Check that every page in the half-open range [pg, pg + len) is present in
 * the roaring bitmap.
 *
 * Returns false at the first page that is missing, true when all `len` pages
 * are present (including the trivial case len == 0).
 */
bool
verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
{
  pgno_t cur = pg;

  while (cur < pg + len) {
    if (!roaring_bitmap_contains(rbm, cur)) {
      return false;
    }
    cur++;
  }
  return true;
}
bool bool
verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
{ {
@ -533,6 +545,17 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
return true; return true;
} }
/*
 * Check that none of the `len` pages starting at `pg` is present in the
 * roaring bitmap.
 *
 * Returns false at the first page that is found, true when all of them are
 * absent (including the trivial case len == 0).
 */
bool
verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
{
  pgno_t off = 0;

  while (off < len) {
    if (roaring_bitmap_contains(rbm, pg + off)) {
      return false;
    }
    off++;
  }
  return true;
}
bool bool
verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
{ {
@ -548,6 +571,23 @@ verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t
return false; return false;
} }
/*
 * Cross-check a sparsemap against a roaring bitmap, index by index.
 *
 * Walks every index from 0 through the largest value stored in `rbm` and
 * asserts that the sparsemap reports the bit as set exactly when the roaring
 * iterator is currently positioned on that index.
 *
 * Mismatches are reported through assert() rather than the return value, so
 * this function returns true whenever it returns at all; with assertions
 * compiled out it performs no checking.  Indexes above the roaring maximum
 * are not examined.
 *
 * NOTE(review): relies on the roaring iterator yielding values in ascending
 * order -- confirm against the CRoaring documentation.
 */
bool
verify_sm_eq_rm(sparsemap_t *map, roaring_bitmap_t *rbm)
{
  roaring_uint32_iterator_t cursor;
  uint64_t highest = roaring_bitmap_maximum(rbm);
  uint64_t i = 0;

  roaring_iterator_init(rbm, &cursor);
  while (i <= highest) {
    if (i != cursor.current_value) {
      /* Not the iterator's current value: the bit must be clear in the sparsemap. */
      assert(sparsemap_is_set(map, i) == false);
    } else {
      /* The iterator is on this index: the bit must be set; move to the next value. */
      assert(sparsemap_is_set(map, i) == true);
      roaring_uint32_iterator_advance(&cursor);
    }
    i++;
  }
  return true;
}
bool bool
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
{ {
@ -637,7 +677,6 @@ int
main(void) main(void)
{ {
size_t replenish = 0, iterations = 0; size_t replenish = 0, iterations = 0;
bool prefer_mdb_idl_location = (bool)xorshift32() % 2;
// disable buffering // disable buffering
#ifdef DEBUG #ifdef DEBUG
@ -657,6 +696,7 @@ main(void)
sparsemap_idx_t amt = INITIAL_AMOUNT; sparsemap_idx_t amt = INITIAL_AMOUNT;
MDB_IDL list = mdb_midl_alloc(amt); MDB_IDL list = mdb_midl_alloc(amt);
sparsemap_t *map = sparsemap(INITIAL_AMOUNT); sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
roaring_bitmap_t *rbm = roaring_bitmap_create();
// start with 2GiB of 4KiB free pages to track: // start with 2GiB of 4KiB free pages to track:
// - MDB_IDL requires one int for each free page // - MDB_IDL requires one int for each free page
@ -665,16 +705,19 @@ main(void)
for (sparsemap_idx_t pg = 0; pg < amt; pg++) { for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
// We list every free (unallocated) page in the IDL, while... // We list every free (unallocated) page in the IDL, while...
mdb_midl_xappend(list, pg); mdb_midl_xappend(list, pg);
// ... true (unset in the bitmap) indicates free in the bitmap. // ... true (unset in the bitmap) indicates free in the bitmap, ...
assert(_sparsemap_set(&map, pg, true) == pg); assert(_sparsemap_set(&map, pg, true) == pg);
assert(roaring_bitmap_add_checked(rbm, pg));
} }
mdb_midl_sort(list); mdb_midl_sort(list);
roaring_bitmap_run_optimize(rbm);
assert(verify_sm_eq_ml(map, list)); assert(verify_sm_eq_ml(map, list));
assert(verify_sm_eq_rm(map, rbm));
double b, e; double b, e;
while (1) { while (1) {
unsigned mi; unsigned mi;
pgno_t ml, sl; pgno_t ml, sl, rl;
// get an amount [1, 16] of pages to find preferring smaller sizes // get an amount [1, 16] of pages to find preferring smaller sizes
unsigned n = toss(15) + 1; unsigned n = toss(15) + 1;
@ -707,6 +750,7 @@ main(void)
} }
assert(verify_span_midl(list, ml, n)); assert(verify_span_midl(list, ml, n));
assert(verify_span_sparsemap(map, ml, n)); assert(verify_span_sparsemap(map, ml, n));
assert(verify_span_roaring(rbm, ml, n));
// find a set of pages using the Sparsemap // find a set of pages using the Sparsemap
{ {
@ -720,9 +764,30 @@ main(void)
} }
assert(verify_span_midl(list, sl, n)); assert(verify_span_midl(list, sl, n));
assert(verify_span_sparsemap(map, sl, n)); assert(verify_span_sparsemap(map, sl, n));
assert(verify_span_roaring(rbm, sl, n));
// find a set of pages using the Roaring Bitmap
{
b = nsts();
uint64_t max = roaring_bitmap_maximum(rbm);
uint64_t offset = roaring_bitmap_minimum(rbm);
do {
if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) {
break;
}
offset++;
} while (offset <= max);
rl = offset;
e = nsts();
}
assert(verify_span_midl(list, rl, n));
assert(verify_span_sparsemap(map, rl, n));
assert(verify_span_roaring(rbm, rl, n));
bool prefer_mdb_idl_loc = (bool)xorshift32() % 2;
// acquire the set of pages within the list // acquire the set of pages within the list
if (prefer_mdb_idl_location) { if (prefer_mdb_idl_loc) {
b = nsts(); b = nsts();
unsigned j, num = n; unsigned j, num = n;
int i = mi; int i = mi;
@ -755,7 +820,7 @@ main(void)
} }
// acquire the set of pages within the sparsemap // acquire the set of pages within the sparsemap
if (prefer_mdb_idl_location) { if (prefer_mdb_idl_loc) {
b = nsts(); b = nsts();
for (pgno_t i = ml; i < ml + n; i++) { for (pgno_t i = ml; i < ml + n; i++) {
assert(_sparsemap_set(&map, i, false) == i); assert(_sparsemap_set(&map, i, false) == i);
@ -771,7 +836,20 @@ main(void)
td_add(b_span_take, e - b, 1); td_add(b_span_take, e - b, 1);
} }
// acquire the set of pages within the roaring bitmap
if (prefer_mdb_idl_loc) {
b = nsts();
roaring_bitmap_remove_range(rbm, ml, ml + n);
e = nsts();
} else {
b = nsts();
roaring_bitmap_remove_range(rbm, sl, sl + n);
e = nsts();
}
roaring_bitmap_run_optimize(rbm);
assert(verify_sm_eq_ml(map, list)); assert(verify_sm_eq_ml(map, list));
assert(verify_sm_eq_rm(map, rbm));
// Once we've used a tenth of the free list, let's replenish it a bit. // Once we've used a tenth of the free list, let's replenish it a bit.
if (list[0] < amt / 10) { if (list[0] < amt / 10) {
@ -790,7 +868,9 @@ main(void)
if (SPARSEMAP_FOUND(pgno)) { if (SPARSEMAP_FOUND(pgno)) {
assert(verify_empty_midl(list, pgno, len)); assert(verify_empty_midl(list, pgno, len));
assert(verify_empty_sparsemap(map, pgno, len)); assert(verify_empty_sparsemap(map, pgno, len));
assert(verify_empty_roaring(rbm, pgno, len));
assert(verify_sm_eq_ml(map, list)); assert(verify_sm_eq_ml(map, list));
assert(verify_sm_eq_rm(map, rbm));
if (list[-1] - list[0] < len) { if (list[-1] - list[0] < len) {
mdb_midl_need(&list, list[-1] + len); mdb_midl_need(&list, list[-1] + len);
} }
@ -801,13 +881,16 @@ main(void)
assert(verify_midl_contains(list, i) == true); assert(verify_midl_contains(list, i) == true);
assert(_sparsemap_set(&map, i, true) == i); assert(_sparsemap_set(&map, i, true) == i);
assert(sparsemap_is_set(map, i) == true); assert(sparsemap_is_set(map, i) == true);
assert(roaring_bitmap_add_checked(rbm, i) == true);
} }
mdb_midl_sort(list); mdb_midl_sort(list);
assert(verify_midl_nodups(list)); assert(verify_midl_nodups(list));
assert(verify_span_midl(list, pgno, len)); assert(verify_span_midl(list, pgno, len));
assert(verify_span_sparsemap(map, pgno, len)); assert(verify_span_sparsemap(map, pgno, len));
assert(verify_span_roaring(rbm, pgno, len));
} }
assert(verify_sm_eq_ml(map, list)); assert(verify_sm_eq_ml(map, list));
assert(verify_sm_eq_rm(map, rbm));
replenish++; replenish++;
} while (list[0] < amt - 32); } while (list[0] < amt - 32);
} }
@ -821,10 +904,10 @@ main(void)
size_t len = COUNT; size_t len = COUNT;
// The largest page is at list[1] because this is a reverse sorted list. // The largest page is at list[1] because this is a reverse sorted list.
pgno_t pg = list[0] ? list[1] + 1 : 0; pgno_t pg = list[0] ? list[1] + 1 : 0;
// if (toss(6) + 1 < 7) { if (true) { // disable shrinking for now... (toss(6) + 1 < 7)
if (true) { // disable shrinking for now...
MDB_IDL new_list = mdb_midl_alloc(len); MDB_IDL new_list = mdb_midl_alloc(len);
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT); sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
roaring_bitmap_t *new_rbm = roaring_bitmap_create();
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i; pgno_t gp = (pg + len) - i;
new_list[i + 1] = gp; new_list[i + 1] = gp;
@ -832,8 +915,11 @@ main(void)
assert(verify_midl_contains(new_list, gp) == true); assert(verify_midl_contains(new_list, gp) == true);
assert(_sparsemap_set(&new_map, gp, true) == gp); assert(_sparsemap_set(&new_map, gp, true) == gp);
assert(sparsemap_is_set(new_map, gp)); assert(sparsemap_is_set(new_map, gp));
assert(roaring_bitmap_add_checked(new_rbm, gp));
assert(roaring_bitmap_contains(new_rbm, gp));
} }
assert(verify_sm_eq_ml(new_map, new_list)); assert(verify_sm_eq_ml(new_map, new_list));
assert(verify_sm_eq_rm(new_map, new_rbm));
{ {
b = nsts(); b = nsts();
mdb_midl_append_list(&list, new_list); mdb_midl_append_list(&list, new_list);
@ -856,19 +942,37 @@ main(void)
assert(sparsemap_is_set(map, gp)); assert(sparsemap_is_set(map, gp));
} }
free(new_map); free(new_map);
{
b = nsts();
roaring_bitmap_or_inplace(rbm, new_rbm);
e = nsts();
}
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(roaring_bitmap_contains(rbm, gp));
}
roaring_free(new_rbm);
} else { } else {
if (list[-1] > INITIAL_AMOUNT) { if (list[-1] > INITIAL_AMOUNT) {
// ... a fraction of the time, remove COUNT / 2 of 4KiB pages. // ... a fraction of the time, remove COUNT / 2 of 4KiB pages.
pgno_t pg; {
for (size_t i = 0; i < COUNT; i++) { pgno_t pg;
pg = list[list[0] - i]; for (size_t i = 0; i < COUNT; i++) {
assert(sparsemap_is_set(map, pg) == true); pg = list[list[0] - i];
assert(_sparsemap_set(&map, pg, false) == pg); assert(sparsemap_is_set(map, pg) == true);
assert(_sparsemap_set(&map, pg, false) == pg);
}
}
{
roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]);
}
{
mdb_midl_shrink_to(&list, list[0] - COUNT);
} }
mdb_midl_shrink_to(&list, list[0] - COUNT);
assert(list[list[0]] != pg); assert(list[list[0]] != pg);
assert(verify_midl_nodups(list)); assert(verify_midl_nodups(list));
verify_sm_eq_ml(map, list); verify_sm_eq_ml(map, list);
verify_sm_eq_rm(map, rbm);
} }
} }
} }