compare against roaring bitmaps
This commit is contained in:
parent
57a8f99a32
commit
b9612f12cc
14 changed files with 28989 additions and 84 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,7 +3,7 @@
|
||||||
**/*.o
|
**/*.o
|
||||||
tests/test
|
tests/test
|
||||||
examples/ex_?
|
examples/ex_?
|
||||||
examples/soak
|
tests/soak
|
||||||
.cache
|
.cache
|
||||||
hints.txt
|
hints.txt
|
||||||
tmp/
|
tmp/
|
||||||
|
@ -28,6 +28,7 @@ compile_commands.json
|
||||||
*.dat
|
*.dat
|
||||||
*.fsm
|
*.fsm
|
||||||
*.db
|
*.db
|
||||||
|
.vscode/
|
||||||
|
|
||||||
# Created by https://www.gitignore.io/api/jetbrains
|
# Created by https://www.gitignore.io/api/jetbrains
|
||||||
# Edit at https://www.gitignore.io/?templates=jetbrains
|
# Edit at https://www.gitignore.io/?templates=jetbrains
|
||||||
|
|
58
Makefile
58
Makefile
|
@ -3,23 +3,25 @@ OBJS = sparsemap.o
|
||||||
STATIC_LIB = libsparsemap.a
|
STATIC_LIB = libsparsemap.a
|
||||||
SHARED_LIB = libsparsemap.so
|
SHARED_LIB = libsparsemap.so
|
||||||
|
|
||||||
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
|
LIBS = -lm
|
||||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
|
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
|
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
|
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
|
|
||||||
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
|
||||||
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
|
||||||
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC $(LIBS)
|
||||||
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
|
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC $(LIBS)
|
||||||
|
|
||||||
TESTS = tests/test
|
TESTS = tests/test tests/soak
|
||||||
TEST_OBJS = tests/test.o tests/munit.o tests/tdigest.o tests/common.o
|
TEST_OBJS = tests/test.o lib/munit.o lib/tdigest.o lib/common.o
|
||||||
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4 examples/soak
|
LIB_OBJS = lib/munit.o lib/tdigest.o lib/common.o lib/roaring.o
|
||||||
|
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4
|
||||||
|
|
||||||
.PHONY: all shared static clean test examples mls
|
.PHONY: all shared static clean test examples mls
|
||||||
|
|
||||||
|
@ -35,10 +37,10 @@ $(STATIC_LIB): $(OBJS)
|
||||||
$(SHARED_LIB): $(OBJS)
|
$(SHARED_LIB): $(OBJS)
|
||||||
$(CC) $(CFLAGS) -o $@ $? -shared
|
$(CC) $(CFLAGS) -o $@ $? -shared
|
||||||
|
|
||||||
examples: $(STATIC_LIB) $(EXAMPLES) examples/common.o
|
examples: $(STATIC_LIB) $(EXAMPLES) $(TEST_OBJS)
|
||||||
|
|
||||||
soak: examples/soak.c
|
soak: tests/soak.c
|
||||||
examples/soak
|
tests/soak
|
||||||
|
|
||||||
mls: examples/mls
|
mls: examples/mls
|
||||||
|
|
||||||
|
@ -47,7 +49,7 @@ test: $(TESTS)
|
||||||
check: test
|
check: test
|
||||||
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
|
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
|
||||||
|
|
||||||
tests/test: $(TEST_OBJS) $(STATIC_LIB)
|
tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
|
||||||
$(CC) $^ -lm -o $@ $(TEST_FLAGS)
|
$(CC) $^ -lm -o $@ $(TEST_FLAGS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
@ -58,34 +60,34 @@ clean:
|
||||||
rm -f $(EXAMPLES) examples/*.o
|
rm -f $(EXAMPLES) examples/*.o
|
||||||
|
|
||||||
format:
|
format:
|
||||||
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c examples/soak.c tests/test.c tests/common.c tests/common.h
|
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c lib/common.c include/common.h
|
||||||
# clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
|
# clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
|
||||||
|
|
||||||
%.o: src/%.c
|
%.o: src/%.c
|
||||||
$(CC) $(CFLAGS) -c -o $@ $^
|
$(CC) $(CFLAGS) -c -o $@ $^
|
||||||
|
|
||||||
|
# Objects under lib/ (munit, tdigest, common, roaring, ...) are compiled from
# the matching source in lib/. Stating the rule explicitly avoids depending on
# make's built-in %.o: %.c rule for these objects.
lib/%.o: lib/%.c
	$(CC) $(CFLAGS) -c -o $@ $<

# Fallback for any helper source that still lives under tests/.
# NOTE(review): drop this rule once every lib/ object has its source in lib/.
lib/%.o: tests/%.c
	$(CC) $(CFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
tests/%.o: tests/%.c
|
tests/%.o: tests/%.c
|
||||||
$(CC) $(CFLAGS) -c -o $@ $^
|
$(CC) $(CFLAGS) -c -o $@ $^
|
||||||
|
|
||||||
examples/%.o: examples/%.c
|
examples/%.o: examples/%.c
|
||||||
$(CC) $(CFLAGS) -c -o $@ $^
|
$(CC) $(CFLAGS) -c -o $@ $^
|
||||||
|
|
||||||
examples/common.o: tests/common.c
|
examples/ex_1: $(LIB_OBJS) examples/ex_1.o $(STATIC_LIB)
|
||||||
$(CC) $(CFLAGS) -c -o $@ $^
|
|
||||||
|
|
||||||
examples/ex_1: examples/common.o examples/ex_1.o $(STATIC_LIB)
|
|
||||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||||
|
|
||||||
examples/ex_2: examples/common.o examples/ex_2.o $(STATIC_LIB)
|
examples/ex_2: $(LIB_OBJS) examples/ex_2.o $(STATIC_LIB)
|
||||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||||
|
|
||||||
examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB)
|
examples/ex_3: $(LIB_OBJS) examples/ex_3.o $(STATIC_LIB)
|
||||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||||
|
|
||||||
examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB)
|
examples/ex_4: $(LIB_OBJS) examples/ex_4.o $(STATIC_LIB)
|
||||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||||
|
|
||||||
examples/soak: examples/common.o tests/tdigest.o examples/soak.o $(STATIC_LIB)
|
tests/soak: $(LIB_OBJS) tests/soak.o $(STATIC_LIB)
|
||||||
$(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS)
|
$(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||||
|
|
||||||
todo:
|
todo:
|
||||||
|
|
14
README.md
14
README.md
|
@ -1,5 +1,9 @@
|
||||||
# Sparsemap
|
# Sparsemap
|
||||||
|
|
||||||
|
Bitsets, also called bitmaps, are commonly used as fast data structures.
|
||||||
|
Unfortunately, they can use too much memory. To compensate, we often use
|
||||||
|
compressed bitmaps.
|
||||||
|
|
||||||
`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
|
`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
|
||||||
bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
|
bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
|
||||||
requires additional 8 bytes of overhead.
|
requires additional 8 bytes of overhead.
|
||||||
|
@ -14,7 +18,7 @@ On the lowest level stores bits in sm_bitvec_t's (a uint32_t or uint64_t).
|
||||||
|
|
||||||
Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
|
Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
|
||||||
to each sm_bitvec_t describes its condition. The descriptor word and the
|
to each sm_bitvec_t describes its condition. The descriptor word and the
|
||||||
sm_bitvec_t's have the same size.) The descriptor of a sm_bitvec_t
|
sm_bitvec_t's have the same size. The descriptor of a sm_bitvec_t
|
||||||
specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
|
specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
|
||||||
bits ("0") or has a mixed payload. In the first and second case the
|
bits ("0") or has a mixed payload. In the first and second case the
|
||||||
sm_bitvec_t is not stored.
|
sm_bitvec_t is not stored.
|
||||||
|
@ -45,7 +49,8 @@ offset 0, the second starts at offset 8192).
|
||||||
|
|
||||||
## Usage instructions
|
## Usage instructions
|
||||||
|
|
||||||
The file `examples/ex_1.c` has example code.
|
Copy the files `src/sparsemap.c` and `include/sparsemap.h` into your project.
|
||||||
|
Review the `examples/*` and `tests/*` code.
|
||||||
|
|
||||||
## Final words
|
## Final words
|
||||||
|
|
||||||
|
@ -58,7 +63,10 @@ However, if the sequence is not consecutive and has gaps, it's possible that
|
||||||
the compression is inefficient, and the size (in the worst case) is identical
|
the compression is inefficient, and the size (in the worst case) is identical
|
||||||
to an uncompressed bit vector (sometimes higher due to the bytes required for
|
to an uncompressed bit vector (sometimes higher due to the bytes required for
|
||||||
metadata). In such cases, other compression schemes are more efficient (i.e.
|
metadata). In such cases, other compression schemes are more efficient (i.e.
|
||||||
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).
|
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/). We
|
||||||
|
include in `lib` an amalgamated copy (git `2dc8070`) of the well-known
|
||||||
|
[Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring/tree/master) and
|
||||||
|
use it in the soak test to ensure our results are as accurate as theirs.
|
||||||
|
|
||||||
This library was originally created for [hamsterdb](http://hamsterdb.com) in
|
This library was originally created for [hamsterdb](http://hamsterdb.com) in
|
||||||
C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
|
C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <common.h>
|
||||||
|
#include <sparsemap.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "../include/sparsemap.h"
|
|
||||||
#include "../tests/common.h"
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main(void)
|
main(void)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <common.h>
|
||||||
|
#include <sparsemap.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "../include/sparsemap.h"
|
|
||||||
#include "../tests/common.h"
|
|
||||||
|
|
||||||
#define TEST_ARRAY_SIZE 1024
|
#define TEST_ARRAY_SIZE 1024
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
62
flake.nix
62
flake.nix
|
@ -5,7 +5,6 @@
|
||||||
# nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
|
# nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/23.11";
|
nixpkgs.url = "github:NixOS/nixpkgs/23.11";
|
||||||
utils.url = "github:numtide/flake-utils";
|
utils.url = "github:numtide/flake-utils";
|
||||||
utils.inputs.nixpkgs.follows = "nixpkgs";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs, ... }
|
outputs = { self, nixpkgs, ... }
|
||||||
|
@ -18,38 +17,39 @@
|
||||||
config.allowUnfree = true;
|
config.allowUnfree = true;
|
||||||
};
|
};
|
||||||
in {
|
in {
|
||||||
devShell = pkgs.mkShell rec {
|
flake-utils.inputs.systems.follows = "system";
|
||||||
name = "sparsemap";
|
devShell = pkgs.mkShell rec {
|
||||||
packages = with pkgs; [
|
name = "sparsemap";
|
||||||
act
|
packages = with pkgs; [
|
||||||
autoconf
|
act
|
||||||
clang
|
autoconf
|
||||||
ed
|
clang
|
||||||
gcc
|
ed
|
||||||
gdb
|
gcc
|
||||||
gettext
|
gdb
|
||||||
graphviz-nox
|
gettext
|
||||||
libtool
|
graphviz-nox
|
||||||
m4
|
libtool
|
||||||
perl
|
m4
|
||||||
pkg-config
|
perl
|
||||||
python3
|
pkg-config
|
||||||
ripgrep
|
python3
|
||||||
valgrind
|
ripgrep
|
||||||
];
|
valgrind
|
||||||
|
];
|
||||||
|
|
||||||
buildInputs = with pkgs; [
|
buildInputs = with pkgs; [
|
||||||
libbacktrace
|
libbacktrace
|
||||||
glibc.out
|
glibc.out
|
||||||
glibc.static
|
glibc.static
|
||||||
];
|
];
|
||||||
|
|
||||||
shellHook = let
|
shellHook = let
|
||||||
icon = "f121";
|
icon = "f121";
|
||||||
in ''
|
in ''
|
||||||
export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
|
export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
DOCKER_BUILDKIT = 1;
|
DOCKER_BUILDKIT = 1;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
2908
include/roaring.h
Normal file
2908
include/roaring.h
Normal file
File diff suppressed because it is too large
Load diff
25883
lib/roaring.c
Normal file
25883
lib/roaring.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1270,6 +1270,7 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other)
|
||||||
uint8_t *src, *dst;
|
uint8_t *src, *dst;
|
||||||
size_t src_count = __sm_get_chunk_map_count(other), dst_count = __sm_get_chunk_map_count(map), max_chunk_count = src_count + dst_count;
|
size_t src_count = __sm_get_chunk_map_count(other), dst_count = __sm_get_chunk_map_count(map), max_chunk_count = src_count + dst_count;
|
||||||
|
|
||||||
|
// TODO: ensure there is space, or ENOSPC
|
||||||
dst = __sm_get_chunk_map_data(map, 0);
|
dst = __sm_get_chunk_map_data(map, 0);
|
||||||
src = __sm_get_chunk_map_data(other, 0);
|
src = __sm_get_chunk_map_data(other, 0);
|
||||||
for (size_t i = 0; i < max_chunk_count && src_count; i++) {
|
for (size_t i = 0; i < max_chunk_count && src_count; i++) {
|
||||||
|
|
|
@ -6,9 +6,10 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "../include/common.h"
|
||||||
|
#include "../include/roaring.h"
|
||||||
#include "../include/sparsemap.h"
|
#include "../include/sparsemap.h"
|
||||||
#include "../tests/common.h"
|
#include "../include/tdigest.h"
|
||||||
#include "../tests/tdigest.h"
|
|
||||||
|
|
||||||
/* midl.h ------------------------------------------------------------------ */
|
/* midl.h ------------------------------------------------------------------ */
|
||||||
/** @defgroup idls ID List Management
|
/** @defgroup idls ID List Management
|
||||||
|
@ -511,6 +512,17 @@ verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
|
||||||
|
{
|
||||||
|
for (pgno_t i = pg; i < pg + len; i++) {
|
||||||
|
if (roaring_bitmap_contains(rbm, i) != true) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
|
verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
|
||||||
{
|
{
|
||||||
|
@ -533,6 +545,17 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
|
||||||
|
{
|
||||||
|
for (pgno_t i = 0; i < len; i++) {
|
||||||
|
if (roaring_bitmap_contains(rbm, pg + i) != false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
|
verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
|
||||||
{
|
{
|
||||||
|
@ -548,6 +571,23 @@ verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
verify_sm_eq_rm(sparsemap_t *map, roaring_bitmap_t *rbm)
|
||||||
|
{
|
||||||
|
uint64_t max = roaring_bitmap_maximum(rbm);
|
||||||
|
roaring_uint32_iterator_t iter;
|
||||||
|
roaring_iterator_init(rbm, &iter);
|
||||||
|
for (uint64_t i = 0; i <= max; i++) {
|
||||||
|
if (i == iter.current_value) {
|
||||||
|
assert(sparsemap_is_set(map, i) == true);
|
||||||
|
roaring_uint32_iterator_advance(&iter);
|
||||||
|
} else {
|
||||||
|
assert(sparsemap_is_set(map, i) == false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
|
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
|
||||||
{
|
{
|
||||||
|
@ -637,7 +677,6 @@ int
|
||||||
main(void)
|
main(void)
|
||||||
{
|
{
|
||||||
size_t replenish = 0, iterations = 0;
|
size_t replenish = 0, iterations = 0;
|
||||||
bool prefer_mdb_idl_location = (bool)xorshift32() % 2;
|
|
||||||
|
|
||||||
// disable buffering
|
// disable buffering
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
@ -657,6 +696,7 @@ main(void)
|
||||||
sparsemap_idx_t amt = INITIAL_AMOUNT;
|
sparsemap_idx_t amt = INITIAL_AMOUNT;
|
||||||
MDB_IDL list = mdb_midl_alloc(amt);
|
MDB_IDL list = mdb_midl_alloc(amt);
|
||||||
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
|
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
|
||||||
|
roaring_bitmap_t *rbm = roaring_bitmap_create();
|
||||||
|
|
||||||
// start with 2GiB of 4KiB free pages to track:
|
// start with 2GiB of 4KiB free pages to track:
|
||||||
// - MDB_IDL requires one int for each free page
|
// - MDB_IDL requires one int for each free page
|
||||||
|
@ -665,16 +705,19 @@ main(void)
|
||||||
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
|
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
|
||||||
// We list every free (unallocated) page in the IDL, while...
|
// We list every free (unallocated) page in the IDL, while...
|
||||||
mdb_midl_xappend(list, pg);
|
mdb_midl_xappend(list, pg);
|
||||||
// ... true (unset in the bitmap) indicates free in the bitmap.
|
// ... true (unset in the bitmap) indicates free in the bitmap, ...
|
||||||
assert(_sparsemap_set(&map, pg, true) == pg);
|
assert(_sparsemap_set(&map, pg, true) == pg);
|
||||||
|
assert(roaring_bitmap_add_checked(rbm, pg));
|
||||||
}
|
}
|
||||||
mdb_midl_sort(list);
|
mdb_midl_sort(list);
|
||||||
|
roaring_bitmap_run_optimize(rbm);
|
||||||
assert(verify_sm_eq_ml(map, list));
|
assert(verify_sm_eq_ml(map, list));
|
||||||
|
assert(verify_sm_eq_rm(map, rbm));
|
||||||
|
|
||||||
double b, e;
|
double b, e;
|
||||||
while (1) {
|
while (1) {
|
||||||
unsigned mi;
|
unsigned mi;
|
||||||
pgno_t ml, sl;
|
pgno_t ml, sl, rl;
|
||||||
|
|
||||||
// get an amount [1, 16] of pages to find preferring smaller sizes
|
// get an amount [1, 16] of pages to find preferring smaller sizes
|
||||||
unsigned n = toss(15) + 1;
|
unsigned n = toss(15) + 1;
|
||||||
|
@ -707,6 +750,7 @@ main(void)
|
||||||
}
|
}
|
||||||
assert(verify_span_midl(list, ml, n));
|
assert(verify_span_midl(list, ml, n));
|
||||||
assert(verify_span_sparsemap(map, ml, n));
|
assert(verify_span_sparsemap(map, ml, n));
|
||||||
|
assert(verify_span_roaring(rbm, ml, n));
|
||||||
|
|
||||||
// find a set of pages using the Sparsemap
|
// find a set of pages using the Sparsemap
|
||||||
{
|
{
|
||||||
|
@ -720,9 +764,30 @@ main(void)
|
||||||
}
|
}
|
||||||
assert(verify_span_midl(list, sl, n));
|
assert(verify_span_midl(list, sl, n));
|
||||||
assert(verify_span_sparsemap(map, sl, n));
|
assert(verify_span_sparsemap(map, sl, n));
|
||||||
|
assert(verify_span_roaring(rbm, sl, n));
|
||||||
|
|
||||||
|
// find a set of pages using the Roaring Bitmap
|
||||||
|
{
|
||||||
|
b = nsts();
|
||||||
|
uint64_t max = roaring_bitmap_maximum(rbm);
|
||||||
|
uint64_t offset = roaring_bitmap_minimum(rbm);
|
||||||
|
do {
|
||||||
|
if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset++;
|
||||||
|
} while (offset <= max);
|
||||||
|
rl = offset;
|
||||||
|
e = nsts();
|
||||||
|
}
|
||||||
|
assert(verify_span_midl(list, rl, n));
|
||||||
|
assert(verify_span_sparsemap(map, rl, n));
|
||||||
|
assert(verify_span_roaring(rbm, rl, n));
|
||||||
|
|
||||||
|
bool prefer_mdb_idl_loc = (bool)xorshift32() % 2;
|
||||||
|
|
||||||
// acquire the set of pages within the list
|
// acquire the set of pages within the list
|
||||||
if (prefer_mdb_idl_location) {
|
if (prefer_mdb_idl_loc) {
|
||||||
b = nsts();
|
b = nsts();
|
||||||
unsigned j, num = n;
|
unsigned j, num = n;
|
||||||
int i = mi;
|
int i = mi;
|
||||||
|
@ -755,7 +820,7 @@ main(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
// acquire the set of pages within the sparsemap
|
// acquire the set of pages within the sparsemap
|
||||||
if (prefer_mdb_idl_location) {
|
if (prefer_mdb_idl_loc) {
|
||||||
b = nsts();
|
b = nsts();
|
||||||
for (pgno_t i = ml; i < ml + n; i++) {
|
for (pgno_t i = ml; i < ml + n; i++) {
|
||||||
assert(_sparsemap_set(&map, i, false) == i);
|
assert(_sparsemap_set(&map, i, false) == i);
|
||||||
|
@ -771,7 +836,20 @@ main(void)
|
||||||
td_add(b_span_take, e - b, 1);
|
td_add(b_span_take, e - b, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// acquire the set of pages within the roaring bitmap
|
||||||
|
if (prefer_mdb_idl_loc) {
|
||||||
|
b = nsts();
|
||||||
|
roaring_bitmap_remove_range(rbm, ml, ml + n);
|
||||||
|
e = nsts();
|
||||||
|
} else {
|
||||||
|
b = nsts();
|
||||||
|
roaring_bitmap_remove_range(rbm, sl, sl + n);
|
||||||
|
e = nsts();
|
||||||
|
}
|
||||||
|
roaring_bitmap_run_optimize(rbm);
|
||||||
|
|
||||||
assert(verify_sm_eq_ml(map, list));
|
assert(verify_sm_eq_ml(map, list));
|
||||||
|
assert(verify_sm_eq_rm(map, rbm));
|
||||||
|
|
||||||
// Once we've used a tenth of the free list, let's replenish it a bit.
|
// Once we've used a tenth of the free list, let's replenish it a bit.
|
||||||
if (list[0] < amt / 10) {
|
if (list[0] < amt / 10) {
|
||||||
|
@ -790,7 +868,9 @@ main(void)
|
||||||
if (SPARSEMAP_FOUND(pgno)) {
|
if (SPARSEMAP_FOUND(pgno)) {
|
||||||
assert(verify_empty_midl(list, pgno, len));
|
assert(verify_empty_midl(list, pgno, len));
|
||||||
assert(verify_empty_sparsemap(map, pgno, len));
|
assert(verify_empty_sparsemap(map, pgno, len));
|
||||||
|
assert(verify_empty_roaring(rbm, pgno, len));
|
||||||
assert(verify_sm_eq_ml(map, list));
|
assert(verify_sm_eq_ml(map, list));
|
||||||
|
assert(verify_sm_eq_rm(map, rbm));
|
||||||
if (list[-1] - list[0] < len) {
|
if (list[-1] - list[0] < len) {
|
||||||
mdb_midl_need(&list, list[-1] + len);
|
mdb_midl_need(&list, list[-1] + len);
|
||||||
}
|
}
|
||||||
|
@ -801,13 +881,16 @@ main(void)
|
||||||
assert(verify_midl_contains(list, i) == true);
|
assert(verify_midl_contains(list, i) == true);
|
||||||
assert(_sparsemap_set(&map, i, true) == i);
|
assert(_sparsemap_set(&map, i, true) == i);
|
||||||
assert(sparsemap_is_set(map, i) == true);
|
assert(sparsemap_is_set(map, i) == true);
|
||||||
|
assert(roaring_bitmap_add_checked(rbm, i) == true);
|
||||||
}
|
}
|
||||||
mdb_midl_sort(list);
|
mdb_midl_sort(list);
|
||||||
assert(verify_midl_nodups(list));
|
assert(verify_midl_nodups(list));
|
||||||
assert(verify_span_midl(list, pgno, len));
|
assert(verify_span_midl(list, pgno, len));
|
||||||
assert(verify_span_sparsemap(map, pgno, len));
|
assert(verify_span_sparsemap(map, pgno, len));
|
||||||
|
assert(verify_span_roaring(rbm, pgno, len));
|
||||||
}
|
}
|
||||||
assert(verify_sm_eq_ml(map, list));
|
assert(verify_sm_eq_ml(map, list));
|
||||||
|
assert(verify_sm_eq_rm(map, rbm));
|
||||||
replenish++;
|
replenish++;
|
||||||
} while (list[0] < amt - 32);
|
} while (list[0] < amt - 32);
|
||||||
}
|
}
|
||||||
|
@ -821,10 +904,10 @@ main(void)
|
||||||
size_t len = COUNT;
|
size_t len = COUNT;
|
||||||
// The largest page is at list[1] because this is a reverse sorted list.
|
// The largest page is at list[1] because this is a reverse sorted list.
|
||||||
pgno_t pg = list[0] ? list[1] + 1 : 0;
|
pgno_t pg = list[0] ? list[1] + 1 : 0;
|
||||||
// if (toss(6) + 1 < 7) {
|
if (true) { // disable shrinking for now... (toss(6) + 1 < 7)
|
||||||
if (true) { // disable shrinking for now...
|
|
||||||
MDB_IDL new_list = mdb_midl_alloc(len);
|
MDB_IDL new_list = mdb_midl_alloc(len);
|
||||||
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
|
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
|
||||||
|
roaring_bitmap_t *new_rbm = roaring_bitmap_create();
|
||||||
for (size_t i = 0; i < len; i++) {
|
for (size_t i = 0; i < len; i++) {
|
||||||
pgno_t gp = (pg + len) - i;
|
pgno_t gp = (pg + len) - i;
|
||||||
new_list[i + 1] = gp;
|
new_list[i + 1] = gp;
|
||||||
|
@ -832,8 +915,11 @@ main(void)
|
||||||
assert(verify_midl_contains(new_list, gp) == true);
|
assert(verify_midl_contains(new_list, gp) == true);
|
||||||
assert(_sparsemap_set(&new_map, gp, true) == gp);
|
assert(_sparsemap_set(&new_map, gp, true) == gp);
|
||||||
assert(sparsemap_is_set(new_map, gp));
|
assert(sparsemap_is_set(new_map, gp));
|
||||||
|
assert(roaring_bitmap_add_checked(new_rbm, gp));
|
||||||
|
assert(roaring_bitmap_contains(new_rbm, gp));
|
||||||
}
|
}
|
||||||
assert(verify_sm_eq_ml(new_map, new_list));
|
assert(verify_sm_eq_ml(new_map, new_list));
|
||||||
|
assert(verify_sm_eq_rm(new_map, new_rbm));
|
||||||
{
|
{
|
||||||
b = nsts();
|
b = nsts();
|
||||||
mdb_midl_append_list(&list, new_list);
|
mdb_midl_append_list(&list, new_list);
|
||||||
|
@ -856,19 +942,37 @@ main(void)
|
||||||
assert(sparsemap_is_set(map, gp));
|
assert(sparsemap_is_set(map, gp));
|
||||||
}
|
}
|
||||||
free(new_map);
|
free(new_map);
|
||||||
|
{
|
||||||
|
b = nsts();
|
||||||
|
roaring_bitmap_or_inplace(rbm, new_rbm);
|
||||||
|
e = nsts();
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < len; i++) {
|
||||||
|
pgno_t gp = (pg + len) - i;
|
||||||
|
assert(roaring_bitmap_contains(rbm, gp));
|
||||||
|
}
|
||||||
|
roaring_free(new_rbm);
|
||||||
} else {
|
} else {
|
||||||
if (list[-1] > INITIAL_AMOUNT) {
|
if (list[-1] > INITIAL_AMOUNT) {
|
||||||
// ... a fraction of the time, remove COUNT / 2 of 4KiB pages.
|
// ... a fraction of the time, remove COUNT / 2 of 4KiB pages.
|
||||||
pgno_t pg;
|
{
|
||||||
for (size_t i = 0; i < COUNT; i++) {
|
pgno_t pg;
|
||||||
pg = list[list[0] - i];
|
for (size_t i = 0; i < COUNT; i++) {
|
||||||
assert(sparsemap_is_set(map, pg) == true);
|
pg = list[list[0] - i];
|
||||||
assert(_sparsemap_set(&map, pg, false) == pg);
|
assert(sparsemap_is_set(map, pg) == true);
|
||||||
|
assert(_sparsemap_set(&map, pg, false) == pg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
mdb_midl_shrink_to(&list, list[0] - COUNT);
|
||||||
}
|
}
|
||||||
mdb_midl_shrink_to(&list, list[0] - COUNT);
|
|
||||||
assert(list[list[0]] != pg);
|
assert(list[list[0]] != pg);
|
||||||
assert(verify_midl_nodups(list));
|
assert(verify_midl_nodups(list));
|
||||||
verify_sm_eq_ml(map, list);
|
verify_sm_eq_ml(map, list);
|
||||||
|
verify_sm_eq_rm(map, rbm);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue