Compare commits

...

47 commits

Author SHA1 Message Date
6c8ad3b25f shrink the buffer as chunks empty (#12)
Reviewed-on: #12
Co-authored-by: Greg Burd <greg@burd.me>
Co-committed-by: Greg Burd <greg@burd.me>
2024-07-02 13:07:25 +00:00
2f6e9bdf7b Merge pull request 'fuzz testing' (#11) from gburd/fixing-the-fuzz into main
Reviewed-on: #11
2024-06-27 08:13:15 +00:00
7335c8e1d8 find a span of unset bits in an empty map 2024-06-27 04:10:08 -04:00
f7fd8eed9a spelling 2024-05-23 19:59:24 -04:00
7ecd2e5dc2 cleanup 2024-05-21 11:59:07 -04:00
eae0743b56 rewrite soak with more flexibility and ability to record/playback events to reproduce bugs (#10)
Reviewed-on: #10
2024-05-21 00:29:26 +00:00
68c8cd0858 cmake 2024-05-16 22:13:27 +00:00
e398d014e8 fixes 2024-05-16 12:00:09 -04:00
a5c62cfe1e Add CMake build 2024-05-16 10:38:33 -04:00
7bb26dbe88 vector offset, fix span 2024-05-15 22:03:36 -04:00
69dd960558 use chunk alignment; new fill factor API 2024-05-15 15:50:15 -04:00
b028408150 split/merge (#9)
Reviewed-on: #9
Co-authored-by: Greg Burd <greg@burd.me>
Co-committed-by: Greg Burd <greg@burd.me>
2024-05-15 17:57:40 +00:00
f5b500087d split 2024-05-13 12:46:25 -04:00
604d48617e revert split and merge for now 2024-05-13 02:07:45 +00:00
7a572453c9 Merge pull request 'gburd/full-merge' (#8) from gburd/full-merge into main
Reviewed-on: #8
2024-05-11 01:26:44 +00:00
6990f94278 Merge branch 'main' into gburd/full-merge 2024-05-11 01:26:19 +00:00
984a1a920d working 2024-05-11 01:25:15 +00:00
e641e6cc63 WIP 2024-05-10 20:26:40 +00:00
32721da645 disable neovim for a bit 2024-05-10 20:26:40 +00:00
cea31ab184 disable neovim for a bit 2024-05-10 15:32:50 +00:00
f525a097c7 WIP 2024-05-10 11:27:43 -04:00
Greg Burd
a24a4e1cc5
fixes 2024-05-10 09:40:05 -04:00
08e8c0a5d1 chunk align 2024-05-09 16:01:28 -04:00
7494a9a3a2 many new API; fixes 2024-05-09 15:50:56 -04:00
0e348efaf6 a few more fixes 2024-05-07 08:47:56 -04:00
367d15a160 integrat ecrupp review suggestions 2024-05-06 15:43:47 -04:00
3b8e083806 add copy() 2024-05-05 16:46:07 -04:00
c53bd53f9a try using env to force ssh key 2024-05-04 20:23:52 +00:00
a2b1a22a79 fix 2024-05-04 09:45:43 -04:00
1909954b42 merge amt 2024-05-04 09:44:46 -04:00
cbd53976f9 merge when out of space 2024-05-04 09:39:19 -04:00
40de89bd8a fixes 2024-05-03 22:15:36 +00:00
756476561f updates 2024-05-03 22:14:11 +00:00
4e2b4bde26 spelling 2024-05-03 21:12:57 +00:00
9dccdcbf76 spelling 2024-05-03 21:00:30 +00:00
ad910f0adf add clang-format, etc. 2024-05-03 20:56:14 +00:00
7c9ecc6d79 fixes to ide 2024-05-03 20:47:41 +00:00
gregburd
ea32274524 add Google IDX config 2024-05-03 20:32:14 +00:00
4bf1a5a00c grow map when merge requires it 2024-05-03 16:07:46 -04:00
2afbfa946e fixups 2024-05-03 16:03:09 -04:00
ffc762a796 cleanup 2024-05-03 15:24:57 -04:00
fcab709f62 roaring and sparsemap should identify the same spans 2024-05-03 15:20:35 -04:00
b9612f12cc compare against roaring bitmaps 2024-05-03 15:15:39 -04:00
Greg Burd
57a8f99a32
fixes 2024-05-02 21:35:37 -04:00
a7754b05ba cleanup 2024-05-02 21:13:17 -04:00
86798b32bd formatting 2024-05-02 14:57:56 -04:00
c9ae042b40 fix scan 2024-05-02 14:55:04 -04:00
26 changed files with 32185 additions and 1475 deletions

View file

@ -3,8 +3,11 @@ Checks: >
bugprone-*,
clang-analyzer-*,
google-*,
misc-*,
-google-objectivec-*,
modernize-*,
-modernize-deprecated-headers,
-modernize-use-using,
misc-*,
performance-*,
portability-*,
-bugprone-branch-clone,

3
.gitignore vendored
View file

@ -3,7 +3,7 @@
**/*.o
tests/test
examples/ex_?
examples/soak
tests/soak
.cache
hints.txt
tmp/
@ -28,6 +28,7 @@ compile_commands.json
*.dat
*.fsm
*.db
.vscode/
# Created by https://www.gitignore.io/api/jetbrains
# Edit at https://www.gitignore.io/?templates=jetbrains

15
.idea/customTargets.xml Normal file
View file

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CLionExternalBuildManager">
<target id="db0ccaeb-4851-470b-83d0-afa663f6ceb9" name="tests/soak" defaultType="MAKE">
<configuration id="98973a90-a9d0-431b-9071-9ce6960b0b01" name="tests/soak">
<build type="MAKE">
<make targetName="tests/soak" />
</build>
<clean type="MAKE">
<make targetName="clean" />
</clean>
</configuration>
</target>
</component>
</project>

25
.idea/makefile.xml Normal file
View file

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MakefileBuildTargetsManager">
<user-build-targets>
<build-target name="tests/soak">
<build-configurations>
<build-configuration>
<make-targets>
<make-target>tests/soak</make-target>
</make-targets>
</build-configuration>
</build-configurations>
</build-target>
<build-target name="clean">
<build-configurations>
<build-configuration>
<make-targets>
<make-target>clean</make-target>
</make-targets>
</build-configuration>
</build-configurations>
</build-target>
</user-build-targets>
</component>
</project>

View file

@ -8,6 +8,7 @@
<sourceRoots>
<file path="$PROJECT_DIR$/examples" />
<file path="$PROJECT_DIR$/include" />
<file path="$PROJECT_DIR$/lib" />
<file path="$PROJECT_DIR$/src" />
<file path="$PROJECT_DIR$/tests" />
</sourceRoots>

105
.idx/dev.nix Normal file
View file

@ -0,0 +1,105 @@
# To learn more about how to use Nix to configure your environment
# see: https://developers.google.com/idx/guides/customize-idx-env
{ pkgs, ... }: {
# Which nixpkgs channel to use.
channel = "stable-23.11"; # or "unstable"
# Use https://search.nixos.org/packages to find packages
packages = with pkgs; [
act
autoconf
clang
clang-tools
cmake
ed
fira-code-nerdfont
gcc
gdb
gettext
glibc.out
glibc.static
gnumake
graphviz-nox
libbacktrace
libtool
lldb
m4
neovim
ninja
openssh
perl
pkg-config
python3
ripgrep
# pkgs.python311
# pkgs.python311Packages.pip
];
# Sets environment variables in the workspace
env = {
GIT_SSH_COMMAND="ssh -i ~/.ssh/id_ed25519 -F /dev/null";
};
idx = {
# Search for the extensions you want on https://open-vsx.org/ and use "publisher.id"
extensions = [
"coolbear.systemd-unit-file"
"dotjoshjohnson.xml"
"editorconfig.editorconfig"
"esbenp.prettier-vscode"
"mads-hartmann.bash-ide-vscode"
"ms-python.python"
"ms-vscode.clangd"
"ms-vscode.cmake-tools"
"ms-vscode.cpptools"
"ms-vscode.cpptools-extension-pack"
"ms-vscode.makefile-tools"
"ms-vsliveshare.vsliveshare"
"redhat.vscode-yaml"
"rogalmic.bash-debug"
"ryu1kn.partial-diff"
"streetsidesoftware.code-spell-checker"
"timonwong.shellcheck"
"twxs.cmake"
"vadimcn.vscode-lldb"
#"vscode-icons-team.vscode-icons"
"yzhang.markdown-all-in-one"
"znck.grammarly"
#"llvm-vs-code-extensions.vscode-clangd"
#"eamodio.gitlens"
"asvetliakov.vscode-neovim"
#"golang.go"
#"jnoortheen.nix-ide"
#"ms-python.vscode-pylance"
#"mspython.debugpy"
#"scala-lang.scala"
#"scalameta.metals"
#"vscodevim.vim"
];
# Enable previews
previews = {
enable = true;
previews = {
# web = {
# # Example: run "npm run dev" with PORT set to IDX's defined port for previews,
# # and show it in IDX's web preview panel
# command = ["npm" "run" "dev"];
# manager = "web";
# env = {
# # Environment variables to set for your server
# PORT = "$PORT";
# };
# };
};
};
# Workspace lifecycle hooks
workspace = {
# Runs when a workspace is first created
onCreate = {
# Example: install JS dependencies from NPM
# npm-install = 'npm install';
};
onStart = {
# Example: start a background task to watch and re-build backend code
# watch-backend = "npm run watch-backend";
};
};
};
}

94
CMakeLists.txt Normal file
View file

@ -0,0 +1,94 @@
cmake_minimum_required(VERSION 3.27)
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
message(FATAL_ERROR "Do not build in-source. Please remove CMakeCache.txt and the CMakeFiles/ directory. Then build out-of-source.")
endif()
project(sparsemap LANGUAGES C)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_C_OUTPUT_EXTENSION .o)
# Set source and header file locations
set(SOURCE_DIR src)
set(HEADER_DIR include)
set(COMMON_CMAKE_C_FLAGS "-Wall -Wextra -Wpedantic")
set(CMAKE_C_FLAGS_DEBUG "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -O0")
set(CMAKE_C_FLAGS_PROFILE "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -Og -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope")
set(CMAKE_C_FLAGS_RELEASE "-Ofast")
# Include all header files from the header directory
file(GLOB_RECURSE HEADERS CONFIGURE_FILES ${HEADER_DIR}/*.h)
# Configure library sources
set(LIB_SRC
${SOURCE_DIR}/sparsemap.c
)
# Option to control building shared/static libraries
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
# Add shared library
add_library(sparsemap_SHARED SHARED ${LIB_SRC} ${HEADERS})
# Set target properties for the shared library (adjust if needed)
set_target_properties(sparsemap_SHARED PROPERTIES
VERSION 1.0.0 # Set library version
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" # Set output directory
COMPILE_FLAGS "${CMAKE_C_FLAGS_${CMAKE_CURRENT_LIST_MODE}}"
)
target_include_directories(sparsemap_SHARED PRIVATE ${HEADER_DIR})
# Add static library
add_library(sparsemap STATIC ${LIB_SRC} ${HEADERS})
# Set target properties for static library (adjust if needed)
set_target_properties(sparsemap PROPERTIES
OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" # Set output directory
)
target_include_directories(sparsemap PRIVATE ${HEADER_DIR})
# Add ex_1 program
add_executable(ex_1 examples/ex_1.c tests/munit.c lib/common.c)
target_link_libraries(ex_1 PRIVATE sparsemap)
target_include_directories(ex_1 PRIVATE ${HEADER_DIR})
add_custom_target(run_ex_1 COMMAND ex_1 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add ex_2 program
add_executable(ex_2 examples/ex_2.c tests/munit.c lib/common.c)
target_link_libraries(ex_2 PRIVATE sparsemap)
target_include_directories(ex_2 PRIVATE ${HEADER_DIR})
add_custom_target(run_ex_2 COMMAND ex_2 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add ex_3 program
add_executable(ex_3 examples/ex_3.c tests/munit.c lib/common.c)
target_link_libraries(ex_3 PRIVATE sparsemap)
target_include_directories(ex_3 PRIVATE ${HEADER_DIR})
add_custom_target(run_ex_3 COMMAND ex_3 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add ex_4 program
add_executable(ex_4 examples/ex_4.c tests/munit.c lib/common.c)
target_link_libraries(ex_4 PRIVATE sparsemap)
target_include_directories(ex_4 PRIVATE ${HEADER_DIR})
add_custom_target(run_ex_4 COMMAND ex_4 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add test program
add_executable(test tests/test.c tests/munit.c lib/common.c)
target_link_libraries(test PRIVATE sparsemap)
target_include_directories(test PRIVATE ${HEADER_DIR})
add_custom_target(run_test COMMAND test WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add soak program
add_executable(soak tests/soak.c lib/common.c lib/tdigest.c lib/roaring.c)
target_link_libraries(soak PRIVATE sparsemap)
target_include_directories(soak PRIVATE ${HEADER_DIR} lib)
target_link_libraries(soak PUBLIC m)
add_custom_target(run_soak COMMAND soak WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
# Add fuzzer program
# add_executable(fuzzer tests/fuzzer.c)
# target_link_libraries(fuzzer PRIVATE sparsemap)
# target_include_directories(fuzzer PRIVATE ${HEADER_DIR} lib)
# target_link_libraries(fuzzer PUBLIC m)
# add_custom_target(run_fuzzer COMMAND fuzzer WORKING_DIRECTORY ${CMAKE_BINARY_DIR})

View file

@ -3,23 +3,25 @@ OBJS = sparsemap.o
STATIC_LIB = libsparsemap.a
SHARED_LIB = libsparsemap.so
LIBS = -lm
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Ofast -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
TESTS = tests/test
TEST_OBJS = tests/test.o tests/munit.o tests/tdigest.o tests/common.o
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4 examples/soak
TESTS = tests/test tests/soak
TEST_OBJS = tests/test.o lib/munit.o lib/tdigest.o lib/common.o
LIB_OBJS = lib/munit.o lib/tdigest.o lib/common.o lib/roaring.o
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4
.PHONY: all shared static clean test examples mls
@ -35,17 +37,23 @@ $(STATIC_LIB): $(OBJS)
$(SHARED_LIB): $(OBJS)
$(CC) $(CFLAGS) -o $@ $? -shared
examples: $(STATIC_LIB) $(EXAMPLES) examples/common.o
examples: $(STATIC_LIB) $(EXAMPLES) $(TEST_OBJS)
mls: examples/mls
test: $(TESTS)
tests: $(TESTS)
check: test
test: tests
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
tests/test: $(TEST_OBJS) $(STATIC_LIB)
$(CC) $^ -lm -o $@ $(TEST_FLAGS)
soak: tests
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak
fuzzer: tests
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/fuzzer ./crash.case
tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
clean:
rm -f $(OBJS)
@ -55,35 +63,38 @@ clean:
rm -f $(EXAMPLES) examples/*.o
format:
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c examples/soak.c tests/test.c tests/common.c tests/common.h
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c tests/midl.c lib/common.c include/common.h
# clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
%.o: src/%.c
$(CC) $(CFLAGS) -c -o $@ $^
lib/%.o: tests/%.c
$(CC) $(CFLAGS) -c -o $@ $^
tests/%.o: tests/%.c
$(CC) $(CFLAGS) -c -o $@ $^
examples/%.o: examples/%.c
$(CC) $(CFLAGS) -c -o $@ $^
examples/common.o: tests/common.c
$(CC) $(CFLAGS) -c -o $@ $^
examples/ex_1: $(LIB_OBJS) examples/ex_1.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
examples/ex_1: examples/common.o examples/ex_1.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_2: $(LIB_OBJS) examples/ex_2.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
examples/ex_2: examples/common.o examples/ex_2.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_3: $(LIB_OBJS) examples/ex_3.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
examples/ex_4: $(LIB_OBJS) examples/ex_4.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB)
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
tests/soak: $(LIB_OBJS) tests/soak.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
examples/soak: examples/common.o tests/tdigest.o examples/soak.o $(STATIC_LIB)
$(CC) $^ -lm -o $@ $(CFLAGS) $(TEST_FLAGS)
tests/fuzzer: $(LIB_OBJS) tests/fuzzer.o $(STATIC_LIB)
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS) -DFUZZ_DEBUG
todo:
rg -i 'todo|gsb|abort'

View file

@ -1,8 +1,12 @@
# Sparsemap
`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
requires additional 8 bytes of overhead.
Bitsets, also called bitmaps, are commonly used as fast data structures.
Unfortunately, they can use too much memory. To compensate, we often use
compressed bitmaps.
`sparsemap` is a sparse, compressed bitmap. In the best case, it can store 2048
bits in just 8 bytes. In the worst case, it stores the 2048 bits uncompressed and
requires an additional 8 bytes of overhead.
The "best" case happens when large consecutive sequences of the bits are
either set ("1") or not set ("0"). If your numbers are consecutive 64bit
@ -14,9 +18,9 @@ On the lowest level stores bits in sm_bitvec_t's (a uint32_t or uint64_t).
Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
to each sm_bitvec_t describes its condition. The descriptor word and the
sm_bitvec_t's have the same size.) The descriptor of a sm_bitvec_t
sm_bitvec_t's have the same size. The descriptor of a sm_bitvec_t
specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
bits ("0") or has a mixed payload. In the first and second case the
bits ("0") or has a mixed payload. In the first and second cases, the
sm_bitvec_t is not stored.
An example shows a sequence of 4 x 16 bits (here, each sm_bitvec_t and the
@ -27,7 +31,7 @@ Descriptor word has 16 bits):
^^ ^^ ^^ ^^-- sm_bitvec_t #0 - #3 are "0000000000000000"
^^-- sm_bitvec_t #4 is "1111111111111111"
^^-- sm_bitvec_t #5 is "0000000000000000"
^^-- sm_bitvec_t #7 is "1111111111111111"
^^-- sm_bitvec_t #6 is "1111111111111111"
^^-- sm_bitvec_t #7 is "0110010101111001"
Since the first 7 sm_bitvec_t's are either all "1" or "0" they are not stored.
@ -36,30 +40,34 @@ The actual memory sequence looks like this:
0000000011001110 0110010101111001
Instead of storing 8 Words (16 bytes), we only store 2 Words (2 bytes): one
for the descriptor, one for last sm_bitvec_t #7.
for the descriptor, and one for the last sm_bitvec_t #7.
The sparsemap stores a list of chunk maps, and for each chunk map it stores the
The sparsemap stores a list of chunk maps, and for each chunk map, it stores the
absolute address (i.e. if the user sets bit 0 and bit 10000, and the chunk map
capacity is 2048, the sparsemap creates two chunk maps; the first starts at
offset 0, the second starts at offset 8192).
## Usage instructions
The file `examples/ex_1.c` has example code.
Copy the files `src/sparsemap.c` and `include/sparsemap.h` into your project.
Review the `examples/*` and `tests/*` code.
## Final words
This bitmap has efficient compression when used on long sequences of set (or
unset) bits (i.e. with a word size of 64bit, and a payload of consecutive
unset) bits (i.e. with a word size of 64 bit and a payload of consecutive
numbers without gaps, the payload of 2048 x sizeof(uint64_t) = 16kb will occupy
only 8 bytes!
only 8 bytes!).
However, if the sequence is not consecutive and has gaps, it's possible that
the compression is inefficient, and the size (in the worst case) is identical
to an uncompressed bit vector (sometimes higher due to the bytes required for
metadata). In such cases, other compression schemes are more efficient (i.e.
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/). We
include in `lib` the amalgamated (git `2dc8070`) and well-known
[Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring/tree/master) and
use it in the soak test to ensure our results are as accurate as theirs.
This library was originally created for [hamsterdb](http://hamsterdb.com) in
This library was originally created by [Christoph Rupp](https://crupp.de) in
C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
for use in LMDB and OpenLDAP.

14
cmake-it.sh Executable file
View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
target=${1:-Debug}
set targets="Debug Profile Release"
case "$target" in
$targets*) echo "Building ${target}..." ;;
*) echo "Unknown target ${target}, exiting." ;;
esac
name=${target,,}
echo $name
rm -rf "./cmake-build-${name}-system" && \
cmake -DCMAKE_BUILD_TYPE=${target} -DCMAKE_MAKE_PROGRAM=ninja -DCMAKE_C_COMPILER=clang -G Ninja -S "${PWD}" -B "${PWD}/cmake-build-${name}-system" && \
(cd "${PWD}/cmake-build-${name}-system" && ninja)

View file

@ -186,15 +186,17 @@ main()
sparsemap_clear(map);
for (int i = 0; i < 2048 * 3; i++) {
sparsemap_set(map, i, true);
assert(sparsemap_is_set(map, i) == true);
}
sparsemap_split(map, 64, sm2);
for (int i = 0; i < 64; i++) {
assert(sparsemap_is_set(map, i) == true);
assert(sparsemap_is_set(sm2, i) == false);
}
for (int i = 64; i < 2048 * 3; i++) {
assert(sparsemap_is_set(map, i) == false);
assert(sparsemap_is_set(sm2, i) == true);
for (int i = 0; i < 2048 * 3; i++) {
if (i < 64) {
assert(sparsemap_is_set(map, i) == true);
assert(sparsemap_is_set(sm2, i) == false);
} else {
assert(sparsemap_is_set(map, i) == false);
assert(sparsemap_is_set(sm2, i) == true);
}
}
fprintf(stderr, " ok\n");

View file

@ -1,11 +1,10 @@
#include <assert.h>
#include <common.h>
#include <sparsemap.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "../include/sparsemap.h"
#include "../tests/common.h"
int
main(void)
{

View file

@ -1,11 +1,10 @@
#include <assert.h>
#include <common.h>
#include <sparsemap.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "../include/sparsemap.h"
#include "../tests/common.h"
#define TEST_ARRAY_SIZE 1024
int

View file

@ -1,864 +0,0 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../include/sparsemap.h"
#include "../tests/common.h"
#include "../tests/tdigest.h"
/* midl.h ------------------------------------------------------------------ */
/** @defgroup idls ID List Management
* @{
*/
/** A generic unsigned ID number. These were entryIDs in back-bdb.
* Preferably it should have the same size as a pointer.
*/
typedef size_t MDB_ID;
/** An IDL is an ID List, a sorted array of IDs. The first
* element of the array is a counter for how many actual
* IDs are in the list. In the original back-bdb code, IDLs are
* sorted in ascending order. For libmdb IDLs are sorted in
* descending order.
*/
typedef MDB_ID *MDB_IDL;
/* IDL sizes - likely should be even bigger
* limiting factors: sizeof(ID), thread stack size
*/
#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))
#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)
#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
#define MDB_IDL_FIRST(ids) ((ids)[1])
#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])
/** Current max length of an #mdb_midl_alloc()ed IDL */
#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])
/** Append ID to IDL. The IDL must be big enough. */
#define mdb_midl_xappend(idl, id) \
do { \
MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
xidl[xlen] = (id); \
} while (0)
/** Search for an ID in an IDL.
* @param[in] ids The IDL to search.
* @param[in] id The ID to search for.
* @return The index of the first ID greater than or equal to \b id.
*/
unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);
/** Allocate an IDL.
* Allocates memory for an IDL of the given size.
* @return IDL on success, NULL on failure.
*/
MDB_IDL mdb_midl_alloc(int num);
/** Free an IDL.
* @param[in] ids The IDL to free.
*/
void mdb_midl_free(MDB_IDL ids);
/** Shrink an IDL.
* Return the IDL to the default size if it has grown larger.
* @param[in,out] idp Address of the IDL to shrink.
*/
void mdb_midl_shrink(MDB_IDL *idp);
/** Shrink an IDL to a specific size.
* Resize the IDL to \b size if it is larger.
* @param[in,out] idp Address of the IDL to shrink.
* @param[in] size Capacity to have once resized.
*/
void mdb_midl_shrink(MDB_IDL *idp);
/** Make room for num additional elements in an IDL.
* @param[in,out] idp Address of the IDL.
* @param[in] num Number of elements to make room for.
* @return 0 on success, ENOMEM on failure.
*/
int mdb_midl_need(MDB_IDL *idp, unsigned num);
/** Append an ID onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The ID to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append(MDB_IDL *idp, MDB_ID id);
/** Append an IDL onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] app The IDL to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);
/** Append an ID range onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The lowest ID to append.
* @param[in] n Number of IDs to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);
/** Merge an IDL onto an IDL. The destination IDL must be big enough.
* @param[in] idl The IDL to merge into.
* @param[in] merge The IDL to merge.
*/
void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);
/** Sort an IDL.
* @param[in,out] ids The IDL to sort.
*/
void mdb_midl_sort(MDB_IDL ids);
/* midl.c ------------------------------------------------------------------ */
/** @defgroup idls ID List Management
* @{
*/
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))
unsigned
mdb_midl_search(MDB_IDL ids, MDB_ID id)
{
/*
* binary search of id in ids
* if found, returns position of id
* if not found, returns first position greater than id
*/
unsigned base = 0;
unsigned cursor = 1;
int val = 0;
unsigned n = ids[0];
while (0 < n) {
unsigned pivot = n >> 1;
cursor = base + pivot + 1;
val = CMP(ids[cursor], id);
if (val < 0) {
n = pivot;
} else if (val > 0) {
base = cursor;
n -= pivot + 1;
} else {
return cursor;
}
}
if (val > 0) {
++cursor;
}
return cursor;
}
int
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
{
unsigned x, i;
x = mdb_midl_search(ids, id);
assert(x > 0);
if (x < 1) {
/* internal error */
return -2;
}
if (x <= ids[0] && ids[x] == id) {
/* duplicate */
assert(0);
return -1;
}
if (++ids[0] >= MDB_IDL_DB_MAX) {
/* no room */
--ids[0];
return -2;
} else {
/* insert id */
for (i = ids[0]; i > x; i--)
ids[i] = ids[i - 1];
ids[x] = id;
}
return 0;
}
inline void
mdb_midl_pop_n(MDB_IDL ids, unsigned n)
{
ids[0] = ids[0] - n;
}
void
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
{
for (int i = idx - 1; idx < ids[0] - 1;)
ids[++i] = ids[++idx];
ids[0] = ids[0] - 1;
}
void
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
{
unsigned idx = mdb_midl_search(ids, id);
if (idx <= ids[0] && ids[idx] == id)
mdb_midl_remove_at(ids, idx);
}
MDB_IDL
mdb_midl_alloc(int num)
{
MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID));
if (ids) {
*ids++ = num;
*ids = 0;
}
return ids;
}
void
mdb_midl_free(MDB_IDL ids)
{
if (ids)
free(ids - 1);
}
void
mdb_midl_shrink(MDB_IDL *idp)
{
MDB_IDL ids = *idp;
if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
*ids++ = MDB_IDL_UM_MAX;
*idp = ids;
}
}
void
mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
{
MDB_IDL ids = *idp;
if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) {
*ids++ = size;
*idp = ids;
*idp[0] = *idp[0] > size ? size : *idp[0];
}
}
static int
mdb_midl_grow(MDB_IDL *idp, int num)
{
MDB_IDL idn = *idp - 1;
/* grow it */
idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
if (!idn)
return ENOMEM;
*idn++ += num;
*idp = idn;
return 0;
}
int
mdb_midl_need(MDB_IDL *idp, unsigned num)
{
MDB_IDL ids = *idp;
num += ids[0];
if (num > ids[-1]) {
num = (num + num / 4 + (256 + 2)) & -256;
if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
return ENOMEM;
*ids++ = num - 2;
*idp = ids;
}
return 0;
}
int
mdb_midl_append(MDB_IDL *idp, MDB_ID id)
{
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] >= ids[-1]) {
if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
return ENOMEM;
ids = *idp;
}
ids[0]++;
ids[ids[0]] = id;
return 0;
}
int
mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
{
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] + app[0] >= ids[-1]) {
if (mdb_midl_grow(idp, app[0]))
return ENOMEM;
ids = *idp;
}
memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
ids[0] += app[0];
return 0;
}
int
mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
{
MDB_ID *ids = *idp, len = ids[0];
/* Too big? */
if (len + n > ids[-1]) {
if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
return ENOMEM;
ids = *idp;
}
ids[0] = len + n;
ids += len;
while (n)
ids[n--] = id++;
return 0;
}
void
mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
{
MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
old_id = idl[j];
while (i) {
merge_id = merge[i--];
for (; old_id < merge_id; old_id = idl[--j])
idl[k--] = old_id;
idl[k--] = merge_id;
}
idl[0] = total;
}
/* Quicksort + Insertion sort for small arrays */
#define SMALL 8
#define MIDL_SWAP(a, b) \
{ \
itmp = (a); \
(a) = (b); \
(b) = itmp; \
}
void
mdb_midl_sort(MDB_IDL ids)
{
/* Max possible depth of int-indexed tree * 2 items/level */
int istack[sizeof(int) * CHAR_BIT * 2];
int i, j, k, l, ir, jstack;
MDB_ID a, itmp;
ir = (int)ids[0];
l = 1;
jstack = 0;
for (;;) {
if (ir - l < SMALL) { /* Insertion sort */
for (j = l + 1; j <= ir; j++) {
a = ids[j];
for (i = j - 1; i >= 1; i--) {
if (ids[i] >= a)
break;
ids[i + 1] = ids[i];
}
ids[i + 1] = a;
}
if (jstack == 0)
break;
ir = istack[jstack--];
l = istack[jstack--];
} else {
k = (l + ir) >> 1; /* Choose median of left, center, right */
MIDL_SWAP(ids[k], ids[l + 1]);
if (ids[l] < ids[ir]) {
MIDL_SWAP(ids[l], ids[ir]);
}
if (ids[l + 1] < ids[ir]) {
MIDL_SWAP(ids[l + 1], ids[ir]);
}
if (ids[l] < ids[l + 1]) {
MIDL_SWAP(ids[l], ids[l + 1]);
}
i = l + 1;
j = ir;
a = ids[l + 1];
for (;;) {
do
i++;
while (ids[i] > a);
do
j--;
while (ids[j] < a);
if (j < i)
break;
MIDL_SWAP(ids[i], ids[j]);
}
ids[l + 1] = ids[j];
ids[j] = a;
jstack += 2;
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
}
}
}
}
/* ------------------------------------------------------------------------- */
typedef MDB_ID pgno_t;
char *
bytes_as(double bytes, char *s, size_t size)
{
const char *units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
size_t i = 0;
while (bytes >= 1024 && i < sizeof(units) / sizeof(units[0]) - 1) {
bytes /= 1024;
i++;
}
snprintf(s, size, "%.2f %s", bytes, units[i]);
return s;
}
/**
* A "coin toss" function that is critical to the proper operation of the
* Skiplist. For example, when `max = 6` this function returns 0 with
* probability 0.5, 1 with 0.25, 2 with 0.125, etc. until 6 with 0.5^7.
*/
static int
toss(size_t max)
{
size_t level = 0;
double probability = 0.5;
double random_value = (double)xorshift32() / RAND_MAX;
while (random_value < probability && level < max) {
level++;
probability *= 0.5;
}
return level;
}
bool
verify_midl_contains(MDB_IDL list, pgno_t pg)
{
unsigned idx = mdb_midl_search(list, pg);
return idx <= list[0] && list[idx] == pg;
}
bool
verify_midl_nodups(MDB_IDL list)
{
pgno_t id = 1;
while (id < list[0]) {
if (list[id] == list[id + 1])
return false;
id++;
}
return true;
}
bool
verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len)
{
pgno_t idx = mdb_midl_search(list, pg);
bool found = idx <= list[0] && list[idx] == pg;
if (!found)
return false;
if (len == 1)
return true;
if (list[len] + 1 != list[len - 1])
return false;
return true;
}
bool
verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len)
{
for (pgno_t i = pg; i < pg + len; i++) {
pgno_t idx = mdb_midl_search(list, pg);
bool found = idx <= list[0] && list[idx] == pg;
if (found)
return false;
}
return true;
}
bool
verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
{
for (pgno_t i = pg; i < pg + len; i++) {
if (sparsemap_is_set(map, i) != true) {
return false;
}
}
return true;
}
bool
verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
{
for (pgno_t i = 0; i < len; i++) {
if (sparsemap_is_set(map, pg + i) != false) {
return false;
}
}
return true;
}
bool
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
{
for (MDB_ID i = 1; i <= list[0]; i++) {
pgno_t pg = list[i];
unsigned skipped = i == 1 ? 0 : list[i - 1] - list[i] - 1;
if (skipped) {
for (MDB_ID j = list[i - 1]; j > list[i]; j--) {
if (sparsemap_is_set(map, pg - j) != false) {
__diag("%zu\n", pg - j);
return false;
}
}
}
if (sparsemap_is_set(map, pg) != true) {
__diag("%zu\n", pg);
return false;
}
}
return true;
}
sparsemap_idx_t
_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
{
do {
sparsemap_idx_t l = sparsemap_set(*map, idx, value);
if (l != idx) {
if (errno == ENOSPC) {
*map = sparsemap_set_data_size(*map, sparsemap_get_capacity(*map) + 64, NULL);
assert(*map != NULL);
errno = 0;
} else {
assert(false);
}
} else {
return l;
}
} while (true);
}
td_histogram_t *l_span_loc;
td_histogram_t *b_span_loc;
td_histogram_t *l_span_take;
td_histogram_t *b_span_take;
td_histogram_t *l_span_merge;
td_histogram_t *b_span_merge;
void
stats_header(void)
{
printf(
"timestamp,iterations,idl_cap,idl_used,idl_bytes,sm_cap,sm_used,idl_loc_p50,idl_loc_p75,idl_loc_p90,idl_loc_p99,idl_loc_p999,sm_loc_p50,sm_loc_p75,sm_loc_p90,sm_loc_p99,sm_loc_p999,idl_take_p50,idl_take_p75,idl_take_p90,idl_take_p99,idl_take_p999,sm_take_p50,sm_take_p75,sm_take_p90,sm_take_p99,sm_take_p999,idl_merge_p50,idl_merge_p75,idl_merge_p90,idl_merge_p99,idl_merge_p999,sm_merge_p50,sm_merge_p75,sm_merge_p90,sm_merge_p99,sm_merge_p999\n");
}
void
stats(size_t iterations, sparsemap_t *map, MDB_IDL list)
{
if (iterations < 10)
return;
td_compress(l_span_loc);
td_compress(b_span_loc);
td_compress(l_span_take);
td_compress(b_span_take);
td_compress(l_span_merge);
td_compress(b_span_merge);
printf(
"%f,%zu,%zu,%zu,%zu,%zu,%zu,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f\n",
nsts(), iterations, list[-1], list[0], MDB_IDL_SIZEOF(list), sparsemap_get_capacity(map), sparsemap_get_size(map), td_quantile(l_span_loc, .5),
td_quantile(l_span_loc, .75), td_quantile(l_span_loc, .90), td_quantile(l_span_loc, .99), td_quantile(l_span_loc, .999), td_quantile(b_span_loc, .5),
td_quantile(b_span_loc, .75), td_quantile(b_span_loc, .90), td_quantile(b_span_loc, .99), td_quantile(b_span_loc, .999), td_quantile(l_span_take, .5),
td_quantile(l_span_take, .75), td_quantile(l_span_take, .90), td_quantile(l_span_take, .99), td_quantile(l_span_take, .999), td_quantile(b_span_take, .5),
td_quantile(b_span_take, .75), td_quantile(b_span_take, .90), td_quantile(b_span_take, .99), td_quantile(b_span_take, .999), td_quantile(l_span_merge, .5),
td_quantile(l_span_merge, .75), td_quantile(l_span_merge, .90), td_quantile(l_span_merge, .99), td_quantile(l_span_merge, .999),
td_quantile(b_span_merge, .5), td_quantile(b_span_merge, .75), td_quantile(b_span_merge, .90), td_quantile(b_span_merge, .99),
td_quantile(b_span_merge, .999));
}
#define INITIAL_AMOUNT 1024 * 2
/*
* A "soak test" that tries to replicate behavior in LMDB for page allocation.
*/
int
main(void)
{
size_t replenish = 0, iterations = 0;
bool prefer_mdb_idl_location = (bool)xorshift32() % 2;
// disable buffering
#ifdef DEBUG
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
#endif
l_span_loc = td_new(100);
b_span_loc = td_new(100);
l_span_take = td_new(100);
b_span_take = td_new(100);
l_span_merge = td_new(100);
b_span_merge = td_new(100);
stats_header();
sparsemap_idx_t amt = INITIAL_AMOUNT;
MDB_IDL list = mdb_midl_alloc(amt);
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
// start with 2GiB of 4KiB free pages to track:
// - MDB_IDL requires one int for each free page
// - Sparsemap will compress the set bits using less memory
mdb_midl_need(&list, amt);
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
// We list every free (unallocated) page in the IDL, while...
mdb_midl_xappend(list, pg);
// ... true (unset in the bitmap) indicates free in the bitmap.
assert(_sparsemap_set(&map, pg, true) == pg);
}
mdb_midl_sort(list);
stats(0, map, list);
assert(verify_sm_eq_ml(map, list));
double b, e;
while (1) {
unsigned mi;
pgno_t ml, sl;
// get an amount [1, 16] of pages to find preferring smaller sizes
unsigned n = toss(15) + 1;
// find a set of pages using the MDB_IDL
{
b = nsts();
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
int retry = 1;
unsigned i = 0;
pgno_t pgno = 0, *mop = list;
unsigned n2 = n, mop_len = mop[0];
if (mop_len > n2) {
i = mop_len;
do {
pgno = mop[i];
if (mop[i - n2] == pgno + n2)
goto search_done;
} while (--i > n2);
if (--retry < 0)
break;
}
search_done:;
ml = pgno;
mi = i;
e = nsts();
td_add(l_span_loc, e - b, 1);
}
assert(verify_span_midl(list, ml, n));
assert(verify_span_sparsemap(map, ml, n));
// find a set of pages using the Sparsemap
{
b = nsts();
pgno_t pgno = sparsemap_span(map, 0, n, true);
assert(SPARSEMAP_NOT_FOUND(pgno) == false);
sl = pgno;
e = nsts();
td_add(b_span_loc, e - b, 1);
}
assert(verify_span_midl(list, sl, n));
assert(verify_span_sparsemap(map, sl, n));
// acquire the set of pages within the list
if (prefer_mdb_idl_location) {
b = nsts();
unsigned j, num = n;
int i = mi;
pgno_t *mop = list;
unsigned mop_len = mop[0];
mop[0] = mop_len -= num;
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
td_add(l_span_take, e - b, 1);
} else {
b = nsts();
unsigned j, num = n;
int i = mdb_midl_search(list, sl) + num;
pgno_t *mop = list;
unsigned mop_len = mop[0];
mop[0] = mop_len -= num;
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
td_add(l_span_take, e - b, 1);
}
// acquire the set of pages within the sparsemap
if (prefer_mdb_idl_location) {
b = nsts();
for (pgno_t i = ml; i < ml + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
e = nsts();
td_add(b_span_take, e - b, 1);
} else {
b = nsts();
for (pgno_t i = sl; i <= sl + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
e = nsts();
td_add(b_span_take, e - b, 1);
}
assert(verify_sm_eq_ml(map, list));
// Once we've used half of the free list, let's replenish it a bit.
if (list[0] < amt / 2) {
do {
pgno_t pg;
size_t len, retries = amt;
do {
len = toss(15) + 1;
pg = sparsemap_span(map, 0, len, false);
//__diag("%zu\t%zu,%zu\n", iterations, replenish, retries);
} while (SPARSEMAP_NOT_FOUND(pg) && --retries);
if (retries == 0) {
goto larger_please;
}
if (SPARSEMAP_FOUND(pg)) {
assert(verify_empty_midl(list, pg, len));
assert(verify_empty_sparsemap(map, pg, len));
assert(verify_sm_eq_ml(map, list));
if (list[-1] - list[0] < len) {
mdb_midl_need(&list, list[-1] + len);
}
for (size_t i = pg; i < pg + len; i++) {
assert(verify_midl_contains(list, i) == false);
assert(sparsemap_is_set(map, i) == false);
mdb_midl_insert(list, i);
assert(verify_midl_contains(list, i) == true);
assert(_sparsemap_set(&map, i, true) == i);
assert(sparsemap_is_set(map, i) == true);
}
mdb_midl_sort(list);
assert(verify_midl_nodups(list));
assert(verify_span_midl(list, pg, len));
assert(verify_span_sparsemap(map, pg, len));
}
assert(verify_sm_eq_ml(map, list));
replenish++;
} while (list[0] < amt - 32);
}
replenish = 0;
// every so often, either ...
if (iterations % 1000 == 0) {
larger_please:;
size_t COUNT = xorshift32() % 1024 + 513;
// ... add some amount of 4KiB pages, or
size_t len = COUNT;
// The largest page is at list[1] because this is a reverse sorted list.
pgno_t pg = list[0] ? list[1] + 1 : 0;
// if (toss(6) + 1 < 7) {
if (true) { // disable shrinking for now...
MDB_IDL new_list = mdb_midl_alloc(len);
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
new_list[i + 1] = gp;
new_list[0]++;
assert(verify_midl_contains(new_list, gp) == true);
assert(_sparsemap_set(&new_map, gp, true) == gp);
assert(sparsemap_is_set(new_map, gp));
}
assert(verify_sm_eq_ml(new_map, new_list));
{
b = nsts();
mdb_midl_append_list(&list, new_list);
mdb_midl_sort(list);
e = nsts();
td_add(l_span_merge, e - b, 1);
}
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(verify_midl_contains(list, gp) == true);
}
{
b = nsts();
sparsemap_merge(map, new_map);
e = nsts();
td_add(b_span_merge, e - b, 1);
}
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(sparsemap_is_set(map, gp));
}
free(new_map);
} else {
if (list[-1] > INITIAL_AMOUNT) {
// ... a fraction of the time, remove COUNT / 2 of 4KiB pages.
pgno_t pg;
for (size_t i = 0; i < COUNT; i++) {
pg = list[list[0] - i];
assert(sparsemap_is_set(map, pg) == true);
assert(_sparsemap_set(&map, pg, false) == pg);
}
mdb_midl_shrink_to(&list, list[0] - COUNT);
assert(list[list[0]] != pg);
assert(verify_midl_nodups(list));
verify_sm_eq_ml(map, list);
}
}
}
iterations++;
stats(iterations, map, list);
}
return 0;
}

View file

@ -5,7 +5,6 @@
# nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/23.11";
utils.url = "github:numtide/flake-utils";
utils.inputs.nixpkgs.follows = "nixpkgs";
};
outputs = { self, nixpkgs, ... }
@ -18,38 +17,41 @@
config.allowUnfree = true;
};
in {
devShell = pkgs.mkShell rec {
name = "sparsemap";
packages = with pkgs; [
act
autoconf
clang
ed
gcc
gdb
gettext
graphviz-nox
libtool
m4
perl
pkg-config
python3
ripgrep
valgrind
];
flake-utils.inputs.systems.follows = "system";
devShell = pkgs.mkShell rec {
name = "sparsemap";
packages = with pkgs; [
act
autoconf
clang
cmake
ed
gcc
gdb
gettext
graphviz-nox
libtool
m4
ninja
perl
pkg-config
python3
ripgrep
valgrind
];
buildInputs = with pkgs; [
libbacktrace
glibc.out
glibc.static
];
buildInputs = with pkgs; [
libbacktrace
glibc.out
glibc.static
];
shellHook = let
icon = "f121";
in ''
shellHook = let
icon = "f121";
in ''
export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
'';
};
DOCKER_BUILDKIT = 1;
});
};
DOCKER_BUILDKIT = 1;
});
}

View file

@ -1,4 +1,6 @@
#include "../include/sparsemap.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wvariadic-macros"
#define __diag(...) \
@ -54,3 +56,5 @@ void sm_whats_set(sparsemap_t *map, int off, int len);
bool sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
bool sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
char *bytes_as(double bytes, char *s, size_t size);

2908
include/roaring.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -100,7 +100,7 @@ typedef uint64_t sm_bitvec_t;
*
* The buffer used for the bitmap is allocated in the same heap allocation as
* the structure, this means that you only need to call free() on the returned
* object to free all resources. Using this method it is allowable to grow the
* object to free all resources. Using this method allows you to grow the
* buffer size by calling #sparsemap_set_data_size(). This function calls
* #sparsemap_init().
*
@ -110,16 +110,21 @@ typedef uint64_t sm_bitvec_t;
*/
sparsemap_t *sparsemap(size_t size);
/** @brief Allocate a new, copy of the \b other sparsemap_t.
*
* @param[in] other The sparsemap to copy.
*/
sparsemap_t *sparsemap_copy(sparsemap_t *other);
/** @brief Allocate a new, empty sparsemap_t that references (wraps) the buffer
* \b data of \b size bytes to use for storage of bitmap data.
*
* This function allocates a new sparsemap_t but not the buffer which is
* provided by the caller as \b data which can be allocated on the stack or
* heap. Caller is responsible for calling free() on the returned heap object
* and releasing the memory used for \b data. Resizing the buffer is not
* directly supported, you may attempt to resize by calling
* #sparsemap_set_data_size() with the potentially relocated address of \b data.
* This function calls #sparsemap_init().
* and releasing the memory used for \b data. Resizing the buffer is only
* supported when the heap object for the map includes the buffer and the
* \b data offset supplied is relative to the object (see #sparsemap()).
*
* @param[in] data A heap or stack memory buffer of \b size for use storing
* bitmap data.
@ -132,7 +137,7 @@ sparsemap_t *sparsemap_wrap(uint8_t *data, size_t size);
* bytes for storage of bitmap data.
*
* Given the address of an existing \b map allocated on the stack or heap this
* function will initialize the datastructure and use the provided \b data of
* function will initialize the data structure and use the provided \b data of
* \b size for bitmap data. Caller is responsible for all memory management.
* Resizing the buffer is not directly supported, you
* may resize it and call #sparsemap_set_data_size() and then ensure that should
@ -150,7 +155,7 @@ void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size);
* the specified buffer.
*
* Given the address of an existing \b map this function will assign to the
* provided datastructure \b data of \b size for bitmap data. Caller is
* provided data structure \b data of \b size for bitmap data. Caller is
* responsible for all memory management. Use this when as a way to
* "deserialize" bytes and make them ready for use as a bitmap.
*
@ -162,7 +167,7 @@ void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size);
void sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size);
/** @brief Resets values and empties the buffer making it ready to accept new
* data.
* data but does not free the memory.
*
* @param[in] map The sparsemap reference.
*/
@ -180,7 +185,7 @@ void sparsemap_clear(sparsemap_t *map);
* caveats apply here as well.
*
* When called referencing a \b map that was allocate by the caller this
* function will only update the values within the datastructure.
* function will only update the values within the data structure.
*
* @param[in] map The sparsemap reference.
* @param[in] size The desired size of the buffer \b data used for the bitmap.
@ -189,7 +194,7 @@ void sparsemap_clear(sparsemap_t *map);
* @note The resizing of caller supplied allocated objects is not yet fully
* supported.
*/
sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, size_t size, uint8_t *data);
sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, uint8_t *data, size_t size);
/** @brief Calculate remaining capacity, approaches 0 when full.
*
@ -246,38 +251,82 @@ sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
*/
size_t sparsemap_get_size(sparsemap_t *map);
/** @brief Returns a pointer to the data buffer used for the map.
*
* @param[in] map The sparsemap reference.
* @returns a pointer to the data buffer used for the map
*/
void *sparsemap_get_data(sparsemap_t *map);
/** @brief Returns the number of elements in the map.
*
* @param[in] map The sparsemap reference.
* @returns the number of elements in the map
*/
size_t sparsemap_count(sparsemap_t *map);
/** @brief Returns the offset of the first bit set in the map.
*
* This is the same as the value of the first set bit in the
* map.
*
* @param[in] map The sparsemap reference.
* @returns the offset of the first bit set in the map
*/
sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map);
/** @brief Returns the offset of the last bit set in the map.
*
* This is the same as the value of the last bit set in the
* map.
*
* @param[in] map The sparsemap reference.
* @returns the offset of the index bit set in the map
*/
sparsemap_idx_t sparsemap_get_ending_offset(sparsemap_t *map);
/** @brief Returns the percent of bits set in the map.
*
* @param[in] map The sparsemap reference.
* @returns the percent of bits set.
*/
double sparsemap_fill_factor(sparsemap_t *map);
/** @brief Provides a method for a callback function to examine every bit set in
* the index.
*
* This decompresses the whole bitmap and invokes #scanner() passing a 64bit
* "vector" of bits in order from 0 index to the end of the map. Using standard
* bit masking techniques it is possible to read each bit from LSB to MSB in
* these vectors to read the entire content of the bitmap index (see
* examples/ex_4.c).
* This decompresses the whole bitmap and invokes #scanner() passing an array
* of the positions of set bits in order from 0 index to the end of the map.
*
* @param[in] map The sparsemap reference.
* @param[in] skip Start the scan after "skip" bits.
* @param[in] skip Start the scan after \b skip position in the map.
* @param[in] aux Auxiliary information passed to the scanner.
*/
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n), size_t skip);
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux);
/** @brief Merges the values from \b other into the \b map, \b other is unchanged.
* \b other bitmap while removing them from \b map.
/** @brief Merges the values from \b source into \b destination, \b source is unchanged.
*
* @param[in] map The sparsemap reference.
* @param[in] other The bitmap to merge into \b map.
* Efficiently adds all set bits from \b source into \b destination.
*
* @param[in] destination The sparsemap reference into which we will merge \b source.
* @param[in] source The bitmap to merge into \b destination.
* @returns 0 on success, or sets errno to ENOSPC and returns the amount of
* additional space required to successfully merge the maps.
*/
void sparsemap_merge(sparsemap_t *map, sparsemap_t *other);
int sparsemap_merge(sparsemap_t *destination, sparsemap_t *source);
/** @brief Splits the bitmap by assigning all bits starting at \b offset to the
* \b other bitmap while removing them from \b map.
*
* The split must occur on a vector boundary.
* The \b other bitmap is expected to be empty.
*
* @param[in] map The sparsemap reference.
* @param[in] offset The 0-based offset into the bitmap at which to split.
* @param[in] offset The 0-based offset into the bitmap at which to split, if
* set to SPARSEMAP_IDX_MAX then the bits will be evenly split.
* @param[in] other The bitmap into which we place the split.
* @returns the offset at which the map was split
*/
void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other);
sparsemap_idx_t sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other);
/** @brief Finds the index of the \b n'th bit set to \b value.
*
@ -291,7 +340,7 @@ void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *othe
* 3 when 0-based).
*
* @param[in] map The sparsemap reference.
* @param[in] n Specifies how many bits to ignore (when n=3 return the position
* @param[in] n Specifies how many bits to ignore (when n=2 return the position
* of the third matching bit).
* @param[in] value Determines if the search is to examine set (true) or unset
* (false) bits in the bitmap index.
@ -321,14 +370,14 @@ size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value);
* matching \b value in the bitmap.
*
* @param[in] map The sparsemap reference.
* @param[in] idx 0-based start of search within the bitmap.
* @param[in] start 0-based start of search within the bitmap.
* @param[in] len The length of contiguous bits we're seeking.
* @param[in] value Determines if the scan is to find all set (true) or unset
* (false) bits of \b len.
* @returns the index of the first bit matching the criteria; when not found
* found SPARSEMAP_IDX_MAX
*/
size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value);
size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t start, size_t len, bool value);
#if defined(__cplusplus)
}

View file

@ -18,8 +18,8 @@
#endif
#endif
#include "../include/common.h"
#include "../include/sparsemap.h"
#include "common.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wvariadic-macros"
@ -51,6 +51,29 @@ tsc(void)
return 0;
}
// get microsecond timestamp
uint64_t
msts()
{
#ifdef _SC_MONOTONIC_CLOCK
struct timespec ts;
if (sysconf(_SC_MONOTONIC_CLOCK) > 0) {
/* A monotonic clock presents */
if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
return (uint64_t)(ts.tv_sec * 1000000 + ts.tv_nsec / 1000);
else
return 0;
}
return 0;
#else
struct timeval tv;
if (gettimeofday(&tv, NULL) == 0)
return (uint64_t)(tv.tv_sec * 1000000 + tv.tv_usec);
else
return 0;
#endif
}
double
nsts(void)
{
@ -87,7 +110,7 @@ xorshift32_seed(void)
}
void
shuffle(int *array, size_t n) // TODO working?
shuffle(int *array, size_t n)
{
for (size_t i = n - 1; i > 0; --i) {
size_t j = xorshift32() % (i + 1);
@ -277,17 +300,6 @@ is_set(const int array[], int bit)
return false;
}
int
is_unique(int a[], int l, int value)
{
for (int i = 0; i < l; ++i) {
if (a[i] == value) {
return 0; // Not unique
}
}
return 1; // Unique
}
int
whats_set_uint64(uint64_t number, int pos[64])
{
@ -302,19 +314,31 @@ whats_set_uint64(uint64_t number, int pos[64])
return length;
}
/** @brief Fills an array with unique random values between 0 and max_value.
*
* @param[in] a The array to fill.
* @param[in] l The length of the array to fill.
* @param[in] max_value The maximum value for the random numbers.
*/
void
setup_test_array(int a[], int l, int max_value)
{
if (a == NULL || max_value < 0) {
return; // Basic error handling and validation
// Create a set to store the unique values.
int unique_values[max_value + 1];
for (int i = 0; i <= max_value; ++i) {
unique_values[i] = 0;
}
for (int i = 0; i < l; ++i) {
int candidate;
do {
candidate = random_uint32() % (max_value + 1); // Generate a new value within the specified range
} while (!is_unique(a, i, candidate)); // Repeat until a unique value is found
a[i] = candidate; // Assign the unique value to the array
// Keep generating random numbers until we have l unique values.
int count = 0;
while (count < l) {
int random_number = random_uint32() % (max_value + 1);
if (unique_values[random_number] == 0) {
unique_values[random_number] = 1;
a[count] = random_number;
count++;
}
}
}
@ -432,3 +456,18 @@ sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value)
}
return false;
}
char *
bytes_as(double bytes, char *s, size_t size)
{
const char *units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
size_t i = 0;
while (bytes >= 1024 && i < sizeof(units) / sizeof(units[0]) - 1) {
bytes /= 1024;
i++;
}
snprintf(s, size, "%.2f %s", bytes, units[i]);
return s;
}

25883
lib/roaring.c Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

417
tests/midl.c Normal file
View file

@ -0,0 +1,417 @@
/** @defgroup idls ID List Management
* @{
*/
/** A generic unsigned ID number. These were entryIDs in back-bdb.
* Preferably it should have the same size as a pointer.
*/
typedef size_t MDB_ID;
/** An IDL is an ID List, a sorted array of IDs. The first
* element of the array is a counter for how many actual
* IDs are in the list. In the original back-bdb code, IDLs are
* sorted in ascending order. For libmdb IDLs are sorted in
* descending order.
*/
typedef MDB_ID *MDB_IDL;
/* IDL sizes - likely should be even bigger
* limiting factors: sizeof(ID), thread stack size
*/
#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))
#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)
#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
#define MDB_IDL_FIRST(ids) ((ids)[1])
#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])
/** Current max length of an #mdb_midl_alloc()ed IDL */
#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])
/** Append ID to IDL. The IDL must be big enough. */
#define mdb_midl_xappend(idl, id) \
do { \
MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
xidl[xlen] = (id); \
} while (0)
/** Search for an ID in an IDL.
* @param[in] ids The IDL to search.
* @param[in] id The ID to search for.
* @return The index of the first ID greater than or equal to \b id.
*/
unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);
/** Allocate an IDL.
* Allocates memory for an IDL of the given size.
* @return IDL on success, NULL on failure.
*/
MDB_IDL mdb_midl_alloc(int num);
/** Free an IDL.
* @param[in] ids The IDL to free.
*/
void mdb_midl_free(MDB_IDL ids);
/** Shrink an IDL.
* Return the IDL to the default size if it has grown larger.
* @param[in,out] idp Address of the IDL to shrink.
*/
void mdb_midl_shrink(MDB_IDL *idp);
/** Shrink an IDL to a specific size.
* Resize the IDL to \b size if it is larger.
* @param[in,out] idp Address of the IDL to shrink.
* @param[in] size Capacity to have once resized.
*/
void mdb_midl_shrink(MDB_IDL *idp);
/** Make room for num additional elements in an IDL.
* @param[in,out] idp Address of the IDL.
* @param[in] num Number of elements to make room for.
* @return 0 on success, ENOMEM on failure.
*/
int mdb_midl_need(MDB_IDL *idp, unsigned num);
/** Append an ID onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The ID to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append(MDB_IDL *idp, MDB_ID id);
/** Append an IDL onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] app The IDL to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);
/** Append an ID range onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The lowest ID to append.
* @param[in] n Number of IDs to append.
* @return 0 on success, ENOMEM if the IDL is too large.
*/
int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);
/** Merge an IDL onto an IDL. The destination IDL must be big enough.
* @param[in] idl The IDL to merge into.
* @param[in] merge The IDL to merge.
*/
void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);
/** Sort an IDL.
* @param[in,out] ids The IDL to sort.
*/
void mdb_midl_sort(MDB_IDL ids);
/* midl.c ------------------------------------------------------------------ */
/** @defgroup idls ID List Management
* @{
*/
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))
unsigned
mdb_midl_search(MDB_IDL ids, MDB_ID id)
{
/*
* binary search of id in ids
* if found, returns position of id
* if not found, returns first position greater than id
*/
unsigned base = 0;
unsigned cursor = 1;
int val = 0;
unsigned n = ids[0];
while (0 < n) {
unsigned pivot = n >> 1;
cursor = base + pivot + 1;
val = CMP(ids[cursor], id);
if (val < 0) {
n = pivot;
} else if (val > 0) {
base = cursor;
n -= pivot + 1;
} else {
return cursor;
}
}
if (val > 0) {
++cursor;
}
return cursor;
}
int
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
{
unsigned x, i;
x = mdb_midl_search(ids, id);
assert(x > 0);
if (x < 1) {
/* internal error */
return -2;
}
if (x <= ids[0] && ids[x] == id) {
/* duplicate */
assert(0);
return -1;
}
if (++ids[0] >= MDB_IDL_DB_MAX) {
/* no room */
--ids[0];
return -2;
} else {
/* insert id */
for (i = ids[0]; i > x; i--)
ids[i] = ids[i - 1];
ids[x] = id;
}
return 0;
}
inline void
mdb_midl_pop_n(MDB_IDL ids, unsigned n)
{
ids[0] = ids[0] - n;
}
void
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
{
for (int i = idx - 1; idx < ids[0] - 1;)
ids[++i] = ids[++idx];
ids[0] = ids[0] - 1;
}
void
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
{
unsigned idx = mdb_midl_search(ids, id);
if (idx <= ids[0] && ids[idx] == id)
mdb_midl_remove_at(ids, idx);
}
MDB_IDL
mdb_midl_alloc(int num)
{
MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID));
if (ids) {
*ids++ = num;
*ids = 0;
}
return ids;
}
void
mdb_midl_free(MDB_IDL ids)
{
if (ids)
free(ids - 1);
}
void
mdb_midl_shrink(MDB_IDL *idp)
{
MDB_IDL ids = *idp;
if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
*ids++ = MDB_IDL_UM_MAX;
*idp = ids;
}
}
void
mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
{
MDB_IDL ids = *idp;
if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) {
*ids++ = size;
*idp = ids;
*idp[0] = *idp[0] > size ? size : *idp[0];
}
}
static int
mdb_midl_grow(MDB_IDL *idp, int num)
{
MDB_IDL idn = *idp - 1;
/* grow it */
idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
if (!idn)
return ENOMEM;
*idn++ += num;
*idp = idn;
return 0;
}
int
mdb_midl_need(MDB_IDL *idp, unsigned num)
{
MDB_IDL ids = *idp;
num += ids[0];
if (num > ids[-1]) {
num = (num + num / 4 + (256 + 2)) & -256;
if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
return ENOMEM;
*ids++ = num - 2;
*idp = ids;
}
return 0;
}
int
mdb_midl_append(MDB_IDL *idp, MDB_ID id)
{
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] >= ids[-1]) {
if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
return ENOMEM;
ids = *idp;
}
ids[0]++;
ids[ids[0]] = id;
return 0;
}
int
mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
{
MDB_IDL ids = *idp;
/* Too big? */
if (ids[0] + app[0] >= ids[-1]) {
if (mdb_midl_grow(idp, app[0]))
return ENOMEM;
ids = *idp;
}
memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
ids[0] += app[0];
return 0;
}
int
mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
{
MDB_ID *ids = *idp, len = ids[0];
/* Too big? */
if (len + n > ids[-1]) {
if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
return ENOMEM;
ids = *idp;
}
ids[0] = len + n;
ids += len;
while (n)
ids[n--] = id++;
return 0;
}
void
mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
{
MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
old_id = idl[j];
while (i) {
merge_id = merge[i--];
for (; old_id < merge_id; old_id = idl[--j])
idl[k--] = old_id;
idl[k--] = merge_id;
}
idl[0] = total;
}
/* Quicksort + Insertion sort for small arrays */
#define SMALL 8
#define MIDL_SWAP(a, b) \
{ \
itmp = (a); \
(a) = (b); \
(b) = itmp; \
}
void
mdb_midl_sort(MDB_IDL ids)
{
/* Max possible depth of int-indexed tree * 2 items/level */
int istack[sizeof(int) * CHAR_BIT * 2];
int i, j, k, l, ir, jstack;
MDB_ID a, itmp;
ir = (int)ids[0];
l = 1;
jstack = 0;
for (;;) {
if (ir - l < SMALL) { /* Insertion sort */
for (j = l + 1; j <= ir; j++) {
a = ids[j];
for (i = j - 1; i >= 1; i--) {
if (ids[i] >= a)
break;
ids[i + 1] = ids[i];
}
ids[i + 1] = a;
}
if (jstack == 0)
break;
ir = istack[jstack--];
l = istack[jstack--];
} else {
k = (l + ir) >> 1; /* Choose median of left, center, right */
MIDL_SWAP(ids[k], ids[l + 1]);
if (ids[l] < ids[ir]) {
MIDL_SWAP(ids[l], ids[ir]);
}
if (ids[l + 1] < ids[ir]) {
MIDL_SWAP(ids[l + 1], ids[ir]);
}
if (ids[l] < ids[l + 1]) {
MIDL_SWAP(ids[l], ids[l + 1]);
}
i = l + 1;
j = ir;
a = ids[l + 1];
for (;;) {
do
i++;
while (ids[i] > a);
do
j--;
while (ids[j] < a);
if (j < i)
break;
MIDL_SWAP(ids[i], ids[j]);
}
ids[l + 1] = ids[j];
ids[j] = a;
jstack += 2;
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
}
}
}
}

1485
tests/soak.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -44,15 +44,17 @@ void
populate_map(sparsemap_t *map, int size, int max_value)
{
int array[size];
size_t before;
setup_test_array(array, size, max_value);
// TODO ensure_sequential_set(array, size, 10);
shuffle(array, size);
before = sparsemap_count(map);
for (int i = 0; i < size; i++) {
sparsemap_set(map, array[i], true);
bool set = sparsemap_is_set(map, array[i]);
munit_assert_true(set);
assert_true(set);
}
assert_true(sparsemap_count(map) == before + size);
}
static void *
@ -103,7 +105,7 @@ test_api_new_realloc(const MunitParameter params[], void *data)
assert_true(map->m_capacity == 1024);
assert_true(map->m_data_used == sizeof(uint32_t));
map = sparsemap_set_data_size(map, 2048, NULL);
map = sparsemap_set_data_size(map, NULL, 2048);
assert_true(map->m_capacity == 2048);
assert_true(map->m_data_used == sizeof(uint32_t));
@ -276,7 +278,7 @@ test_api_set_data_size(const MunitParameter params[], void *data)
assert_ptr_not_null(map);
assert_true(map->m_capacity == 1024);
assert_true(map->m_capacity == sparsemap_get_capacity(map));
sparsemap_set_data_size(map, 512, NULL);
sparsemap_set_data_size(map, NULL, 512);
assert_true(map->m_capacity == 512);
assert_true(map->m_capacity == sparsemap_get_capacity(map));
return MUNIT_OK;
@ -445,45 +447,6 @@ test_api_set(const MunitParameter params[], void *data)
return MUNIT_OK;
}
// TODO remove? not public API anymore...
extern sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map);
static void *
test_api_get_starting_offset_setup(const MunitParameter params[], void *user_data)
{
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
populate_map(map, 1024, 3 * 1024);
return (void *)map;
}
static void
test_api_get_starting_offset_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map->m_data);
munit_free(map->m_data);
test_api_tear_down(fixture);
}
static MunitResult
test_api_get_starting_offset(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
assert_ptr_not_null(map);
sparsemap_set(map, 42, true);
assert_true(sparsemap_is_set(map, 42));
size_t offset = sparsemap_get_starting_offset(map);
assert_true(offset == 0);
return MUNIT_OK;
}
static void *
test_api_get_size_setup(const MunitParameter params[], void *user_data)
{
@ -519,18 +482,219 @@ test_api_get_size(const MunitParameter params[], void *data)
}
static void *
test_api_scan_setup(const MunitParameter params[], void *user_data)
test_api_count_setup(const MunitParameter params[], void *user_data)
{
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee);
populate_map(map, 1024, 3 * 1024);
return (void *)map;
}
static void
test_api_count_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map->m_data);
munit_free(map->m_data);
test_api_tear_down(fixture);
}
static MunitResult
test_api_count(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
assert_ptr_not_null(map);
assert_true(sparsemap_count(map) == 1024);
sparsemap_clear(map);
sparsemap_set(map, 0, true);
assert_true(sparsemap_count(map) == 1);
sparsemap_set(map, 8675309, true);
assert_true(sparsemap_count(map) == 2);
sparsemap_clear(map);
for (int i = 0; i < 512; i++) {
sparsemap_set(map, i + 13, true);
}
assert_true(sparsemap_count(map) == 512);
sparsemap_clear(map);
assert_true(sparsemap_count(map) == 0);
return MUNIT_OK;
}
static MunitResult
test_api_get_data(const MunitParameter params[], void *data)
{
(void)data;
(void)params;
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)sparsemap_wrap(buf, 1024);
assert_ptr_not_null(map);
populate_map(map, 1024, 3 * 1024);
assert_true(sparsemap_get_data(map) == buf);
munit_free(buf);
munit_free(map);
return MUNIT_OK;
}
static void *
test_api_get_starting_offset_setup(const MunitParameter params[], void *user_data)
{
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
return (void *)map;
}
static void
test_api_get_starting_offset_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map->m_data);
munit_free(map->m_data);
test_api_tear_down(fixture);
}
static MunitResult
test_api_get_starting_offset(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
assert_ptr_not_null(map);
sparsemap_set(map, 0, true);
assert_true(sparsemap_get_starting_offset(map) == 0);
sparsemap_clear(map);
sparsemap_set(map, 1, true);
assert_true(sparsemap_get_starting_offset(map) == 1);
sparsemap_clear(map);
sparsemap_set(map, 1025, true);
assert_true(sparsemap_get_starting_offset(map) == 1025);
sparsemap_clear(map);
for (int i = 0; i < 1000; i++) {
sparsemap_set(map, i, true);
}
assert_true(sparsemap_get_starting_offset(map) == 0);
sparsemap_clear(map);
for (int i = 0; i < 1000; i++) {
sparsemap_set(map, i + 1024, true);
}
assert_true(sparsemap_get_starting_offset(map) == 1024);
sparsemap_clear(map);
sparsemap_set(map, 13012, true);
assert_true(sparsemap_get_starting_offset(map) == 13012);
return MUNIT_OK;
}
static void *
test_api_get_ending_offset_setup(const MunitParameter params[], void *user_data)
{
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
return (void *)map;
}
static void
test_api_get_ending_offset_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map->m_data);
munit_free(map->m_data);
test_api_tear_down(fixture);
}
static MunitResult
test_api_get_ending_offset(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
assert_ptr_not_null(map);
sparsemap_set(map, 0, true);
assert_true(sparsemap_get_ending_offset(map) == 0);
sparsemap_clear(map);
sparsemap_set(map, 0, true);
sparsemap_set(map, 1, true);
assert_true(sparsemap_get_ending_offset(map) == 1);
sparsemap_clear(map);
sparsemap_set(map, 0, true);
sparsemap_set(map, 67, true);
sparsemap_set(map, 1002, true);
sparsemap_set(map, 3087, true);
sparsemap_set(map, 13012, true);
assert_true(sparsemap_get_ending_offset(map) == 13012);
return MUNIT_OK;
}
static void *
test_api_get_starting_offset_rolling_setup(const MunitParameter params[], void *user_data)
{
(void)params;
(void)user_data;
sparsemap_t *map = sparsemap(10 * 1024);
assert_ptr_not_null(map);
return (void *)map;
}
static void
test_api_get_starting_offset_rolling_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map);
munit_free(map);
}
static MunitResult
test_api_get_starting_offset_rolling(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
for (sparsemap_idx_t i = 0; i < 10 * 2048; i++) {
sparsemap_set(map, i, true);
if (i > 2047) {
sparsemap_set(map, i - 2048, false);
assert_true(sparsemap_get_starting_offset(map) == i - 2047);
// printf("%d\t%d\t%zu\n", i, i - 2047, sparsemap_get_starting_offset(map));
}
}
return MUNIT_OK;
}
static void *
test_api_scan_setup(const MunitParameter params[], void *user_data)
{
uint8_t *buf = munit_calloc(1024, sizeof(uint8_t));
assert_ptr_not_null(buf);
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
return (void *)map;
}
static void
test_api_scan_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
@ -539,11 +703,15 @@ test_api_scan_tear_down(void *fixture)
test_api_tear_down(fixture);
}
void
scan_for_0xfeedfacebadcoffee(sm_idx_t v[], size_t n)
scan_for_0xfeedfacebadcoffee(sm_idx_t v[], size_t n, void *aux)
{
/* Called multiple times */
((void)v);
((void)n);
size_t bit_pos[] = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 22, 23, 24, 26, 27, 29, 31, 32, 33, 34, 35, 38, 39, 41, 43, 44, 45, 46, 47, 48, 50, 51,
53, 54, 55, 57, 58, 59, 60, 61, 62, 63 };
(void)aux;
for (size_t i = 0; i < n; i++) {
assert(v[i] == bit_pos[i]);
}
}
static MunitResult
test_api_scan(const MunitParameter params[], void *data)
@ -552,10 +720,8 @@ test_api_scan(const MunitParameter params[], void *data)
(void)params;
assert_ptr_not_null(map);
sparsemap_set(map, 4200, true);
assert_true(sparsemap_is_set(map, 4200));
sparsemap_scan(map, scan_for_0xfeedfacebadcoffee, 0);
sm_bitmap_from_uint64(map, 0, ((uint64_t)0xfeedface << 32) | 0xbadc0ffee);
sparsemap_scan(map, scan_for_0xfeedfacebadcoffee, 0, NULL);
return MUNIT_OK;
}
@ -568,9 +734,6 @@ test_api_split_setup(const MunitParameter params[], void *user_data)
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024);
for (int i = 0; i < 1024; i++) {
sparsemap_set(map, i, true);
}
return (void *)map;
}
static void
@ -591,17 +754,73 @@ test_api_split(const MunitParameter params[], void *data)
assert_ptr_not_null(map);
sparsemap_init(&portion, buf, 512);
sparsemap_split(map, 512, &portion);
for (int i = 0; i < 512; i++) {
sparsemap_init(&portion, buf, 1024);
for (sparsemap_idx_t off = 0; off < 1024; off++) {
for (sparsemap_idx_t seg = 0; seg < 10 * 1024; seg += 1024) {
for (sparsemap_idx_t i = 0; i < 1024; i++) {
assert_true(sparsemap_set(map, i + seg, true) == i + seg);
}
for (sparsemap_idx_t i = 0; i < 1024; i++) {
assert_true(sparsemap_is_set(map, i + seg));
assert_false(sparsemap_is_set(&portion, i + seg));
}
sparsemap_split(map, seg + off, &portion);
for (sparsemap_idx_t i = seg; i < seg + 1024; i++) {
if (i < seg + off) {
assert_true(sparsemap_is_set(map, i));
assert_false(sparsemap_is_set(&portion, i));
} else {
assert_false(sparsemap_is_set(map, i));
assert_true(sparsemap_is_set(&portion, i));
}
}
sparsemap_clear(map);
sparsemap_clear(&portion);
}
}
for (sparsemap_idx_t i = 0; i < 100; i++) {
assert_true(sparsemap_set(map, i, true) == i);
}
for (sparsemap_idx_t i = 0; i < 100; i++) {
assert_true(sparsemap_is_set(map, i));
assert_false(sparsemap_is_set(&portion, i));
}
for (int i = 513; i < 1024; i++) {
sparsemap_idx_t offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion);
for (size_t i = 0; i < offset; i++) {
assert_true(sparsemap_is_set(map, i));
assert_false(sparsemap_is_set(&portion, i));
}
for (int i = offset; i < 100; i++) {
assert_false(sparsemap_is_set(map, i));
assert_true(sparsemap_is_set(&portion, i));
}
sparsemap_clear(&portion);
sparsemap_clear(map);
sparsemap_init(&portion, buf, 1024);
for (sparsemap_idx_t i = 0; i < 13; i++) {
assert_true(sparsemap_set(map, i + 24, true) == i + 24);
}
offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion);
for (sparsemap_idx_t i = 0; i < offset - 24; i++) {
assert_true(sparsemap_is_set(map, i + 24));
assert_false(sparsemap_is_set(&portion, i + 24));
}
for (sparsemap_idx_t i = offset - 24; i < 13; i++) {
assert_false(sparsemap_is_set(map, i + 24));
assert_true(sparsemap_is_set(&portion, i + 24));
}
return MUNIT_OK;
}
@ -637,10 +856,8 @@ test_api_merge(const MunitParameter params[], void *data)
// Merge a single set bit in the first chunk into the empty map.
sparsemap_set(other, 0, true);
sparsemap_merge(map, other);
assert_true(sparsemap_is_set(other, 0));
assert_true(sparsemap_is_set(map, 0));
sparsemap_clear(map);
sparsemap_clear(other);
@ -648,24 +865,20 @@ test_api_merge(const MunitParameter params[], void *data)
sparsemap_set(map, 0, true);
sparsemap_set(other, 0, true);
sparsemap_merge(map, other);
assert_true(sparsemap_is_set(map, 0));
sparsemap_clear(map);
sparsemap_clear(other);
// Merge an empty map with one that has the first bit set.
sparsemap_set(map, 0, true);
sparsemap_merge(map, other);
assert_true(sparsemap_is_set(map, 0));
sparsemap_clear(map);
sparsemap_clear(other);
sparsemap_set(other, 2049, true);
sparsemap_merge(map, other);
assert_true(sparsemap_is_set(map, 2049));
sparsemap_clear(map);
sparsemap_clear(other);
@ -673,34 +886,30 @@ test_api_merge(const MunitParameter params[], void *data)
sparsemap_set(other, 2049, true);
sparsemap_set(map, 2050, true);
sparsemap_set(other, 4097, true);
sparsemap_set(map, 6113, true);
sparsemap_set(other, 8193, true);
sparsemap_merge(map, other);
assert_true(sparsemap_is_set(map, 1));
assert_true(sparsemap_is_set(map, 2049));
assert_true(sparsemap_is_set(map, 2050));
assert_true(sparsemap_is_set(map, 4097));
assert_true(sparsemap_is_set(map, 6113));
assert_true(sparsemap_is_set(map, 8193));
for (int i = 0; i < 10000; i++) {
if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 8193)
if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 6113 || i == 8193)
continue;
else
assert_false(sparsemap_is_set(map, i));
}
sparsemap_clear(map);
sparsemap_clear(other);
sparsemap_set(map, 0, true);
sparsemap_set(map, 2048, true);
sparsemap_set(map, 2049, true);
sparsemap_set(map, 8193, true);
for (int i = 2049; i < 4096; i++) {
sparsemap_set(other, i, true);
}
sparsemap_merge(map, other);
assert(sparsemap_is_set(map, 0));
assert(sparsemap_is_set(map, 2048));
@ -708,8 +917,28 @@ test_api_merge(const MunitParameter params[], void *data)
for (int i = 2049; i < 4096; i++) {
assert(sparsemap_is_set(map, i));
}
sparsemap_clear(map);
sparsemap_clear(other);
free(other);
for (int i = 2049; i < 4096; i++) {
sparsemap_set(map, i, true);
}
sparsemap_split(map, 2051, other);
for (int i = 2049; i < 4096; i++) {
if (i < 2051) {
assert_true(sparsemap_is_set(map, i));
assert_false(sparsemap_is_set(other, i));
} else {
assert_false(sparsemap_is_set(map, i));
assert_true(sparsemap_is_set(other, i));
}
}
sparsemap_merge(map, other);
for (int i = 2049; i < 4096; i++) {
sparsemap_is_set(map, i);
}
munit_free(other);
return MUNIT_OK;
}
@ -996,14 +1225,6 @@ test_api_span(const MunitParameter params[], void *data)
located_at = sparsemap_span(map, placed_at / 2, 50, true);
assert_true(located_at == placed_at);
/* TODO
sparsemap_clear(map);
placed_at = sm_add_span(map, amt, amt - 1);
located_at = sparsemap_span(map, 0, amt - 1, true);
assert_true(located_at == placed_at);
*/
return MUNIT_OK;
}
@ -1022,9 +1243,12 @@ static MunitTest api_test_suite[] = {
{ (char *)"/get_capacity", test_api_get_capacity, test_api_get_capacity_setup, test_api_get_capacity_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/is_set", test_api_is_set, test_api_is_set_setup, test_api_is_set_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/set", test_api_set, test_api_set_setup, test_api_set_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_starting_offset", test_api_get_starting_offset, test_api_get_starting_offset_setup, test_api_get_starting_offset_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
//TODO { (char *)"/get_ending_offset", test_api_get_ending_offset, test_api_get_ending_offset_setup, test_api_get_ending_offset_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_size", test_api_get_size, test_api_get_size_setup, test_api_get_size_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/count", test_api_count, test_api_count_setup, test_api_count_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_data", test_api_get_data, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_starting_offset", test_api_get_starting_offset, test_api_get_starting_offset_setup, test_api_get_starting_offset_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_starting_offset/rolling", test_api_get_starting_offset_rolling, test_api_get_starting_offset_rolling_setup, test_api_get_starting_offset_rolling_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/get_ending_offset", test_api_get_ending_offset, test_api_get_ending_offset_setup, test_api_get_ending_offset_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/scan", test_api_scan, test_api_scan_setup, test_api_scan_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/split", test_api_split, test_api_split_setup, test_api_split_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/merge", test_api_merge, test_api_merge_setup, test_api_merge_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
@ -1071,14 +1295,13 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data)
for (size_t i = 0; i < amt;) {
int l = i % 31 + 16;
// TODO: sm_add_span(map, amt, l);
sm_add_span(map, 10000, l);
if (errno == ENOSPC) {
map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL);
map = sparsemap_set_data_size(map, NULL, sparsemap_get_capacity(map) * 2);
errno = 0;
}
i += l;
/* ANSI esc code to clear line, carrage return, then print on the same line */
/* ANSI esc code to clear line, carriage return, then print on the same line */
// printf("\033[2K\r%d", i);
// printf("%d\t%d\n", l, i);
}
@ -1120,7 +1343,7 @@ test_scale_ondrej(const MunitParameter params[], void *data)
bool set = (i != needle) ? (j < 10) : (j < 9);
sparsemap_set(map, i, set);
if (errno == ENOSPC) {
map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL);
map = sparsemap_set_data_size(map, NULL, sparsemap_get_capacity(map) * 2);
errno = 0;
}
}
@ -1159,6 +1382,40 @@ test_scale_fuzz(const MunitParameter params[], void *data)
return MUNIT_OK;
}
static void *
test_scale_alternating_setup(const MunitParameter params[], void *user_data)
{
(void)params;
(void)user_data;
sparsemap_t *map = sparsemap(10 * 1024);
assert_ptr_not_null(map);
return (void *)map;
}
static void
test_scale_alternating_tear_down(void *fixture)
{
sparsemap_t *map = (sparsemap_t *)fixture;
assert_ptr_not_null(map);
munit_free(map);
}
extern char *bytes_as(double bytes, char *s, size_t size);
static MunitResult
test_scale_alternating(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
for (sparsemap_idx_t i = 0; i < (1000 * 8192); i++) {
if (i % 2) {
if (sparsemap_set(map, i, true) != i) {
// printf("%zu\n", i);
break;
}
}
}
return MUNIT_OK;
}
static void *
test_scale_spans_come_spans_go_setup(const MunitParameter params[], void *user_data)
{
@ -1178,7 +1435,7 @@ test_scale_spans_come_spans_go_tear_down(void *fixture)
static MunitResult
test_scale_spans_come_spans_go(const MunitParameter params[], void *data)
{
size_t amt = 8192; // 268435456; // ~5e7 interations due to 2e9 / avg(l)
size_t amt = 8192; // 268435456; // ~5e7 iterations due to 2e9 / avg(l)
sparsemap_t *map = (sparsemap_t *)data;
(void)params;
@ -1188,7 +1445,7 @@ test_scale_spans_come_spans_go(const MunitParameter params[], void *data)
int l = i % 31 + 16;
sm_add_span(map, amt, l);
if (errno == ENOSPC) {
map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024, NULL);
map = sparsemap_set_data_size(map, NULL, sparsemap_get_capacity(map) + 1024);
assert_ptr_not_null(map);
errno = 0;
}
@ -1251,8 +1508,6 @@ test_scale_best_case(const MunitParameter params[], void *data)
So, in a 1KiB buffer you have:
(1024 KiB / 8 bytes) * 2048 = 268,435,456 bits
or 1.09 TiB of 4KiB pages. Let's investigate, and find out if that's the case.
TODO: Actually, 172032 are stored before SEGV, or 706 MiB of 4KiB pages.
*/
/* Set every bit on, that should be the best case. */
@ -1297,8 +1552,6 @@ test_scale_worst_case(const MunitParameter params[], void *data)
So, in a 1KiB buffer you have:
(1024 KiB / 264 bytes) * 2048 = 8,134,407.75758 bits
or 33.3 GiB of 4KiB pages. Let's investigate, and find out if that's the case.
TODO: actually 7744 are stored before SEGV, or 31MiB of 4KiB pages.
*/
/* Set every other bit, that has to be the "worst case" for this index. */
@ -1337,26 +1590,16 @@ static MunitResult
test_perf_span_solo(const MunitParameter params[], void *data)
{
sparsemap_t *map = (sparsemap_t *)data;
// double stop, start;
(void)params;
int located_at, placed_at, amt = 500;
assert_ptr_not_null(map);
return MUNIT_OK; // TODO
for (int i = 1; i < amt; i++) {
for (int length = 1; length <= 100; length++) {
sparsemap_clear(map);
placed_at = sm_add_span(map, amt, length);
// logf("i = %d, length = %d\tplaced_at %d\n", i, length, placed_at);
// sm_whats_set(map, 5000);
// start = nsts();
located_at = sparsemap_span(map, 0, length, true);
// stop = nsts();
// double amt = (stop - start) * 1e6;
// if (amt > 0) {
// fprintf(stdout, "%0.8f\n", amt);
// }
if (placed_at != located_at)
logf("a: i = %d, length = %d\tplaced_at %d located_at %d\n", i, length, placed_at, located_at);
}
@ -1420,6 +1663,7 @@ static MunitTest scale_test_suite[] = {
{ (char *)"/ondrej", test_scale_ondrej, test_scale_ondrej_setup, test_scale_ondrej_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
#endif
{ (char *)"/fuzz", test_scale_fuzz, test_scale_fuzz_setup, test_scale_fuzz_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/alternating", test_scale_alternating, test_scale_alternating_setup, test_scale_alternating_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/spans_come_spans_go", test_scale_spans_come_spans_go, test_scale_spans_come_spans_go_setup, test_scale_spans_come_spans_go_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/best-case", test_scale_best_case, test_scale_best_case_setup, test_scale_best_case_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/worst-case", test_scale_worst_case, test_scale_worst_case_setup, test_scale_worst_case_tear_down, MUNIT_TEST_OPTION_NONE, NULL },