shrink the buffer as chunks empty (#12 )

Reviewed-on: #12 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>
Merge pull request 'fuzz testing' (#11 ) from gburd/fixing-the-fuzz into main
2024-07-02 13:07:25 +00:00 · 2024-06-27 08:13:15 +00:00 · 2024-06-27 04:10:08 -04:00 · 2024-05-23 19:59:24 -04:00 · 2024-05-21 11:59:07 -04:00 · 2024-05-21 00:29:26 +00:00
29 changed files with 34769 additions and 993 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -3,8 +3,11 @@ Checks:    >
  bugprone-*,
  clang-analyzer-*,
  google-*,
-  misc-*,
+  -google-objectivec-*,
  modernize-*,
  -modernize-deprecated-headers,
  -modernize-use-using,
  misc-*,
  performance-*,
  portability-*,
  -bugprone-branch-clone,
--- a/.envrc
+++ b/.envrc
@ -1,5 +1,5 @@
 if ! has nix_direnv_version || ! nix_direnv_version 3.0.4; then
    source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/3.0.4/direnvrc" "sha256-DzlYZ33mWF/Gs8DDeyjr8mnVmQGx7ASYqA5WlxwvBG4="
 fi
-watch_file devShell.nix shell.nix flake.nix
+watch_file shell.nix flake.nix
 use flake || use nix
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@
 **/*.o
 tests/test
 examples/ex_?
 tests/soak
 .cache
 hints.txt
 tmp/
@ -27,6 +28,7 @@ compile_commands.json
 *.dat
 *.fsm
 *.db
 .vscode/
 # Created by https://www.gitignore.io/api/jetbrains
 # Edit at https://www.gitignore.io/?templates=jetbrains
--- a/.idea/customTargets.xml
+++ b/.idea/customTargets.xml
@ -0,0 +1,15 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="CLionExternalBuildManager">
    <target id="db0ccaeb-4851-470b-83d0-afa663f6ceb9" name="tests/soak" defaultType="MAKE">
      <configuration id="98973a90-a9d0-431b-9071-9ce6960b0b01" name="tests/soak">
        <build type="MAKE">
          <make targetName="tests/soak" />
        </build>
        <clean type="MAKE">
          <make targetName="clean" />
        </clean>
      </configuration>
    </target>
  </component>
 </project>
--- a/.idea/makefile.xml
+++ b/.idea/makefile.xml
@ -0,0 +1,25 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="MakefileBuildTargetsManager">
    <user-build-targets>
      <build-target name="tests/soak">
        <build-configurations>
          <build-configuration>
            <make-targets>
              <make-target>tests/soak</make-target>
            </make-targets>
          </build-configuration>
        </build-configurations>
      </build-target>
      <build-target name="clean">
        <build-configurations>
          <build-configuration>
            <make-targets>
              <make-target>clean</make-target>
            </make-targets>
          </build-configuration>
        </build-configurations>
      </build-target>
    </user-build-targets>
  </component>
 </project>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -8,6 +8,7 @@
    <sourceRoots>
      <file path="$PROJECT_DIR$/examples" />
      <file path="$PROJECT_DIR$/include" />
      <file path="$PROJECT_DIR$/lib" />
      <file path="$PROJECT_DIR$/src" />
      <file path="$PROJECT_DIR$/tests" />
    </sourceRoots>
--- a/.idx/dev.nix
+++ b/.idx/dev.nix
@ -0,0 +1,105 @@
 # To learn more about how to use Nix to configure your environment
 # see: https://developers.google.com/idx/guides/customize-idx-env
 { pkgs, ... }: {
  # Which nixpkgs channel to use.
  channel = "stable-23.11"; # or "unstable"
  # Use https://search.nixos.org/packages to find packages
  packages = with pkgs; [
    act
    autoconf
    clang
    clang-tools
    cmake
    ed
    fira-code-nerdfont
    gcc
    gdb
    gettext
    glibc.out
    glibc.static
    gnumake
    graphviz-nox
    libbacktrace
    libtool
    lldb
    m4
    neovim
    ninja
    openssh
    perl
    pkg-config
    python3
    ripgrep
    # pkgs.python311
    # pkgs.python311Packages.pip
  ];
  # Sets environment variables in the workspace
  env = {
    GIT_SSH_COMMAND="ssh -i ~/.ssh/id_ed25519 -F /dev/null";
  };
  idx = {
    # Search for the extensions you want on https://open-vsx.org/ and use "publisher.id"
    extensions = [
      "coolbear.systemd-unit-file"
      "dotjoshjohnson.xml"
      "editorconfig.editorconfig"
      "esbenp.prettier-vscode"
      "mads-hartmann.bash-ide-vscode"
      "ms-python.python"
      "ms-vscode.clangd"
      "ms-vscode.cmake-tools"
      "ms-vscode.cpptools"
      "ms-vscode.cpptools-extension-pack"
      "ms-vscode.makefile-tools"
      "ms-vsliveshare.vsliveshare"
      "redhat.vscode-yaml"
      "rogalmic.bash-debug"
      "ryu1kn.partial-diff"
      "streetsidesoftware.code-spell-checker"
      "timonwong.shellcheck"
      "twxs.cmake"
      "vadimcn.vscode-lldb"
      #"vscode-icons-team.vscode-icons"
      "yzhang.markdown-all-in-one"
      "znck.grammarly"
      #"llvm-vs-code-extensions.vscode-clangd"
      #"eamodio.gitlens"
      "asvetliakov.vscode-neovim"
      #"golang.go"
      #"jnoortheen.nix-ide"
      #"ms-python.vscode-pylance"
      #"mspython.debugpy"
      #"scala-lang.scala"
      #"scalameta.metals"
      #"vscodevim.vim"
    ];
    # Enable previews
    previews = {
      enable = true;
      previews = {
        # web = {
        #   # Example: run "npm run dev" with PORT set to IDX's defined port for previews,
        #   # and show it in IDX's web preview panel
        #   command = ["npm" "run" "dev"];
        #   manager = "web";
        #   env = {
        #     # Environment variables to set for your server
        #     PORT = "$PORT";
        #   };
        # };
      };
    };
    # Workspace lifecycle hooks
    workspace = {
      # Runs when a workspace is first created
      onCreate = {
        # Example: install JS dependencies from NPM
        # npm-install = 'npm install';
      };
      onStart = {
        # Example: start a background task to watch and re-build backend code
        # watch-backend = "npm run watch-backend";
      };
    };
  };
 }
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,94 @@
 cmake_minimum_required(VERSION 3.27)
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
    message(FATAL_ERROR "Do not build in-source. Please remove CMakeCache.txt and the CMakeFiles/ directory. Then build out-of-source.")
 endif()
 project(sparsemap LANGUAGES C)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_C_OUTPUT_EXTENSION .o)
 # Set source and header file locations
 set(SOURCE_DIR src)
 set(HEADER_DIR include)
 set(COMMON_CMAKE_C_FLAGS "-Wall -Wextra -Wpedantic")
 set(CMAKE_C_FLAGS_DEBUG "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -O0")
 set(CMAKE_C_FLAGS_PROFILE "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -Og -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope")
 set(CMAKE_C_FLAGS_RELEASE "-Ofast")
 # Include all header files from the header directory
 file(GLOB_RECURSE HEADERS CONFIGURE_FILES ${HEADER_DIR}/*.h)
 # Configure library sources
 set(LIB_SRC
    ${SOURCE_DIR}/sparsemap.c
 )
 # Option to control building shared/static libraries
 option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 # Add shared library
 add_library(sparsemap_SHARED SHARED ${LIB_SRC} ${HEADERS})
 # Set target properties for the shared library (adjust if needed)
 set_target_properties(sparsemap_SHARED PROPERTIES
    VERSION 1.0.0  # Set library version
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"  # Set output directory
    COMPILE_FLAGS "${CMAKE_C_FLAGS_${CMAKE_CURRENT_LIST_MODE}}"
 )
 target_include_directories(sparsemap_SHARED PRIVATE ${HEADER_DIR})
 # Add static library
 add_library(sparsemap STATIC ${LIB_SRC} ${HEADERS})
 # Set target properties for static library (adjust if needed)
 set_target_properties(sparsemap PROPERTIES
    OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"  # Set output directory
 )
 target_include_directories(sparsemap PRIVATE ${HEADER_DIR})
 # Add ex_1 program
 add_executable(ex_1 examples/ex_1.c tests/munit.c lib/common.c)
 target_link_libraries(ex_1 PRIVATE sparsemap)
 target_include_directories(ex_1 PRIVATE ${HEADER_DIR})
 add_custom_target(run_ex_1 COMMAND ex_1 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add ex_2 program
 add_executable(ex_2 examples/ex_2.c tests/munit.c lib/common.c)
 target_link_libraries(ex_2 PRIVATE sparsemap)
 target_include_directories(ex_2 PRIVATE ${HEADER_DIR})
 add_custom_target(run_ex_2 COMMAND ex_2 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add ex_3 program
 add_executable(ex_3 examples/ex_3.c tests/munit.c lib/common.c)
 target_link_libraries(ex_3 PRIVATE sparsemap)
 target_include_directories(ex_3 PRIVATE ${HEADER_DIR})
 add_custom_target(run_ex_3 COMMAND ex_3 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add ex_4 program
 add_executable(ex_4 examples/ex_4.c tests/munit.c lib/common.c)
 target_link_libraries(ex_4 PRIVATE sparsemap)
 target_include_directories(ex_4 PRIVATE ${HEADER_DIR})
 add_custom_target(run_ex_4 COMMAND ex_4 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add test program
 add_executable(test tests/test.c tests/munit.c lib/common.c)
 target_link_libraries(test PRIVATE sparsemap)
 target_include_directories(test PRIVATE ${HEADER_DIR})
 add_custom_target(run_test COMMAND test WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add soak program
 add_executable(soak tests/soak.c lib/common.c lib/tdigest.c lib/roaring.c)
 target_link_libraries(soak PRIVATE sparsemap)
 target_include_directories(soak PRIVATE ${HEADER_DIR} lib)
 target_link_libraries(soak PUBLIC m)
 add_custom_target(run_soak COMMAND soak WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 # Add fuzzer program
 # add_executable(fuzzer tests/fuzzer.c)
 # target_link_libraries(fuzzer PRIVATE sparsemap)
 # target_include_directories(fuzzer PRIVATE ${HEADER_DIR} lib)
 # target_link_libraries(fuzzer PUBLIC m)
 # add_custom_target(run_fuzzer COMMAND fuzzer WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
--- a/69
+++ b/69
@ -3,16 +3,24 @@ OBJS = sparsemap.o
 STATIC_LIB = libsparsemap.a
 SHARED_LIB = libsparsemap.so
 LIBS =  -lm
 #CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
 #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
-CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
+#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
-#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
+CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Ofast -g -std=c11 -Iinclude/ -fPIC
 #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
 #CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
 #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
 #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
-TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
+#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
 TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
 #TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
 #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
-TESTS = tests/test
+TESTS = tests/test tests/soak
-TEST_OBJS = tests/test.o tests/munit.o tests/common.o
+TEST_OBJS = tests/test.o lib/munit.o lib/tdigest.o lib/common.o
 LIB_OBJS = lib/munit.o lib/tdigest.o lib/common.o lib/roaring.o
 EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4
 .PHONY: all shared static clean test examples mls
@ -29,17 +37,23 @@ $(STATIC_LIB): $(OBJS)
 $(SHARED_LIB): $(OBJS)
 	$(CC) $(CFLAGS) -o $@ $? -shared
-examples: $(STATIC_LIB) $(EXAMPLES) examples/common.o
+examples: $(STATIC_LIB) $(EXAMPLES) $(TEST_OBJS)
 mls: examples/mls
-test: $(TESTS)
+tests: $(TESTS)
-check: test
+test: tests
 	env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
-tests/test: $(TEST_OBJS) $(STATIC_LIB)
+soak: tests
-	$(CC) $^ -o $@ $(TEST_FLAGS)
+	env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak
 fuzzer: tests
 	env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/fuzzer ./crash.case
 tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
 	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
 clean:
 	rm -f $(OBJS)
@ -49,31 +63,44 @@ clean:
 	rm -f $(EXAMPLES) examples/*.o
 format:
-	clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/test.c tests/common.c tests/common.h
+	clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c tests/midl.c lib/common.c include/common.h
 #	clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
 %.o: src/%.c
 	$(CC) $(CFLAGS) -c -o $@ $^
 lib/%.o: tests/%.c
 	$(CC) $(CFLAGS) -c -o $@ $^
 tests/%.o: tests/%.c
 	$(CC) $(CFLAGS) -c -o $@ $^
 examples/%.o: examples/%.c
 	$(CC) $(CFLAGS) -c -o $@ $^
-examples/common.o: tests/common.c
+examples/ex_1:  $(LIB_OBJS) examples/ex_1.o $(STATIC_LIB)
-	$(CC) $(CFLAGS) -c -o $@ $^
+	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
-examples/ex_1: examples/common.o examples/ex_1.o $(STATIC_LIB)
+examples/ex_2: $(LIB_OBJS) examples/ex_2.o $(STATIC_LIB)
-	$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
+	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
-examples/ex_2: examples/common.o examples/ex_2.o $(STATIC_LIB)
+examples/ex_3: $(LIB_OBJS) examples/ex_3.o $(STATIC_LIB)
-	$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
+	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
-examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB)
+examples/ex_4: $(LIB_OBJS) examples/ex_4.o $(STATIC_LIB)
-	$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
+	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
-examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB)
+tests/soak: $(LIB_OBJS) tests/soak.o $(STATIC_LIB)
-	$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
+	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
 tests/fuzzer: $(LIB_OBJS) tests/fuzzer.o $(STATIC_LIB)
 	$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS) -DFUZZ_DEBUG
 todo:
 	rg -i 'todo|gsb|abort'
 # cp src/sparsemap.c /tmp && clang-tidy src/sparsemap.c -fix -fix-errors -checks="readability-braces-around-statements" -- -DDEBUG -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
 # clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
 # clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./examples/soak
--- a/README.md
+++ b/README.md
@ -1,8 +1,12 @@
 # Sparsemap
-`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
+Bitsets, also called bitmaps, are commonly used as fast data structures.
-bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
+Unfortunately, they can use too much memory. To compensate, we often use
-requires additional 8 bytes of overhead.
+compressed bitmaps.
 `sparsemap` is a sparse, compressed bitmap. In the best case, it can store 2048
 bits in just 8 bytes. In the worst case, it stores the 2048 bits uncompressed and
 requires an additional 8 bytes of overhead.
 The "best" case happens when large consecutive sequences of the bits are
 either set ("1") or not set ("0"). If your numbers are consecutive 64bit
@ -14,9 +18,9 @@ On the lowest level stores bits in sm_bitvec_t's (a uint32_t or uint64_t).
 Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
 to each sm_bitvec_t describes its condition. The descriptor word and the
-sm_bitvec_t's have the same size.) The descriptor of a sm_bitvec_t
+sm_bitvec_t's have the same size. The descriptor of a sm_bitvec_t
 specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
-bits ("0") or has a mixed payload. In the first and second case the
+bits ("0") or has a mixed payload. In the first and second cases, the
 sm_bitvec_t is not stored.
 An example shows a sequence of 4 x 16 bits (here, each sm_bitvec_t and the
@ -27,7 +31,7 @@ Descriptor word has 16 bits):
      ^^ ^^ ^^ ^^-- sm_bitvec_t #0 - #3 are "0000000000000000"
                  ^^-- sm_bitvec_t #4 is "1111111111111111"
                     ^^-- sm_bitvec_t #5 is "0000000000000000"
-                        ^^-- sm_bitvec_t #7 is "1111111111111111"
+                        ^^-- sm_bitvec_t #6 is "1111111111111111"
                           ^^-- sm_bitvec_t #7 is "0110010101111001"
 Since the first 7 sm_bitvec_t's are either all "1" or "0" they are not stored.
@ -36,29 +40,34 @@ The actual memory sequence looks like this:
      0000000011001110 0110010101111001
 Instead of storing 8 Words (16 bytes), we only store 2 Words (2 bytes): one
-for the descriptor, one for last sm_bitvec_t #7.
+for the descriptor, and one for the last sm_bitvec_t #7.
-The sparsemap stores a list of chunk maps, and for each chunk map it stores the
+The sparsemap stores a list of chunk maps, and for each chunk map, it stores the
 absolute address (i.e. if the user sets bit 0 and bit 10000, and the chunk map
 capacity is 2048, the sparsemap creates two chunk maps; the first starts at
 offset 0, the second starts at offset 8192).
 ## Usage instructions
-The file `examples/ex_1.c` has example code.
+Copy the files `src/sparsemap.c` and `include/sparsemap.h` into your project.
 Review the `examples/*` and `tests/*` code.
 ## Final words
 This bitmap has efficient compression when used on long sequences of set (or
-unset) bits (i.e. with a word size of 64bit, and a payload of consecutive
+unset) bits (i.e. with a word size of 64 bit and a payload of consecutive
 numbers without gaps, the payload of 2048 x sizeof(uint64_t) = 16kb will occupy
-only 8 bytes!
+only 8 bytes!).
 However, if the sequence is not consecutive and has gaps, it's possible that
 the compression is inefficient, and the size (in the worst case) is identical
 to an uncompressed bit vector (sometimes higher due to the bytes required for
 metadata). In such cases, other compression schemes are more efficient (i.e.
-http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).
+http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).  We
 include in `lib` the amalgamated (git `2dc8070`) and well-known
 [Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring/tree/master) and
 use it in the soak test to ensure our results are as accurate as theirs.
-This library was originally created for hamsterdb [http://hamsterdb.com] in
+This library was originally created by [Christoph Rupp](https://crupp.de) in
-C++ and then translated to C99 code by Greg Burd <greg@burd.me>.
+C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
 for use in LMDB and OpenLDAP.
--- a/cmake-it.sh
+++ b/cmake-it.sh
@ -0,0 +1,14 @@
 #!/usr/bin/env bash
 target=${1:-Debug}
 set targets="Debug Profile Release"
 case "$target" in
    $targets*) echo "Building ${target}..." ;;
    *) echo "Unknown target ${target}, exiting." ;;
 esac
 name=${target,,}
 echo $name
 rm -rf "./cmake-build-${name}-system" && \
    cmake -DCMAKE_BUILD_TYPE=${target} -DCMAKE_MAKE_PROGRAM=ninja -DCMAKE_C_COMPILER=clang -G Ninja -S "${PWD}" -B "${PWD}/cmake-build-${name}-system" && \
    (cd "${PWD}/cmake-build-${name}-system" && ninja)
--- a/examples/ex_1.c
+++ b/examples/ex_1.c
@ -1,4 +1,7 @@
 #include <assert.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include "../include/sparsemap.h"
@ -14,16 +17,18 @@
 /* !!! Duplicated here for testing purposes. Keep in sync, or suffer. !!! */
 struct sparsemap {
  uint8_t *m_data;
  size_t m_capacity;
  size_t m_data_used;
  uint8_t *m_data;
 };
 int
 main()
 {
  size_t size = 4;
-  setbuf(stderr, 0); // disable buffering
+  setvbuf(stdout, NULL, _IONBF, 0); // Disable buffering for stdout
  setvbuf(stderr, NULL, _IONBF, 0); // Disable buffering for stdout
  __diag("Please wait a moment...");
  sparsemap_t mmap, *map = &mmap;
  uint8_t buffer[1024];
@ -135,7 +140,7 @@ main()
    sparsemap_set(map, i, true);
  }
  for (int i = 0; i < 100000; i++) {
-    assert(sparsemap_select(map, i) == (unsigned)i);
+    assert(sparsemap_select(map, i, true) == (unsigned)i);
  }
  sparsemap_clear(map);
@ -145,7 +150,7 @@ main()
    sparsemap_set(map, i, true);
  }
  for (int i = 1; i < 513; i++) {
-    assert(sparsemap_select(map, i - 1) == (unsigned)i);
+    assert(sparsemap_select(map, i - 1, true) == (unsigned)i);
  }
  sparsemap_clear(map);
@ -155,7 +160,7 @@ main()
    sparsemap_set(map, i * 10, true);
  }
  for (size_t i = 0; i < 8; i++) {
-    assert(sparsemap_select(map, i) == i * 10);
+    assert(sparsemap_select(map, i, true) == (sparsemap_idx_t)i * 10);
  }
  // split and move, aligned to MiniMap capacity
@ -181,16 +186,18 @@ main()
  sparsemap_clear(map);
  for (int i = 0; i < 2048 * 3; i++) {
    sparsemap_set(map, i, true);
    assert(sparsemap_is_set(map, i) == true);
  }
  sparsemap_split(map, 64, sm2);
-  for (int i = 0; i < 64; i++) {
+  for (int i = 0; i < 2048 * 3; i++) {
    if (i < 64) {
      assert(sparsemap_is_set(map, i) == true);
      assert(sparsemap_is_set(sm2, i) == false);
-  }
+    } else {
  for (int i = 64; i < 2048 * 3; i++) {
      assert(sparsemap_is_set(map, i) == false);
      assert(sparsemap_is_set(sm2, i) == true);
    }
  }
  fprintf(stderr, " ok\n");
 }
--- a/examples/ex_2.c
+++ b/examples/ex_2.c
@ -1,9 +1,7 @@
 #include <assert.h>
-#include <stdarg.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
 #include "../include/sparsemap.h"
@ -16,32 +14,31 @@
  } while (0)
 #pragma GCC diagnostic pop
 #define SEED
 int
 main(void)
 {
-  int i = 0;
+  int i;
  // disable buffering
-  setbuf(stderr, 0);
+  setvbuf(stdout, NULL, _IONBF, 0); // Disable buffering for stdout
  setvbuf(stderr, NULL, _IONBF, 0); // Disable buffering for stdout
  // start with a 1KiB buffer, 1024 bits
  uint8_t *buf = calloc(1024, sizeof(uint8_t));
  // create the sparse bitmap
-  sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024);
+  sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 1024);
  // Set every other bit (pathologically worst case) to see what happens
  // when the map is full.
  for (i = 0; i < 7744; i++) {
-    if (i % 2)
+    if (!i % 2) {
      continue;
      sparsemap_set(map, i, true);
      assert(sparsemap_is_set(map, i) == true);
    }
  }
  // On 1024 KiB of buffer with every other bit set the map holds 7744 bits
-  // and then runs out of space.  This next _set() call will fail/abort.
+  // and then runs out of space.  This next _set() call will fail.
  sparsemap_set(map, ++i, true);
  assert(sparsemap_is_set(map, i) == true);
  return 0;
--- a/examples/ex_3.c
+++ b/examples/ex_3.c
@ -1,17 +1,14 @@
 #include <assert.h>
-#include <stdarg.h>
+#include <common.h>
 #include <sparsemap.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
 #include "../include/sparsemap.h"
 #include "../tests/common.h"
 int
 main(void)
 {
-  int i = 0;
+  int i;
  int array[1024] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
    38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
    77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
@ -60,7 +57,7 @@ main(void)
  uint8_t *buf = calloc(1024, sizeof(uint8_t));
  // create the sparse bitmap
-  sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024);
+  sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 1024);
  // set all the bits on in a random order
  for (i = 0; i < 1024; i++) {
--- a/examples/ex_4.c
+++ b/examples/ex_4.c
@ -1,11 +1,10 @@
 #include <assert.h>
 #include <common.h>
 #include <sparsemap.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include "../include/sparsemap.h"
 #include "../tests/common.h"
 #define TEST_ARRAY_SIZE 1024
 int
@ -24,7 +23,7 @@ main(void)
  uint8_t *buf = calloc((size_t)3 * 1024, sizeof(uint8_t));
  // create the sparse bitmap
-  sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 3 * 1024);
+  sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 3 * 1024);
  // create an array of ints
  setup_test_array(array, TEST_ARRAY_SIZE, 1024 * 3);
@ -60,7 +59,7 @@ main(void)
      assert(sparsemap_is_set(map, array[i]) == true);
    }
    has_span(map, array, TEST_ARRAY_SIZE, (int)len);
-    size_t l = sparsemap_span(map, 0, len);
+    size_t l = sparsemap_span(map, 0, len, true);
    if (l != (size_t)-1) {
      __diag("Found span in map starting at %lu of length %lu\n", l, len);
      __diag("is_span(%lu, %lu) == %s\n", l, len, is_span(array, TEST_ARRAY_SIZE, l, len) ? "yes" : "no");
--- a/flake.lock
+++ b/flake.lock
@ -1,43 +1,25 @@
 {
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1710146030,
        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1712192574,
+        "lastModified": 1701282334,
-        "narHash": "sha256-LbbVOliJKTF4Zl2b9salumvdMXuQBr2kuKP5+ZwbYq4=",
+        "narHash": "sha256-MxCVrXY6v4QmfTwIysjjaX0XUhqBbxTWWB4HXtDYsdk=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "f480f9d09e4b4cf87ee6151eba068197125714de",
+        "rev": "057f9aecfb71c4437d2b27d3323df7f93c010b7e",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
-        "ref": "nixpkgs-unstable",
+        "ref": "23.11",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
-        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs",
-        "nixpkgs": "nixpkgs"
+        "utils": "utils"
      }
    },
    "systems": {
@ -54,6 +36,24 @@
        "repo": "default",
        "type": "github"
      }
    },
    "utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1710146030,
        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@ -1,38 +1,30 @@
 {
-  description = "A Concurrent Skip List library for key/value pairs.";
+  description = "A sparse bitmapped index library in C.";
  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
+    # nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
-    flake-utils.url = "github:numtide/flake-utils";
+    nixpkgs.url = "github:NixOS/nixpkgs/23.11";
    utils.url = "github:numtide/flake-utils";
  };
-  outputs =
+  outputs = { self, nixpkgs, ... }
-    { self
+    @inputs: inputs.utils.lib.eachSystem [
-    , nixpkgs
+      "x86_64-linux" "i686-linux" "aarch64-linux" "x86_64-darwin"
-    , flake-utils
+    ] (system:
-    , ...
+      let pkgs = import nixpkgs {
    }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
            inherit system;
-          config = { allowUnfree = true; };
+            overlays = [];
            config.allowUnfree = true;
          };
        supportedSystems = [ "x86_64-linux" ];
        forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
        nixpkgsFor = forAllSystems (system: import nixpkgs {
          inherit system;
          overlays = [ self.overlay ];
        });
      in {
-        pkgs = import nixpkgs {
+        flake-utils.inputs.systems.follows = "system";
-          inherit system;
+        devShell = pkgs.mkShell rec {
-          devShell = nixpkgs.legacyPackages.${system} {
+          name = "sparsemap";
-            pkgs.mkShell = {
+          packages = with pkgs; [
              nativeBuildInputs = with pkgs.buildPackages; [
            act
            autoconf
            clang
            cmake
            ed
            gcc
            gdb
@ -40,20 +32,26 @@
            graphviz-nox
            libtool
            m4
            ninja
            perl
            pkg-config
            python3
            ripgrep
            valgrind
          ];
          buildInputs = with pkgs; [
            libbacktrace
            glibc.out
            glibc.static
          ];
          shellHook = let
            icon = "f121";
          in ''
        export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
        '';
        };
        DOCKER_BUILDKIT = 1;
          };
        };
      });
 }
--- a/include/common.h
+++ b/include/common.h
@ -1,4 +1,6 @@
 #include "../include/sparsemap.h"
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvariadic-macros"
 #define __diag(...)                                                \
@ -23,23 +25,12 @@
 #define XORSHIFT_SEED_VALUE ((unsigned int)time(NULL) ^ getpid())
 #endif
 #define EST_MEDIAN_DECL(decl, size)   \
  uint64_t heap_##decl[size] = { 0 }; \
  int heap_##decl##_max_size = size;  \
  int heap_##decl##_size = 0;
 #define EST_MEDIAN_ADD(decl, value) est_insert_value(heap_##decl, heap_##decl##_max_size, &heap_##decl##_size, (value));
 #define EST_MEDIAN_GET(decl) heap_##decl[0]
 uint64_t tsc(void);
 double tsc_ticks_to_ns(uint64_t tsc_ticks);
-void est_sift_up(uint64_t *heap, int child_index);
+double nsts(void);
 void est_sift_down(uint64_t *heap, int heap_size, int parent_index);
 void est_insert_value(uint64_t *heap, int heap_max_size, int *heap_size, uint64_t value);
-void xorshift32_seed();
+void xorshift32_seed(void);
-uint32_t xorshift32();
+uint32_t xorshift32(void);
 void print_array(int *array, int l);
 void print_spans(int *array, int n);
@ -52,11 +43,18 @@ int is_unique(int a[], int l, int value);
 void setup_test_array(int a[], int l, int max_value);
 void shuffle(int *array, size_t n);
 int ensure_sequential_set(int a[], int l, int r);
-int create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r);
+sparsemap_idx_t sm_add_span(sparsemap_t *map, int map_size, int span_length);
 void print_bits(char *name, uint64_t value);
 void bitmap_from_uint32(sparsemap_t *map, uint32_t number);
-void bitmap_from_uint64(sparsemap_t *map, uint64_t number);
+void sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number);
 uint32_t rank_uint64(uint64_t number, int n, int p);
 int whats_set_uint64(uint64_t number, int bitPositions[64]);
-void whats_set(sparsemap_t *map, int m);
+void sm_whats_set(sparsemap_t *map, int off, int len);
 bool sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
 bool sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
 char *bytes_as(double bytes, char *s, size_t size);
--- a/include/roaring.h
+++ b/include/roaring.h
--- a/include/sparsemap.h
+++ b/include/sparsemap.h
@ -69,14 +69,14 @@
 #ifndef SPARSEMAP_H
 #define SPARSEMAP_H
 #include <sys/types.h>
 #include <assert.h>
 #include <limits.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <stdio.h>
+
-#include <string.h>
+#if defined(__cplusplus)
 extern "C" {
 #endif
 /*
 * The public interface for a sparse bit-mapped index, a "sparse map".
@ -88,55 +88,299 @@
 */
 typedef struct sparsemap sparsemap_t;
 typedef size_t sparsemap_idx_t;
 #define SPARSEMAP_IDX_MAX SIZE_MAX
 #define SPARSEMAP_FOUND(x) ((x) != SPARSEMAP_IDX_MAX)
 #define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX)
 typedef uint32_t sm_idx_t;
 typedef uint64_t sm_bitvec_t;
-/* Allocate on a sparsemap_t on the heap and initialize it. */
+/** @brief Allocate a new, empty sparsemap_t with a buffer of \b size on the
-sparsemap_t *sparsemap(uint8_t *data, size_t size);
+ * heap to use for storage of bitmap data.
 *
 * The buffer used for the bitmap is allocated in the same heap allocation as
 * the structure, this means that you only need to call free() on the returned
 * object to free all resources.  Using this method allows you to grow the
 * buffer size by calling #sparsemap_set_data_size().  This function calls
 * #sparsemap_init().
 *
 * @param[in] size The starting size of the buffer used for the bitmap, default
 * is 1024 bytes.
 * @returns The newly allocated sparsemap reference.
 */
 sparsemap_t *sparsemap(size_t size);
-/* Initialize sparsemap_t with data. */
+/** @brief Allocate a new, copy of the \b other sparsemap_t.
 *
 * @param[in] other The sparsemap to copy.
 */
 sparsemap_t *sparsemap_copy(sparsemap_t *other);
 /** @brief Allocate a new, empty sparsemap_t that references (wraps) the buffer
 * \b data of \b size bytes to use for storage of bitmap data.
 *
 * This function allocates a new sparsemap_t but not the buffer which is
 * provided by the caller as \b data which can be allocated on the stack or
 * heap.  Caller is responsible for calling free() on the returned heap object
 * and releasing the memory used for \b data.  Resizing the buffer is only
 * supported when the heap object for the map includes the buffer and the
 * \b data offset supplied is relative to the object (see #sparsemap()).
 *
 * @param[in] data A heap or stack memory buffer of \b size for use storing
 * bitmap data.
 * @param[in] size The size of the buffer \b data used for the bitmap.
 * @returns The newly allocated sparsemap reference.
 */
 sparsemap_t *sparsemap_wrap(uint8_t *data, size_t size);
 /** @brief Initialize an existing sparsemap_t by assigning \b data of \b size
 * bytes for storage of bitmap data.
 *
 * Given the address of an existing \b map allocated on the stack or heap this
 * function will initialize the data structure and use the provided \b data of
 * \b size for bitmap data.  Caller is responsible for all memory management.
 * Resizing the buffer is not directly supported, you
 * may resize it and call #sparsemap_set_data_size() and then ensure that should
 * the address of the object changed you need to update it by calling #sparsemap_
 * m_data field.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] data A heap or stack memory buffer of \b size for use storing
 * bitmap data.
 * @param[in] size The size of the buffer \b data used for the bitmap.
 */
 void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size);
-/* Clears the whole buffer. */
+/** @brief Opens, without initializing, an existing sparsemap contained within
 * the specified buffer.
 *
 * Given the address of an existing \b map this function will assign to the
 * provided data structure \b data of \b size for bitmap data.  Caller is
 * responsible for all memory management.  Use this when as a way to
 * "deserialize" bytes and make them ready for use as a bitmap.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] data A heap or stack memory buffer of \b size for use storing
 * bitmap data.
 * @param[in] size The size of the buffer \b data used for the bitmap.
 */
 void sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size);
 /** @brief Resets values and empties the buffer making it ready to accept new
 *  data but does not free the memory.
 *
 * @param[in] map The sparsemap reference.
 */
 void sparsemap_clear(sparsemap_t *map);
-/* Opens an existing sparsemap at the specified buffer. */
+/** @brief Update the size of the buffer \b data used for storing the bitmap.
-void sparsemap_open(sparsemap_t *, uint8_t *data, size_t data_size);
+ *
 * When called with \b data NULL on a \b map that was created with #sparsemap()
 * this function will reallocate the storage for both the map and data possibly
 * changing the address of the map itself so it is important for the caller to
 * update all references to this map to the address returned in this scenario.
 * Access to stale references will result in memory violations and program
 * termination.  Caller is not required to free() the old address, only the new
 * one should it have changed.  This uses #realloc() under the covers, all
 * caveats apply here as well.
 *
 * When called referencing a \b map that was allocate by the caller this
 * function will only update the values within the data structure.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] size The desired size of the buffer \b data used for the bitmap.
 * @returns The -- potentially changed -- sparsemap reference, or NULL should a
 * #realloc() fail (\b ENOMEM)
 * @note The resizing of caller supplied allocated objects is not yet fully
 * supported.
 */
 sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, uint8_t *data, size_t size);
-/* Resizes the data range. */
+/** @brief Calculate remaining capacity, approaches 0 when full.
-void sparsemap_set_data_size(sparsemap_t *map, size_t data_size);
+ *
-
+ * Provides an estimate in the range [0.0, 100.0] of the remaining capacity of
-/* Calculate remaining capacity, full when 0. */
+ * the buffer storing bitmap data.  This can change up or down as more data
 * is added/removed due to the method for compressed representation, do not
 * expect a smooth progression either direction.  This is a rough estimate only
 * and may also jump in value after seemingly indiscriminate changes to the map.
 *
 * @param[in] map The sparsemap reference.
 * @returns an estimate for remaining capacity that approaches 0.0 when full or
 * 100.0 when empty
 */
 double sparsemap_capacity_remaining(sparsemap_t *map);
-/* Returns the size of the underlying byte array. */
+/** @brief Returns the capacity of the underlying byte array in bytes.
 *
 * Specifically, this returns the byte \b size provided for the underlying
 * buffer used to store bitmap data.
 *
 * @param[in] map The sparsemap reference.
 * @returns byte size of the buffer used for storing bitmap data
 */
 size_t sparsemap_get_capacity(sparsemap_t *map);
-/* Returns the value of a bit at index |idx|. */
+/** @brief Returns the value of a bit at index \b idx, either true for "set" (1)
-bool sparsemap_is_set(sparsemap_t *map, size_t idx);
+ * or \b false for "unset" (0).
 *
 * @param[in] map The sparsemap reference.
 * @param[in] idx The 0-based offset into the bitmap index to examine.
 * @returns either true or false
 */
 bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx);
-/* Sets the bit at index |idx| to true or false, depending on |value|. */
+/** @brief Sets the bit at index \b idx to \b value.
-void sparsemap_set(sparsemap_t *map, size_t idx, bool value);
+ *
 * A sparsemap has a fixed size buffer with a capacity that can be exhausted by
 * when calling this function.  In such cases the return value is not equal to
 * the provided \b idx and errno is set to ENOSPC.  In such situations it is
 * possible to grow the data size and retry the set() operation under certain
 * circumstances (see #sparsemap() and #sparsemap_set_data_size()).
 *
 * @param[in] map The sparsemap reference.
 * @param[in] idx The 0-based offset into the bitmap index to modify.
 * @returns the \b idx supplied on success or SPARSEMAP_IDX_MAX on error
 * with \b errno set to ENOSPC when the map is full.
 */
 sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value);
-/* Returns the offset of the very first bit. */
+/** @brief Returns the byte size of the data buffer that has been used thus far.
-sm_idx_t sparsemap_get_start_offset(sparsemap_t *map);
+ *
-
+ * @param[in] map The sparsemap reference.
-/* Returns the used size in the data buffer. */
+ * @returns the byte size of the data buffer that has been used thus far
 */
 size_t sparsemap_get_size(sparsemap_t *map);
-/* Decompresses the whole bitmap; calls scanner for all bits. */
+/** @brief Returns a pointer to the data buffer used for the map.
-void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t), size_t skip);
+ *
 * @param[in] map The sparsemap reference.
 * @returns a pointer to the data buffer used for the map
 */
 void *sparsemap_get_data(sparsemap_t *map);
-/* Appends all chunk maps from |map| starting at |sstart| to |other|, then
+/** @brief Returns the number of elements in the map.
-   reduces the chunk map-count appropriately. */
+ *
-void sparsemap_split(sparsemap_t *map, size_t sstart, sparsemap_t *other);
+ * @param[in] map The sparsemap reference.
 * @returns the number of elements in the map
 */
 size_t sparsemap_count(sparsemap_t *map);
-/* Returns the index of the n'th set bit; uses a 0-based index. */
+/** @brief Returns the offset of the first bit set in the map.
-size_t sparsemap_select(sparsemap_t *map, size_t n);
+ *
 * This is the same as the value of the first set bit in the
 * map.
 *
 * @param[in] map The sparsemap reference.
 * @returns the offset of the first bit set in the map
 */
 sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map);
-/* Counts the set bits in the range [offset, idx]. */
+/** @brief Returns the offset of the last bit set in the map.
-size_t sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx);
+ *
 * This is the same as the value of the last bit set in the
 * map.
 *
 * @param[in] map The sparsemap reference.
 * @returns the offset of the index bit set in the map
 */
 sparsemap_idx_t sparsemap_get_ending_offset(sparsemap_t *map);
-size_t sparsemap_span(sparsemap_t *map, size_t loc, size_t len);
+/** @brief Returns the percent of bits set in the map.
 *
 * @param[in] map The sparsemap reference.
 * @returns the percent of bits set.
 */
 double sparsemap_fill_factor(sparsemap_t *map);
 /** @brief Provides a method for a callback function to examine every bit set in
 * the index.
 *
 * This decompresses the whole bitmap and invokes #scanner() passing an array
 * of the positions of set bits in order from 0 index to the end of the map.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] skip Start the scan after \b skip position in the map.
 * @param[in] aux Auxiliary information passed to the scanner.
 */
 void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux);
 /** @brief Merges the values from \b source into \b destination, \b source is unchanged.
 *
 * Efficiently adds all set bits from \b source into \b destination.
 *
 * @param[in] destination The sparsemap reference into which we will merge \b source.
 * @param[in] source The bitmap to merge into \b destination.
 * @returns 0 on success, or sets errno to ENOSPC and returns the amount of
 * additional space required to successfully merge the maps.
 */
 int sparsemap_merge(sparsemap_t *destination, sparsemap_t *source);
 /** @brief Splits the bitmap by assigning all bits starting at \b offset to the
 * \b other bitmap while removing them from \b map.
 *
 * The \b other bitmap is expected to be empty.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] offset The 0-based offset into the bitmap at which to split, if
 * set to SPARSEMAP_IDX_MAX then the bits will be evenly split.
 * @param[in] other The bitmap into which we place the split.
 * @returns the offset at which the map was split
 */
 sparsemap_idx_t sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other);
 /** @brief Finds the index of the \b n'th bit set to \b value.
 *
 * Locates the \b n'th bit either set, \b value is true, or unset, \b value is
 * false, from the start of the bitmap.
 * So, if your bit pattern is: ```1101 1110 1010 1101 1011 1110 1110 1111``` and
 * you request the first set bit the result is `0` (meaning the 1st bit in the
 * map which is index 0 because this is 0-based indexing).  The first unset bit
 * is `2` (or the third bit in the pattern).  When n is 3 and value is true the
 * result would be `3` (the fourth bit, or the third set bit which is at index
 * 3 when 0-based).
 *
 * @param[in] map The sparsemap reference.
 * @param[in] n Specifies how many bits to ignore (when n=2 return the position
 * of the third matching bit).
 * @param[in] value Determines if the search is to examine set (true) or unset
 * (false) bits in the bitmap index.
 * @returns the 0-based index of the located bit position within the map; when
 * not found either SPARSEMAP_IDX_MAX.
 */
 sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value);
 /** @brief Counts the bits matching \b value in the provided range, [\b x, \b
 * y].
 *
 * Counts the set, \b value is true, or unset, \b value is false, bits starting
 * at the \b idx'th bit (0-based) in the range [\b x, \b y] (inclusive on either
 * end).  If range is [0, 0] this examines 1 bit, the first one in the map, and
 * returns 1 if value is true and the bit was set.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] x 0-based start of the inclusive range to examine.
 * @param[in] y 0-based end of the inclusive range to examine.
 * @param[in] value Determines if the scan is to count the set (true) or unset
 * (false) bits in the range.
 * @returns the count of bits found within the range that match the \b value
 */
 size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value);
 /** @brief Locates the first contiguous set of bits of \b len starting at \b idx
 * matching \b value in the bitmap.
 *
 * @param[in] map The sparsemap reference.
 * @param[in] start 0-based start of search within the bitmap.
 * @param[in] len The length of contiguous bits we're seeking.
 * @param[in] value Determines if the scan is to find all set (true) or unset
 * (false) bits of \b len.
 * @returns the index of the first bit matching the criteria; when not found
 * found SPARSEMAP_IDX_MAX
 */
 size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t start, size_t len, bool value);
 #if defined(__cplusplus)
 }
 #endif
 #endif /* !defined(SPARSEMAP_H) */
--- a/include/tdigest.h
+++ b/include/tdigest.h
@ -0,0 +1,258 @@
 #pragma once
 #include <stdlib.h>
 /**
 * Adaptive histogram based on something like streaming k-means crossed with Q-digest.
 * The implementation is a direct descendent of MergingDigest
 * https://github.com/tdunning/t-digest/
 *
 * Copyright (c) 2021 Redis, All rights reserved.
 * Copyright (c) 2018 Andrew Werner, All rights reserved.
 *
 * The special characteristics of this algorithm are:
 *
 * - smaller summaries than Q-digest
 *
 * - provides part per million accuracy for extreme quantiles and typically &lt;1000 ppm accuracy
 * for middle quantiles
 *
 * - fast
 *
 * - simple
 *
 * - easy to adapt for use with map-reduce
 */
 #define MM_PI 3.14159265358979323846
 struct td_histogram {
    // compression is a setting used to configure the size of centroids when merged.
    double compression;
    double min;
    double max;
    // cap is the total size of nodes
    int cap;
    // merged_nodes is the number of merged nodes at the front of nodes.
    int merged_nodes;
    // unmerged_nodes is the number of buffered nodes.
    int unmerged_nodes;
    // we run the merge in reverse every other merge to avoid left-to-right bias in merging
    long long total_compressions;
    long long merged_weight;
    long long unmerged_weight;
    double *nodes_mean;
    long long *nodes_weight;
 };
 typedef struct td_histogram td_histogram_t;
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Allocate the memory, initialise the t-digest, and return the histogram as output parameter.
 * @param compression The compression parameter.
 * 100 is a common value for normal uses.
 * 1000 is extremely large.
 * The number of centroids retained will be a smallish (usually less than 10) multiple of this
 * number.
 * @return the histogram on success, NULL if allocation failed.
 */
 td_histogram_t *td_new(double compression);
 /**
 * Allocate the memory and initialise the t-digest.
 *
 * @param compression The compression parameter.
 * 100 is a common value for normal uses.
 * 1000 is extremely large.
 * The number of centroids retained will be a smallish (usually less than 10) multiple of this
 * number.
 * @param result Output parameter to capture allocated histogram.
 * @return 0 on success, 1 if allocation failed.
 */
 int td_init(double compression, td_histogram_t **result);
 /**
 * Frees the memory associated with the t-digest.
 *
 * @param h The histogram you want to free.
 */
 void td_free(td_histogram_t *h);
 /**
 * Reset a histogram to zero - empty out a histogram and re-initialise it
 *
 * If you want to re-use an existing histogram, but reset everything back to zero, this
 * is the routine to use.
 *
 * @param h The histogram you want to reset to empty.
 *
 */
 void td_reset(td_histogram_t *h);
 /**
 * Adds a sample to a histogram.
 *
 * @param val The value to add.
 * @param weight The weight of this point.
 * @return 0 on success, EDOM if overflow was detected as a consequence of adding the provided
 * weight.
 *
 */
 int td_add(td_histogram_t *h, double val, long long weight);
 /**
 * Re-examines a t-digest to determine whether some centroids are redundant.  If your data are
 * perversely ordered, this may be a good idea.  Even if not, this may save 20% or so in space.
 *
 * The cost is roughly the same as adding as many data points as there are centroids.  This
 * is typically &lt; 10 * compression, but could be as high as 100 * compression.
 * This is a destructive operation that is not thread-safe.
 *
 * @param h The histogram you want to compress.
 * @return 0 on success, EDOM if overflow was detected as a consequence of adding the provided
 * weight. If overflow is detected the histogram is not changed.
 *
 */
 int td_compress(td_histogram_t *h);
 /**
 * Merges all of the values from 'from' to 'this' histogram.
 *
 * @param h "This" pointer
 * @param from Histogram to copy values from.
 * * @return 0 on success, EDOM if overflow was detected as a consequence of merging the the
 * provided histogram. If overflow is detected the original histogram is not detected.
 */
 int td_merge(td_histogram_t *h, td_histogram_t *from);
 /**
 * Returns the fraction of all points added which are &le; x.
 *
 * @param x The cutoff for the cdf.
 * @return The fraction of all data which is less or equal to x.
 */
 double td_cdf(td_histogram_t *h, double x);
 /**
 * Returns an estimate of the cutoff such that a specified fraction of the data
 * added to this TDigest would be less than or equal to the cutoff.
 *
 * @param q The desired fraction
 * @return The value x such that cdf(x) == q;
 */
 double td_quantile(td_histogram_t *h, double q);
 /**
 * Returns an estimate of the cutoff such that a specified fraction of the data
 * added to this TDigest would be less than or equal to the cutoffs.
 *
 * @param quantiles The ordered percentiles array to get the values for.
 * @param values Destination array containing the values at the given quantiles.
 * The values array should be allocated by the caller.
 * @return 0 on success, ENOMEM if the provided destination array is null.
 */
 int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, size_t length);
 /**
 * Returns the trimmed mean ignoring values outside given cutoff upper and lower limits.
 *
 * @param leftmost_cut Fraction to cut off of the left tail of the distribution.
 * @param rightmost_cut Fraction to cut off of the right tail of the distribution.
 * @return The trimmed mean ignoring values outside given cutoff upper and lower limits;
 */
 double td_trimmed_mean(td_histogram_t *h, double leftmost_cut, double rightmost_cut);
 /**
 * Returns the trimmed mean ignoring values outside given a symmetric cutoff limits.
 *
 * @param proportion_to_cut Fraction to cut off of the left and right tails of the distribution.
 * @return The trimmed mean ignoring values outside given cutoff upper and lower limits;
 */
 double td_trimmed_mean_symmetric(td_histogram_t *h, double proportion_to_cut);
 /**
 * Returns the current compression factor.
 *
 * @return The compression factor originally used to set up the TDigest.
 */
 int td_compression(td_histogram_t *h);
 /**
 * Returns the number of points that have been added to this TDigest.
 *
 * @return The sum of the weights on all centroids.
 */
 long long td_size(td_histogram_t *h);
 /**
 * Returns the number of centroids being used by this TDigest.
 *
 * @return The number of centroids being used.
 */
 int td_centroid_count(td_histogram_t *h);
 /**
 * Get minimum value from the histogram.  Will return __DBL_MAX__ if the histogram
 * is empty.
 *
 * @param h "This" pointer
 */
 double td_min(td_histogram_t *h);
 /**
 * Get maximum value from the histogram.  Will return - __DBL_MAX__ if the histogram
 * is empty.
 *
 * @param h "This" pointer
 */
 double td_max(td_histogram_t *h);
 /**
 * Get the full centroids weight array for 'this' histogram.
 *
 * @param h "This" pointer
 *
 * @return The full centroids weight array.
 */
 const long long *td_centroids_weight(td_histogram_t *h);
 /**
 * Get the full centroids mean array for 'this' histogram.
 *
 * @param h "This" pointer
 *
 * @return The full centroids mean array.
 */
 const double *td_centroids_mean(td_histogram_t *h);
 /**
 * Get the centroid weight for 'this' histogram and 'pos'.
 *
 * @param h "This" pointer
 * @param pos centroid position.
 *
 * @return The centroid weight.
 */
 long long td_centroids_weight_at(td_histogram_t *h, int pos);
 /**
 * Get the centroid mean for 'this' histogram and 'pos'.
 *
 * @param h "This" pointer
 * @param pos centroid position.
 *
 * @return The centroid mean.
 */
 double td_centroids_mean_at(td_histogram_t *h, int pos);
 #ifdef __cplusplus
 }
 #endif
--- a/tests/common.c
+++ b/tests/common.c
@ -1,14 +1,25 @@
-#include <sys/types.h>
+#define _POSIX_C_SOURCE 199309L
 #define X86_INTRIN
 #include <assert.h>
-#include <pthread.h>
+#include <pthread.h> // If using threads
 #include <sparsemap.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 #ifdef __x86_64__ // Check if running on x86_64 architecture
 #ifdef X86_INTRIN
 #include <errno.h>
 #include <x86intrin.h>
 #endif
 #endif
-#include "common.h"
+#include "../include/common.h"
 #include "../include/sparsemap.h"
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvariadic-macros"
@ -22,95 +33,69 @@
 uint64_t
 tsc(void)
 {
 #ifdef __x86_64__ // Check if running on x86_64 architecture
 #ifdef X86_INTRIN
  return __rdtsc();
 #else
  uint32_t low, high;
  __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
  return ((uint64_t)high << 32) | low;
 #endif
 #ifdef __arm__ // Check if compiling for ARM architecture
  uint64_t result;
  __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(result));
  return result;
 }
 #endif
 #endif
 return 0;
 }
-static uint64_t
+// get microsecond timestamp
-get_tsc_frequency()
+uint64_t
 msts()
 {
-  uint32_t high, low;
+#ifdef _SC_MONOTONIC_CLOCK
-  __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
+  struct timespec ts;
-  __asm__ volatile("rdtsc");
+  if (sysconf(_SC_MONOTONIC_CLOCK) > 0) {
-  return ((uint64_t)high << 32) | low;
+    /* A monotonic clock presents */
    if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
      return (uint64_t)(ts.tv_sec * 1000000 + ts.tv_nsec / 1000);
    else
      return 0;
  }
  return 0;
 #else
  struct timeval tv;
  if (gettimeofday(&tv, NULL) == 0)
    return (uint64_t)(tv.tv_sec * 1000000 + tv.tv_usec);
  else
    return 0;
 #endif
 }
 double
-tsc_ticks_to_ns(uint64_t tsc_ticks)
+nsts(void)
 {
-  static uint64_t tsc_freq = 0;
+  struct timespec ts;
  if (tsc_freq == 0) {
    tsc_freq = get_tsc_frequency();
  }
  return (double)tsc_ticks / (double)tsc_freq * 1e9;
 }
-void
+  if (clock_gettime(CLOCK_REALTIME, &ts) == -1) {
-est_sift_up(uint64_t *heap, int child_index)
+    perror("clock_gettime");
-{
+    return -1.0; // Return -1.0 on error
  while (child_index > 0) {
    int parent_index = (child_index - 1) / 2;
    if (heap[parent_index] > heap[child_index]) {
      // Swap parent and child
      uint64_t temp = heap[parent_index];
      heap[parent_index] = heap[child_index];
      heap[child_index] = temp;
      child_index = parent_index;
    } else {
      break; // Heap property satisfied
    }
  }
 }
 void
 est_sift_down(uint64_t *heap, int heap_size, int parent_index)
 {
  int child_index = 2 * parent_index + 1; // Left child
  while (child_index < heap_size) {
    // Right child exists and is smaller than left child
    if (child_index + 1 < heap_size && heap[child_index + 1] < heap[child_index]) {
      child_index++;
    }
    // If the smallest child is smaller than the parent, swap them
    if (heap[child_index] < heap[parent_index]) {
      uint64_t temp = heap[child_index];
      heap[child_index] = heap[parent_index];
      heap[parent_index] = temp;
      parent_index = child_index;
      child_index = 2 * parent_index + 1;
    } else {
      break; // Heap property satisfied
    }
  }
 }
 void
 est_insert_value(uint64_t *heap, int heap_max_size, int *heap_size, uint64_t value)
 {
  if (*heap_size < heap_max_size) { // Heap not full, insert value
    heap[*heap_size] = value;
    est_sift_up(heap, *heap_size);
    (*heap_size)++;
  } else {
    // Heap is full, replace root with new value with a certain probability
    // This is a very naive approach to maintain a sample of the input
    if (rand() % 2) {
      heap[0] = value;
      est_sift_down(heap, heap_max_size, 0);
    }
  }
  return ts.tv_sec + ts.tv_nsec / 1e9;
 }
 int __xorshift32_state = 0;
 // Xorshift algorithm for PRNG
 uint32_t
-xorshift32()
+xorshift32(void)
 {
  uint32_t x = __xorshift32_state;
-  if (x == 0)
+  if (x == 0) {
    x = 123456789;
  }
  x ^= x << 13;
  x ^= x >> 17;
  x ^= x << 5;
@ -119,13 +104,13 @@ xorshift32()
 }
 void
-xorshift32_seed()
+xorshift32_seed(void)
 {
  __xorshift32_state = XORSHIFT_SEED_VALUE;
 }
 void
-shuffle(int *array, size_t n) // TODO working?
+shuffle(int *array, size_t n)
 {
  for (size_t i = n - 1; i > 0; --i) {
    size_t j = xorshift32() % (i + 1);
@ -170,7 +155,7 @@ has_sequential_set(int a[], int l, int r)
 int
 ensure_sequential_set(int a[], int l, int r)
 {
-  if (!a || l == 0 || r < 1 || r > l) {
+  if (!a || l == 0 || r < 1 || r > l - 1) {
    return 0;
  }
@ -190,7 +175,8 @@ ensure_sequential_set(int a[], int l, int r)
  // Generate a random value between min_value and max_value
  int value = random_uint32() % (max_value - min_value - r + 1);
  // Generate a random location between 0 and l - r
-  offset = random_uint32() % (l - r - 1);
+  int d = l - r - 1;
  offset = d == 0 ? 0 : random_uint32() % d;
  // Adjust the array to include a sequential set of 'r' integers at the random offset
  for (int i = 0; i < r; ++i) {
@ -199,21 +185,6 @@ ensure_sequential_set(int a[], int l, int r)
  return value;
 }
 int
 create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r)
 {
  int placed_at;
  if (s >= r + 1) {
    placed_at = 0;
  } else {
    placed_at = random_uint32() % (s - r - 1);
  }
  for (int i = placed_at; i < placed_at + r; i++) {
    sparsemap_set(map, i, true);
  }
  return placed_at;
 }
 void
 print_array(int *array, int l)
 {
@ -322,7 +293,7 @@ bool
 is_set(const int array[], int bit)
 {
  for (int i = 0; i < 1024; i++) {
-    if (array[i] == (int)bit) {
+    if (array[i] == bit) {
      return true;
    }
  }
@ -330,28 +301,44 @@ is_set(const int array[], int bit)
 }
 int
-is_unique(int a[], int l, int value)
+whats_set_uint64(uint64_t number, int pos[64])
 {
-  for (int i = 0; i < l; ++i) {
+  int length = 0;
-    if (a[i] == value) {
+
-      return 0; // Not unique
+  for (int i = 0; i < 64; i++) {
    if (number & ((uint64_t)1 << i)) {
      pos[length++] = i;
    }
  }
  return 1; // Unique
 }
  return length;
 }
 /** @brief Fills an array with unique random values between 0 and max_value.
 *
 * @param[in] a The array to fill.
 * @param[in] l The length of the array to fill.
 * @param[in] max_value The maximum value for the random numbers.
 */
 void
 setup_test_array(int a[], int l, int max_value)
 {
  if (a == NULL || max_value < 0)
    return; // Basic error handling and validation
-  for (int i = 0; i < l; ++i) {
+  // Create a set to store the unique values.
-    int candidate;
+  int unique_values[max_value + 1];
-    do {
+  for (int i = 0; i <= max_value; ++i) {
-      candidate = random_uint32() % (max_value + 1); // Generate a new value within the specified range
+    unique_values[i] = 0;
-    } while (!is_unique(a, i, candidate));           // Repeat until a unique value is found
+  }
-    a[i] = candidate;                                // Assign the unique value to the array
+
  // Keep generating random numbers until we have l unique values.
  int count = 0;
  while (count < l) {
    int random_number = random_uint32() % (max_value + 1);
    if (unique_values[random_number] == 0) {
      unique_values[random_number] = 1;
      a[count] = random_number;
      count++;
    }
  }
 }
@ -364,15 +351,6 @@ bitmap_from_uint32(sparsemap_t *map, uint32_t number)
  }
 }
 void
 bitmap_from_uint64(sparsemap_t *map, uint64_t number)
 {
  for (int i = 0; i < 64; i++) {
    bool bit = number & (1 << i);
    sparsemap_set(map, i, bit);
  }
 }
 uint32_t
 rank_uint64(uint64_t number, int n, int p)
 {
@ -400,28 +378,96 @@ rank_uint64(uint64_t number, int n, int p)
  return count;
 }
-int
+void
-whats_set_uint64(uint64_t number, int pos[64])
+print_bits(char *name, uint64_t value)
 {
-  int length = 0;
+  if (name) {
-
+    printf("%s\t", name);
-  for (int i = 0; i < 64; i++) {
+  }
-    if (number & ((uint64_t)1 << i)) {
+  for (int i = 63; i >= 0; i--) {
-      pos[length++] = i;
+    printf("%lu", (value >> i) & 1);
    if (i % 8 == 0) {
      printf(" "); // Add space for better readability
    }
  }
-
+  printf("\n");
  return length;
 }
 void
-whats_set(sparsemap_t *map, int m)
+sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number)
 {
-  logf("what's set in the range [0, %d): ", m);
+  for (int i = offset; i < 64; i++) {
-  for (int i = 0; i < m; i++) {
+    bool bit = number & ((uint64_t)1 << i);
    sparsemap_set(map, i, bit);
  }
 }
 sparsemap_idx_t
 sm_add_span(sparsemap_t *map, int map_size, int span_length)
 {
  int attempts = map_size / span_length;
  sparsemap_idx_t placed_at;
  do {
    placed_at = random_uint32() % (map_size - span_length - 1);
    if (sm_occupied(map, placed_at, span_length, true)) {
      attempts--;
    } else {
      break;
    }
  } while (attempts);
  for (sparsemap_idx_t i = placed_at; i < placed_at + span_length; i++) {
    if (sparsemap_set(map, i, true) != i) {
      return placed_at; // TODO error?
    }
  }
  return placed_at;
 }
 void
 sm_whats_set(sparsemap_t *map, int off, int len)
 {
  printf("what's set in the range [%d, %d): ", off, off + len);
  for (int i = off; i < off + len; i++) {
    if (sparsemap_is_set(map, i)) {
-      logf("%d ", i);
+      printf("%d ", i);
    }
  }
-  logf("\n");
+  printf("\n");
 }
 bool
 sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value)
 {
  for (sparsemap_idx_t i = m; i < m + len; i++) {
    if (sparsemap_is_set(map, i) != value) {
      return false;
    }
  }
  return true;
 }
 bool
 sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value)
 {
  for (sparsemap_idx_t i = m; i < (sparsemap_idx_t)len; i++) {
    if (sparsemap_is_set(map, i) == value) {
      return true;
    }
  }
  return false;
 }
 char *
 bytes_as(double bytes, char *s, size_t size)
 {
  const char *units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
  size_t i = 0;
  while (bytes >= 1024 && i < sizeof(units) / sizeof(units[0]) - 1) {
    bytes /= 1024;
    i++;
  }
  snprintf(s, size, "%.2f %s", bytes, units[i]);
  return s;
 }
--- a/lib/roaring.c
+++ b/lib/roaring.c
--- a/lib/tdigest.c
+++ b/lib/tdigest.c
@ -0,0 +1,680 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <math.h>
 #include "tdigest.h"
 #include <errno.h>
 #include <stdint.h>
 #ifndef TD_MALLOC_INCLUDE
 #define TD_MALLOC_INCLUDE "td_malloc.h"
 #endif
 #ifndef TD_ALLOC_H
 #define TD_ALLOC_H
 #define __td_malloc malloc
 #define __td_calloc calloc
 #define __td_realloc realloc
 #define __td_free free
 #endif
 #define __td_max(x, y) (((x) > (y)) ? (x) : (y))
 #define __td_min(x, y) (((x) < (y)) ? (x) : (y))
 static inline double weighted_average_sorted(double x1, double w1, double x2, double w2) {
    const double x = (x1 * w1 + x2 * w2) / (w1 + w2);
    return __td_max(x1, __td_min(x, x2));
 }
 static inline bool _tdigest_long_long_add_safe(long long a, long long b) {
    if (b < 0) {
        return (a >= __LONG_LONG_MAX__ - b);
    } else {
        return (a <= __LONG_LONG_MAX__ - b);
    }
 }
 static inline double weighted_average(double x1, double w1, double x2, double w2) {
    if (x1 <= x2) {
        return weighted_average_sorted(x1, w1, x2, w2);
    } else {
        return weighted_average_sorted(x2, w2, x1, w1);
    }
 }
 static inline void swap(double *arr, int i, int j) {
    const double temp = arr[i];
    arr[i] = arr[j];
    arr[j] = temp;
 }
 static inline void swap_l(long long *arr, int i, int j) {
    const long long temp = arr[i];
    arr[i] = arr[j];
    arr[j] = temp;
 }
 static unsigned int partition(double *means, long long *weights, unsigned int start,
                              unsigned int end, unsigned int pivot_idx) {
    const double pivotMean = means[pivot_idx];
    swap(means, pivot_idx, end);
    swap_l(weights, pivot_idx, end);
    int i = start - 1;
    for (unsigned int j = start; j < end; j++) {
        // If current element is smaller than the pivot
        if (means[j] < pivotMean) {
            // increment index of smaller element
            i++;
            swap(means, i, j);
            swap_l(weights, i, j);
        }
    }
    swap(means, i + 1, end);
    swap_l(weights, i + 1, end);
    return i + 1;
 }
 /**
 * Standard quick sort except that sorting rearranges parallel arrays
 *
 * @param means  Values to sort on
 * @param weights The auxillary values to sort.
 * @param start  The beginning of the values to sort
 * @param end    The value after the last value to sort
 */
 static void td_qsort(double *means, long long *weights, unsigned int start, unsigned int end) {
    if (start < end) {
        // two elements can be directly compared
        if ((end - start) == 1) {
            if (means[start] > means[end]) {
                swap(means, start, end);
                swap_l(weights, start, end);
            }
            return;
        }
        // generating a random number as a pivot was very expensive vs the array size
        // const unsigned int pivot_idx = start + rand()%(end - start + 1);
        const unsigned int pivot_idx = (end + start) / 2; // central pivot
        const unsigned int new_pivot_idx = partition(means, weights, start, end, pivot_idx);
        if (new_pivot_idx > start) {
            td_qsort(means, weights, start, new_pivot_idx - 1);
        }
        td_qsort(means, weights, new_pivot_idx + 1, end);
    }
 }
 static inline size_t cap_from_compression(double compression) {
    if ((size_t)compression > ((SIZE_MAX / sizeof(double) / 6) - 10)) {
        return 0;
    }
    return (6 * (size_t)(compression)) + 10;
 }
 static inline bool should_td_compress(td_histogram_t *h) {
    return ((h->merged_nodes + h->unmerged_nodes) >= (h->cap - 1));
 }
 static inline int next_node(td_histogram_t *h) { return h->merged_nodes + h->unmerged_nodes; }
 int td_compress(td_histogram_t *h);
 static inline int _check_overflow(const double v) {
    // double-precision overflow detected on h->unmerged_weight
    if (v == INFINITY) {
        return EDOM;
    }
    return 0;
 }
 static inline int _check_td_overflow(const double new_unmerged_weight,
                                     const double new_total_weight) {
    // double-precision overflow detected on h->unmerged_weight
    if (new_unmerged_weight == INFINITY) {
        return EDOM;
    }
    if (new_total_weight == INFINITY) {
        return EDOM;
    }
    const double denom = 2 * MM_PI * new_total_weight * log(new_total_weight);
    if (denom == INFINITY) {
        return EDOM;
    }
    return 0;
 }
 int td_centroid_count(td_histogram_t *h) { return next_node(h); }
 void td_reset(td_histogram_t *h) {
    if (!h) {
        return;
    }
    h->min = __DBL_MAX__;
    h->max = -h->min;
    h->merged_nodes = 0;
    h->merged_weight = 0;
    h->unmerged_nodes = 0;
    h->unmerged_weight = 0;
    h->total_compressions = 0;
 }
 int td_init(double compression, td_histogram_t **result) {
    const size_t capacity = cap_from_compression(compression);
    if (capacity < 1) {
        return 1;
    }
    td_histogram_t *histogram;
    histogram = (td_histogram_t *)__td_malloc(sizeof(td_histogram_t));
    if (!histogram) {
        return 1;
    }
    histogram->cap = capacity;
    histogram->compression = (double)compression;
    td_reset(histogram);
    histogram->nodes_mean = (double *)__td_calloc(capacity, sizeof(double));
    if (!histogram->nodes_mean) {
        td_free(histogram);
        return 1;
    }
    histogram->nodes_weight = (long long *)__td_calloc(capacity, sizeof(long long));
    if (!histogram->nodes_weight) {
        td_free(histogram);
        return 1;
    }
    *result = histogram;
    return 0;
 }
 td_histogram_t *td_new(double compression) {
    td_histogram_t *mdigest = NULL;
    td_init(compression, &mdigest);
    return mdigest;
 }
 void td_free(td_histogram_t *histogram) {
    if (histogram->nodes_mean) {
        __td_free((void *)(histogram->nodes_mean));
    }
    if (histogram->nodes_weight) {
        __td_free((void *)(histogram->nodes_weight));
    }
    __td_free((void *)(histogram));
 }
 int td_merge(td_histogram_t *into, td_histogram_t *from) {
    if (td_compress(into) != 0)
        return EDOM;
    if (td_compress(from) != 0)
        return EDOM;
    const int pos = from->merged_nodes + from->unmerged_nodes;
    for (int i = 0; i < pos; i++) {
        const double mean = from->nodes_mean[i];
        const long long weight = from->nodes_weight[i];
        if (td_add(into, mean, weight) != 0) {
            return EDOM;
        }
    }
    return 0;
 }
 long long td_size(td_histogram_t *h) { return h->merged_weight + h->unmerged_weight; }
 double td_cdf(td_histogram_t *h, double val) {
    td_compress(h);
    // no data to examine
    if (h->merged_nodes == 0) {
        return NAN;
    }
    // bellow lower bound
    if (val < h->min) {
        return 0;
    }
    // above upper bound
    if (val > h->max) {
        return 1;
    }
    if (h->merged_nodes == 1) {
        // exactly one centroid, should have max==min
        const double width = h->max - h->min;
        if (val - h->min <= width) {
            // min and max are too close together to do any viable interpolation
            return 0.5;
        } else {
            // interpolate if somehow we have weight > 0 and max != min
            return (val - h->min) / width;
        }
    }
    const int n = h->merged_nodes;
    // check for the left tail
    const double left_centroid_mean = h->nodes_mean[0];
    const double left_centroid_weight = (double)h->nodes_weight[0];
    const double merged_weight_d = (double)h->merged_weight;
    if (val < left_centroid_mean) {
        // note that this is different than h->nodes_mean[0] > min
        // ... this guarantees we divide by non-zero number and interpolation works
        const double width = left_centroid_mean - h->min;
        if (width > 0) {
            // must be a sample exactly at min
            if (val == h->min) {
                return 0.5 / merged_weight_d;
            } else {
                return (1 + (val - h->min) / width * (left_centroid_weight / 2 - 1)) /
                       merged_weight_d;
            }
        } else {
            // this should be redundant with the check val < h->min
            return 0;
        }
    }
    // and the right tail
    const double right_centroid_mean = h->nodes_mean[n - 1];
    const double right_centroid_weight = (double)h->nodes_weight[n - 1];
    if (val > right_centroid_mean) {
        const double width = h->max - right_centroid_mean;
        if (width > 0) {
            if (val == h->max) {
                return 1 - 0.5 / merged_weight_d;
            } else {
                // there has to be a single sample exactly at max
                const double dq = (1 + (h->max - val) / width * (right_centroid_weight / 2 - 1)) /
                                  merged_weight_d;
                return 1 - dq;
            }
        } else {
            return 1;
        }
    }
    // we know that there are at least two centroids and mean[0] < x < mean[n-1]
    // that means that there are either one or more consecutive centroids all at exactly x
    // or there are consecutive centroids, c0 < x < c1
    double weightSoFar = 0;
    for (int it = 0; it < n - 1; it++) {
        // weightSoFar does not include weight[it] yet
        if (h->nodes_mean[it] == val) {
            // we have one or more centroids == x, treat them as one
            // dw will accumulate the weight of all of the centroids at x
            double dw = 0;
            while (it < n && h->nodes_mean[it] == val) {
                dw += (double)h->nodes_weight[it];
                it++;
            }
            return (weightSoFar + dw / 2) / (double)h->merged_weight;
        } else if (h->nodes_mean[it] <= val && val < h->nodes_mean[it + 1]) {
            const double node_weight = (double)h->nodes_weight[it];
            const double node_weight_next = (double)h->nodes_weight[it + 1];
            const double node_mean = h->nodes_mean[it];
            const double node_mean_next = h->nodes_mean[it + 1];
            // landed between centroids ... check for floating point madness
            if (node_mean_next - node_mean > 0) {
                // note how we handle singleton centroids here
                // the point is that for singleton centroids, we know that their entire
                // weight is exactly at the centroid and thus shouldn't be involved in
                // interpolation
                double leftExcludedW = 0;
                double rightExcludedW = 0;
                if (node_weight == 1) {
                    if (node_weight_next == 1) {
                        // two singletons means no interpolation
                        // left singleton is in, right is out
                        return (weightSoFar + 1) / merged_weight_d;
                    } else {
                        leftExcludedW = 0.5;
                    }
                } else if (node_weight_next == 1) {
                    rightExcludedW = 0.5;
                }
                double dw = (node_weight + node_weight_next) / 2;
                // adjust endpoints for any singleton
                double dwNoSingleton = dw - leftExcludedW - rightExcludedW;
                double base = weightSoFar + node_weight / 2 + leftExcludedW;
                return (base + dwNoSingleton * (val - node_mean) / (node_mean_next - node_mean)) /
                       merged_weight_d;
            } else {
                // this is simply caution against floating point madness
                // it is conceivable that the centroids will be different
                // but too near to allow safe interpolation
                double dw = (node_weight + node_weight_next) / 2;
                return (weightSoFar + dw) / merged_weight_d;
            }
        } else {
            weightSoFar += (double)h->nodes_weight[it];
        }
    }
    return 1 - 0.5 / merged_weight_d;
 }
 static double td_internal_iterate_centroids_to_index(const td_histogram_t *h, const double index,
                                                     const double left_centroid_weight,
                                                     const int total_centroids, double *weightSoFar,
                                                     int *node_pos) {
    if (left_centroid_weight > 1 && index < left_centroid_weight / 2) {
        // there is a single sample at min so we interpolate with less weight
        return h->min + (index - 1) / (left_centroid_weight / 2 - 1) * (h->nodes_mean[0] - h->min);
    }
    // usually the last centroid will have unit weight so this test will make it moot
    if (index > h->merged_weight - 1) {
        return h->max;
    }
    // if the right-most centroid has more than one sample, we still know
    // that one sample occurred at max so we can do some interpolation
    const double right_centroid_weight = (double)h->nodes_weight[total_centroids - 1];
    const double right_centroid_mean = h->nodes_mean[total_centroids - 1];
    if (right_centroid_weight > 1 &&
        (double)h->merged_weight - index <= right_centroid_weight / 2) {
        return h->max - ((double)h->merged_weight - index - 1) / (right_centroid_weight / 2 - 1) *
                            (h->max - right_centroid_mean);
    }
    for (; *node_pos < total_centroids - 1; (*node_pos)++) {
        const int i = *node_pos;
        const double node_weight = (double)h->nodes_weight[i];
        const double node_weight_next = (double)h->nodes_weight[i + 1];
        const double node_mean = h->nodes_mean[i];
        const double node_mean_next = h->nodes_mean[i + 1];
        const double dw = (node_weight + node_weight_next) / 2;
        if (*weightSoFar + dw > index) {
            // centroids i and i+1 bracket our current point
            // check for unit weight
            double leftUnit = 0;
            if (node_weight == 1) {
                if (index - *weightSoFar < 0.5) {
                    // within the singleton's sphere
                    return node_mean;
                } else {
                    leftUnit = 0.5;
                }
            }
            double rightUnit = 0;
            if (node_weight_next == 1) {
                if (*weightSoFar + dw - index <= 0.5) {
                    // no interpolation needed near singleton
                    return node_mean_next;
                }
                rightUnit = 0.5;
            }
            const double z1 = index - *weightSoFar - leftUnit;
            const double z2 = *weightSoFar + dw - index - rightUnit;
            return weighted_average(node_mean, z2, node_mean_next, z1);
        }
        *weightSoFar += dw;
    }
    // weightSoFar = totalWeight - weight[total_centroids-1]/2 (very nearly)
    // so we interpolate out to max value ever seen
    const double z1 = index - h->merged_weight - right_centroid_weight / 2.0;
    const double z2 = right_centroid_weight / 2 - z1;
    return weighted_average(right_centroid_mean, z1, h->max, z2);
 }
 double td_quantile(td_histogram_t *h, double q) {
    td_compress(h);
    // q should be in [0,1]
    if (q < 0.0 || q > 1.0 || h->merged_nodes == 0) {
        return NAN;
    }
    // with one data point, all quantiles lead to Rome
    if (h->merged_nodes == 1) {
        return h->nodes_mean[0];
    }
    // if values were stored in a sorted array, index would be the offset we are interested in
    const double index = q * (double)h->merged_weight;
    // beyond the boundaries, we return min or max
    // usually, the first centroid will have unit weight so this will make it moot
    if (index < 1) {
        return h->min;
    }
    // we know that there are at least two centroids now
    const int n = h->merged_nodes;
    // if the left centroid has more than one sample, we still know
    // that one sample occurred at min so we can do some interpolation
    const double left_centroid_weight = (double)h->nodes_weight[0];
    // in between extremes we interpolate between centroids
    double weightSoFar = left_centroid_weight / 2;
    int i = 0;
    return td_internal_iterate_centroids_to_index(h, index, left_centroid_weight, n, &weightSoFar,
                                                  &i);
 }
 int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, size_t length) {
    td_compress(h);
    if (NULL == quantiles || NULL == values) {
        return EINVAL;
    }
    const int n = h->merged_nodes;
    if (n == 0) {
        for (size_t i = 0; i < length; i++) {
            values[i] = NAN;
        }
        return 0;
    }
    if (n == 1) {
        for (size_t i = 0; i < length; i++) {
            const double requested_quantile = quantiles[i];
            // q should be in [0,1]
            if (requested_quantile < 0.0 || requested_quantile > 1.0) {
                values[i] = NAN;
            } else {
                // with one data point, all quantiles lead to Rome
                values[i] = h->nodes_mean[0];
            }
        }
        return 0;
    }
    // we know that there are at least two centroids now
    // if the left centroid has more than one sample, we still know
    // that one sample occurred at min so we can do some interpolation
    const double left_centroid_weight = (double)h->nodes_weight[0];
    // in between extremes we interpolate between centroids
    double weightSoFar = left_centroid_weight / 2;
    int node_pos = 0;
    // to avoid allocations we use the values array for intermediate computation
    // i.e. to store the expected cumulative count at each percentile
    for (size_t qpos = 0; qpos < length; qpos++) {
        const double index = quantiles[qpos] * (double)h->merged_weight;
        values[qpos] = td_internal_iterate_centroids_to_index(h, index, left_centroid_weight, n,
                                                              &weightSoFar, &node_pos);
    }
    return 0;
 }
 static double td_internal_trimmed_mean(const td_histogram_t *h, const double leftmost_weight,
                                       const double rightmost_weight) {
    double count_done = 0;
    double trimmed_sum = 0;
    double trimmed_count = 0;
    for (int i = 0; i < h->merged_nodes; i++) {
        const double n_weight = (double)h->nodes_weight[i];
        // Assume the whole centroid falls into the range
        double count_add = n_weight;
        // If we haven't reached the low threshold yet, skip appropriate part of the centroid.
        count_add -= __td_min(__td_max(0, leftmost_weight - count_done), count_add);
        // If we have reached the upper threshold, ignore the overflowing part of the centroid.
        count_add = __td_min(__td_max(0, rightmost_weight - count_done), count_add);
        // consider the whole centroid processed
        count_done += n_weight;
        // increment the sum / count
        trimmed_sum += h->nodes_mean[i] * count_add;
        trimmed_count += count_add;
        // break once we cross the high threshold
        if (count_done >= rightmost_weight)
            break;
    }
    return trimmed_sum / trimmed_count;
 }
 double td_trimmed_mean_symmetric(td_histogram_t *h, double proportion_to_cut) {
    td_compress(h);
    // proportion_to_cut should be in [0,1]
    if (h->merged_nodes == 0 || proportion_to_cut < 0.0 || proportion_to_cut > 1.0) {
        return NAN;
    }
    // with one data point, all values lead to Rome
    if (h->merged_nodes == 1) {
        return h->nodes_mean[0];
    }
    /* translate the percentiles to counts */
    const double leftmost_weight = floor((double)h->merged_weight * proportion_to_cut);
    const double rightmost_weight = ceil((double)h->merged_weight * (1.0 - proportion_to_cut));
    return td_internal_trimmed_mean(h, leftmost_weight, rightmost_weight);
 }
 double td_trimmed_mean(td_histogram_t *h, double leftmost_cut, double rightmost_cut) {
    td_compress(h);
    // leftmost_cut and rightmost_cut should be in [0,1]
    if (h->merged_nodes == 0 || leftmost_cut < 0.0 || leftmost_cut > 1.0 || rightmost_cut < 0.0 ||
        rightmost_cut > 1.0) {
        return NAN;
    }
    // with one data point, all values lead to Rome
    if (h->merged_nodes == 1) {
        return h->nodes_mean[0];
    }
    /* translate the percentiles to counts */
    const double leftmost_weight = floor((double)h->merged_weight * leftmost_cut);
    const double rightmost_weight = ceil((double)h->merged_weight * rightmost_cut);
    return td_internal_trimmed_mean(h, leftmost_weight, rightmost_weight);
 }
 int td_add(td_histogram_t *h, double mean, long long weight) {
    if (should_td_compress(h)) {
        const int overflow_res = td_compress(h);
        if (overflow_res != 0)
            return overflow_res;
    }
    const int pos = next_node(h);
    if (pos >= h->cap)
        return EDOM;
    if (_tdigest_long_long_add_safe(h->unmerged_weight, weight) == false)
        return EDOM;
    const long long new_unmerged_weight = h->unmerged_weight + weight;
    if (_tdigest_long_long_add_safe(new_unmerged_weight, h->merged_weight) == false)
        return EDOM;
    const long long new_total_weight = new_unmerged_weight + h->merged_weight;
    // double-precision overflow detected
    const int overflow_res =
        _check_td_overflow((double)new_unmerged_weight, (double)new_total_weight);
    if (overflow_res != 0)
        return overflow_res;
    if (mean < h->min) {
        h->min = mean;
    }
    if (mean > h->max) {
        h->max = mean;
    }
    h->nodes_mean[pos] = mean;
    h->nodes_weight[pos] = weight;
    h->unmerged_nodes++;
    h->unmerged_weight = new_unmerged_weight;
    return 0;
 }
 int td_compress(td_histogram_t *h) {
    if (h->unmerged_nodes == 0) {
        return 0;
    }
    int N = h->merged_nodes + h->unmerged_nodes;
    td_qsort(h->nodes_mean, h->nodes_weight, 0, N - 1);
    const double total_weight = (double)h->merged_weight + (double)h->unmerged_weight;
    // double-precision overflow detected
    const int overflow_res = _check_td_overflow((double)h->unmerged_weight, (double)total_weight);
    if (overflow_res != 0)
        return overflow_res;
    if (total_weight <= 1)
        return 0;
    const double denom = 2 * MM_PI * total_weight * log(total_weight);
    if (_check_overflow(denom) != 0)
        return EDOM;
    // Compute the normalizer given compression and number of points.
    const double normalizer = h->compression / denom;
    if (_check_overflow(normalizer) != 0)
        return EDOM;
    int cur = 0;
    double weight_so_far = 0;
    for (int i = 1; i < N; i++) {
        const double proposed_weight = (double)h->nodes_weight[cur] + (double)h->nodes_weight[i];
        const double z = proposed_weight * normalizer;
        // quantile up to cur
        const double q0 = weight_so_far / total_weight;
        // quantile up to cur + i
        const double q2 = (weight_so_far + proposed_weight) / total_weight;
        // Convert  a quantile to the k-scale
        const bool should_add = (z <= (q0 * (1 - q0))) && (z <= (q2 * (1 - q2)));
        // next point will fit
        // so merge into existing centroid
        if (should_add) {
            h->nodes_weight[cur] += h->nodes_weight[i];
            const double delta = h->nodes_mean[i] - h->nodes_mean[cur];
            const double weighted_delta = (delta * h->nodes_weight[i]) / h->nodes_weight[cur];
            h->nodes_mean[cur] += weighted_delta;
        } else {
            weight_so_far += h->nodes_weight[cur];
            cur++;
            h->nodes_weight[cur] = h->nodes_weight[i];
            h->nodes_mean[cur] = h->nodes_mean[i];
        }
        if (cur != i) {
            h->nodes_weight[i] = 0;
            h->nodes_mean[i] = 0.0;
        }
    }
    h->merged_nodes = cur + 1;
    h->merged_weight = total_weight;
    h->unmerged_nodes = 0;
    h->unmerged_weight = 0;
    h->total_compressions++;
    return 0;
 }
 double td_min(td_histogram_t *h) { return h->min; }
 double td_max(td_histogram_t *h) { return h->max; }
 int td_compression(td_histogram_t *h) { return h->compression; }
 const long long *td_centroids_weight(td_histogram_t *h) { return h->nodes_weight; }
 const double *td_centroids_mean(td_histogram_t *h) { return h->nodes_mean; }
 long long td_centroids_weight_at(td_histogram_t *h, int pos) { return h->nodes_weight[pos]; }
 double td_centroids_mean_at(td_histogram_t *h, int pos) {
    if (pos < 0 || pos > h->merged_nodes) {
        return NAN;
    }
    return h->nodes_mean[pos];
 }
--- a/src/sparsemap.c
+++ b/src/sparsemap.c
--- a/tests/midl.c
+++ b/tests/midl.c
@ -0,0 +1,417 @@
 /** @defgroup idls	ID List Management
 *	@{
 */
 /** A generic unsigned ID number. These were entryIDs in back-bdb.
 *	Preferably it should have the same size as a pointer.
 */
 typedef size_t MDB_ID;
 /** An IDL is an ID List, a sorted array of IDs. The first
 * element of the array is a counter for how many actual
 * IDs are in the list. In the original back-bdb code, IDLs are
 * sorted in ascending order. For libmdb IDLs are sorted in
 * descending order.
 */
 typedef MDB_ID *MDB_IDL;
 /* IDL sizes - likely should be even bigger
 *   limiting factors: sizeof(ID), thread stack size
 */
 #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
 #define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
 #define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))
 #define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
 #define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)
 #define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
 #define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
 #define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
 #define MDB_IDL_FIRST(ids) ((ids)[1])
 #define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])
 /** Current max length of an #mdb_midl_alloc()ed IDL */
 #define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])
 /** Append ID to IDL. The IDL must be big enough. */
 #define mdb_midl_xappend(idl, id)             \
  do {                                        \
    MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
    xidl[xlen] = (id);                        \
  } while (0)
 /** Search for an ID in an IDL.
 * @param[in] ids	The IDL to search.
 * @param[in] id	The ID to search for.
 * @return	The index of the first ID greater than or equal to \b id.
 */
 unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);
 /** Allocate an IDL.
 * Allocates memory for an IDL of the given size.
 * @return	IDL on success, NULL on failure.
 */
 MDB_IDL mdb_midl_alloc(int num);
 /** Free an IDL.
 * @param[in] ids	The IDL to free.
 */
 void mdb_midl_free(MDB_IDL ids);
 /** Shrink an IDL.
 * Return the IDL to the default size if it has grown larger.
 * @param[in,out] idp	Address of the IDL to shrink.
 */
 void mdb_midl_shrink(MDB_IDL *idp);
 /** Shrink an IDL to a specific size.
 * Resize the IDL to \b size if it is larger.
 * @param[in,out] idp	Address of the IDL to shrink.
 * @param[in] size	Capacity to have once resized.
 */
 void mdb_midl_shrink(MDB_IDL *idp);
 /** Make room for num additional elements in an IDL.
 * @param[in,out] idp	Address of the IDL.
 * @param[in] num	Number of elements to make room for.
 * @return	0 on success, ENOMEM on failure.
 */
 int mdb_midl_need(MDB_IDL *idp, unsigned num);
 /** Append an ID onto an IDL.
 * @param[in,out] idp	Address of the IDL to append to.
 * @param[in] id	The ID to append.
 * @return	0 on success, ENOMEM if the IDL is too large.
 */
 int mdb_midl_append(MDB_IDL *idp, MDB_ID id);
 /** Append an IDL onto an IDL.
 * @param[in,out] idp	Address of the IDL to append to.
 * @param[in] app	The IDL to append.
 * @return	0 on success, ENOMEM if the IDL is too large.
 */
 int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);
 /** Append an ID range onto an IDL.
 * @param[in,out] idp	Address of the IDL to append to.
 * @param[in] id	The lowest ID to append.
 * @param[in] n		Number of IDs to append.
 * @return	0 on success, ENOMEM if the IDL is too large.
 */
 int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);
 /** Merge an IDL onto an IDL. The destination IDL must be big enough.
 * @param[in] idl	The IDL to merge into.
 * @param[in] merge	The IDL to merge.
 */
 void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);
 /** Sort an IDL.
 * @param[in,out] ids	The IDL to sort.
 */
 void mdb_midl_sort(MDB_IDL ids);
 /* midl.c ------------------------------------------------------------------ */
 /** @defgroup idls	ID List Management
 *	@{
 */
 #define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))
 unsigned
 mdb_midl_search(MDB_IDL ids, MDB_ID id)
 {
  /*
   * binary search of id in ids
   * if found, returns position of id
   * if not found, returns first position greater than id
   */
  unsigned base = 0;
  unsigned cursor = 1;
  int val = 0;
  unsigned n = ids[0];
  while (0 < n) {
    unsigned pivot = n >> 1;
    cursor = base + pivot + 1;
    val = CMP(ids[cursor], id);
    if (val < 0) {
      n = pivot;
    } else if (val > 0) {
      base = cursor;
      n -= pivot + 1;
    } else {
      return cursor;
    }
  }
  if (val > 0) {
    ++cursor;
  }
  return cursor;
 }
 int
 mdb_midl_insert(MDB_IDL ids, MDB_ID id)
 {
  unsigned x, i;
  x = mdb_midl_search(ids, id);
  assert(x > 0);
  if (x < 1) {
    /* internal error */
    return -2;
  }
  if (x <= ids[0] && ids[x] == id) {
    /* duplicate */
    assert(0);
    return -1;
  }
  if (++ids[0] >= MDB_IDL_DB_MAX) {
    /* no room */
    --ids[0];
    return -2;
  } else {
    /* insert id */
    for (i = ids[0]; i > x; i--)
      ids[i] = ids[i - 1];
    ids[x] = id;
  }
  return 0;
 }
 inline void
 mdb_midl_pop_n(MDB_IDL ids, unsigned n)
 {
  ids[0] = ids[0] - n;
 }
 void
 mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
 {
  for (int i = idx - 1; idx < ids[0] - 1;)
    ids[++i] = ids[++idx];
  ids[0] = ids[0] - 1;
 }
 void
 mdb_midl_remove(MDB_IDL ids, MDB_ID id)
 {
  unsigned idx = mdb_midl_search(ids, id);
  if (idx <= ids[0] && ids[idx] == id)
    mdb_midl_remove_at(ids, idx);
 }
 MDB_IDL
 mdb_midl_alloc(int num)
 {
  MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID));
  if (ids) {
    *ids++ = num;
    *ids = 0;
  }
  return ids;
 }
 void
 mdb_midl_free(MDB_IDL ids)
 {
  if (ids)
    free(ids - 1);
 }
 void
 mdb_midl_shrink(MDB_IDL *idp)
 {
  MDB_IDL ids = *idp;
  if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
    *ids++ = MDB_IDL_UM_MAX;
    *idp = ids;
  }
 }
 void
 mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
 {
  MDB_IDL ids = *idp;
  if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) {
    *ids++ = size;
    *idp = ids;
    *idp[0] = *idp[0] > size ? size : *idp[0];
  }
 }
 static int
 mdb_midl_grow(MDB_IDL *idp, int num)
 {
  MDB_IDL idn = *idp - 1;
  /* grow it */
  idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
  if (!idn)
    return ENOMEM;
  *idn++ += num;
  *idp = idn;
  return 0;
 }
 int
 mdb_midl_need(MDB_IDL *idp, unsigned num)
 {
  MDB_IDL ids = *idp;
  num += ids[0];
  if (num > ids[-1]) {
    num = (num + num / 4 + (256 + 2)) & -256;
    if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
      return ENOMEM;
    *ids++ = num - 2;
    *idp = ids;
  }
  return 0;
 }
 int
 mdb_midl_append(MDB_IDL *idp, MDB_ID id)
 {
  MDB_IDL ids = *idp;
  /* Too big? */
  if (ids[0] >= ids[-1]) {
    if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
      return ENOMEM;
    ids = *idp;
  }
  ids[0]++;
  ids[ids[0]] = id;
  return 0;
 }
 int
 mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
 {
  MDB_IDL ids = *idp;
  /* Too big? */
  if (ids[0] + app[0] >= ids[-1]) {
    if (mdb_midl_grow(idp, app[0]))
      return ENOMEM;
    ids = *idp;
  }
  memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
  ids[0] += app[0];
  return 0;
 }
 int
 mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
 {
  MDB_ID *ids = *idp, len = ids[0];
  /* Too big? */
  if (len + n > ids[-1]) {
    if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
      return ENOMEM;
    ids = *idp;
  }
  ids[0] = len + n;
  ids += len;
  while (n)
    ids[n--] = id++;
  return 0;
 }
 void
 mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
 {
  MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
  idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
  old_id = idl[j];
  while (i) {
    merge_id = merge[i--];
    for (; old_id < merge_id; old_id = idl[--j])
      idl[k--] = old_id;
    idl[k--] = merge_id;
  }
  idl[0] = total;
 }
 /* Quicksort + Insertion sort for small arrays */
 #define SMALL 8
 #define MIDL_SWAP(a, b) \
  {                     \
    itmp = (a);         \
    (a) = (b);          \
    (b) = itmp;         \
  }
 void
 mdb_midl_sort(MDB_IDL ids)
 {
  /* Max possible depth of int-indexed tree * 2 items/level */
  int istack[sizeof(int) * CHAR_BIT * 2];
  int i, j, k, l, ir, jstack;
  MDB_ID a, itmp;
  ir = (int)ids[0];
  l = 1;
  jstack = 0;
  for (;;) {
    if (ir - l < SMALL) { /* Insertion sort */
      for (j = l + 1; j <= ir; j++) {
        a = ids[j];
        for (i = j - 1; i >= 1; i--) {
          if (ids[i] >= a)
            break;
          ids[i + 1] = ids[i];
        }
        ids[i + 1] = a;
      }
      if (jstack == 0)
        break;
      ir = istack[jstack--];
      l = istack[jstack--];
    } else {
      k = (l + ir) >> 1; /* Choose median of left, center, right */
      MIDL_SWAP(ids[k], ids[l + 1]);
      if (ids[l] < ids[ir]) {
        MIDL_SWAP(ids[l], ids[ir]);
      }
      if (ids[l + 1] < ids[ir]) {
        MIDL_SWAP(ids[l + 1], ids[ir]);
      }
      if (ids[l] < ids[l + 1]) {
        MIDL_SWAP(ids[l], ids[l + 1]);
      }
      i = l + 1;
      j = ir;
      a = ids[l + 1];
      for (;;) {
        do
          i++;
        while (ids[i] > a);
        do
          j--;
        while (ids[j] < a);
        if (j < i)
          break;
        MIDL_SWAP(ids[i], ids[j]);
      }
      ids[l + 1] = ids[j];
      ids[j] = a;
      jstack += 2;
      if (ir - i + 1 >= j - l) {
        istack[jstack] = ir;
        istack[jstack - 1] = i;
        ir = j - 1;
      } else {
        istack[jstack] = j - 1;
        istack[jstack - 1] = l;
        l = i;
      }
    }
  }
 }
--- a/tests/soak.c
+++ b/tests/soak.c
--- a/tests/test.c
+++ b/tests/test.c
Author	SHA1	Message	Date
Greg Burd	6c8ad3b25f	shrink the buffer as chunks empty (#12 ) Reviewed-on: #12 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>	2024-07-02 13:07:25 +00:00
Gregory Burd	2f6e9bdf7b	Merge pull request 'fuzz testing' (#11 ) from gburd/fixing-the-fuzz into main Reviewed-on: #11	2024-06-27 08:13:15 +00:00
Greg Burd	7335c8e1d8	find a span of unset bits in an empty map	2024-06-27 04:10:08 -04:00
Greg Burd	f7fd8eed9a	spelling	2024-05-23 19:59:24 -04:00
Greg Burd	7ecd2e5dc2	cleanup	2024-05-21 11:59:07 -04:00
Gregory Burd	eae0743b56	rewrite soak with more flexibility and ability to record/playback events to reproduce bugs (#10 ) Reviewed-on: #10	2024-05-21 00:29:26 +00:00
Greg Burd	68c8cd0858	cmake	2024-05-16 22:13:27 +00:00
Greg Burd	e398d014e8	fixes	2024-05-16 12:00:09 -04:00
Greg Burd	a5c62cfe1e	Add CMake build	2024-05-16 10:38:33 -04:00
Greg Burd	7bb26dbe88	vector offset, fix span	2024-05-15 22:03:36 -04:00
Greg Burd	69dd960558	use chunk alignment; new fill factor API	2024-05-15 15:50:15 -04:00
Greg Burd	b028408150	split/merge (#9 ) Reviewed-on: #9 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>	2024-05-15 17:57:40 +00:00
Greg Burd	f5b500087d	split	2024-05-13 12:46:25 -04:00
Greg Burd	604d48617e	revert split and merge for now	2024-05-13 02:07:45 +00:00
Gregory Burd	7a572453c9	Merge pull request 'gburd/full-merge' (#8 ) from gburd/full-merge into main Reviewed-on: #8	2024-05-11 01:26:44 +00:00
Gregory Burd	6990f94278	Merge branch 'main' into gburd/full-merge	2024-05-11 01:26:19 +00:00
Greg Burd	984a1a920d	working	2024-05-11 01:25:15 +00:00
Greg Burd	e641e6cc63	WIP	2024-05-10 20:26:40 +00:00
Greg Burd	32721da645	disable neovim for a bit	2024-05-10 20:26:40 +00:00
Greg Burd	cea31ab184	disable neovim for a bit	2024-05-10 15:32:50 +00:00
Greg Burd	f525a097c7	WIP	2024-05-10 11:27:43 -04:00
Greg Burd	a24a4e1cc5	fixes	2024-05-10 09:40:05 -04:00
Greg Burd	08e8c0a5d1	chunk align	2024-05-09 16:01:28 -04:00
Greg Burd	7494a9a3a2	many new API; fixes	2024-05-09 15:50:56 -04:00
Greg Burd	0e348efaf6	a few more fixes	2024-05-07 08:47:56 -04:00
Greg Burd	367d15a160	integrat ecrupp review suggestions	2024-05-06 15:43:47 -04:00
Greg Burd	3b8e083806	add copy()	2024-05-05 16:46:07 -04:00
Greg Burd	c53bd53f9a	try using env to force ssh key	2024-05-04 20:23:52 +00:00
Greg Burd	a2b1a22a79	fix	2024-05-04 09:45:43 -04:00
Greg Burd	1909954b42	merge amt	2024-05-04 09:44:46 -04:00
Greg Burd	cbd53976f9	merge when out of space	2024-05-04 09:39:19 -04:00
Greg Burd	40de89bd8a	fixes	2024-05-03 22:15:36 +00:00
Greg Burd	756476561f	updates	2024-05-03 22:14:11 +00:00
Greg Burd	4e2b4bde26	spelling	2024-05-03 21:12:57 +00:00
Greg Burd	9dccdcbf76	spelling	2024-05-03 21:00:30 +00:00
Greg Burd	ad910f0adf	add clang-format, etc.	2024-05-03 20:56:14 +00:00
Greg Burd	7c9ecc6d79	fixes to ide	2024-05-03 20:47:41 +00:00
gregburd	ea32274524	add Google IDX config	2024-05-03 20:32:14 +00:00
Greg Burd	4bf1a5a00c	grow map when merge requires it	2024-05-03 16:07:46 -04:00
Greg Burd	2afbfa946e	fixups	2024-05-03 16:03:09 -04:00
Greg Burd	ffc762a796	cleanup	2024-05-03 15:24:57 -04:00
Greg Burd	fcab709f62	roaring and sparsemap should identify the same spans	2024-05-03 15:20:35 -04:00
Greg Burd	b9612f12cc	compare against roaring bitmaps	2024-05-03 15:15:39 -04:00
Greg Burd	57a8f99a32	fixes	2024-05-02 21:35:37 -04:00
Greg Burd	a7754b05ba	cleanup	2024-05-02 21:13:17 -04:00
Greg Burd	86798b32bd	formatting	2024-05-02 14:57:56 -04:00
Greg Burd	c9ae042b40	fix scan	2024-05-02 14:55:04 -04:00
Greg Burd	6b6ed5290f	fixes	2024-05-02 08:55:38 -04:00
Greg Burd	897e1af9df	disable shrinking in soak	2024-05-02 08:46:35 -04:00
Greg Burd	7192c7dcff	fixups	2024-05-01 09:16:50 -04:00
Greg Burd	c65fcebaad	macOS/M2 fixes	2024-05-01 09:15:22 -04:00
Greg Burd	ee695b7243	cleanup	2024-04-30 14:40:23 -04:00
Greg Burd	b37d390e12	fix merge/soak	2024-04-30 13:58:35 -04:00
Greg Burd	94fbf768c0	fixes	2024-04-30 11:25:03 -04:00
Greg Burd	2dac3ed385	adding a merge map function (#6 ) Reviewed-on: #6 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>	2024-04-30 15:20:48 +00:00
Greg Burd	4ebe555fac	docs	2024-04-29 12:10:21 -04:00
Greg Burd	ef7ac48e32	fix header	2024-04-28 16:40:58 -04:00
Greg Burd	faea4510dc	buffer when not debugging	2024-04-28 12:28:58 -04:00
Greg Burd	c742185b73	fixes	2024-04-28 12:26:31 -04:00
Greg Burd	0297757856	adding soak test (#5 ) Reviewed-on: #5 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>	2024-04-26 20:25:17 +00:00
Greg Burd	b3dfd745e7	select/rank for unset as well as set bits (#4 ) Reviewed-on: #4 Co-authored-by: Greg Burd <greg@burd.me> Co-committed-by: Greg Burd <greg@burd.me>	2024-04-24 20:32:09 +00:00