Compare commits
1 commit
main
...
gburd/resi
Author | SHA1 | Date | |
---|---|---|---|
d8065c4276 |
29 changed files with 1058 additions and 34761 deletions
|
@ -3,11 +3,8 @@ Checks: >
|
|||
bugprone-*,
|
||||
clang-analyzer-*,
|
||||
google-*,
|
||||
-google-objectivec-*,
|
||||
modernize-*,
|
||||
-modernize-deprecated-headers,
|
||||
-modernize-use-using,
|
||||
misc-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
portability-*,
|
||||
-bugprone-branch-clone,
|
||||
|
|
2
.envrc
2
.envrc
|
@ -1,5 +1,5 @@
|
|||
if ! has nix_direnv_version || ! nix_direnv_version 3.0.4; then
|
||||
source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/3.0.4/direnvrc" "sha256-DzlYZ33mWF/Gs8DDeyjr8mnVmQGx7ASYqA5WlxwvBG4="
|
||||
fi
|
||||
watch_file shell.nix flake.nix
|
||||
watch_file devShell.nix shell.nix flake.nix
|
||||
use flake || use nix
|
||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -3,7 +3,6 @@
|
|||
**/*.o
|
||||
tests/test
|
||||
examples/ex_?
|
||||
tests/soak
|
||||
.cache
|
||||
hints.txt
|
||||
tmp/
|
||||
|
@ -28,7 +27,6 @@ compile_commands.json
|
|||
*.dat
|
||||
*.fsm
|
||||
*.db
|
||||
.vscode/
|
||||
|
||||
# Created by https://www.gitignore.io/api/jetbrains
|
||||
# Edit at https://www.gitignore.io/?templates=jetbrains
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CLionExternalBuildManager">
|
||||
<target id="db0ccaeb-4851-470b-83d0-afa663f6ceb9" name="tests/soak" defaultType="MAKE">
|
||||
<configuration id="98973a90-a9d0-431b-9071-9ce6960b0b01" name="tests/soak">
|
||||
<build type="MAKE">
|
||||
<make targetName="tests/soak" />
|
||||
</build>
|
||||
<clean type="MAKE">
|
||||
<make targetName="clean" />
|
||||
</clean>
|
||||
</configuration>
|
||||
</target>
|
||||
</component>
|
||||
</project>
|
|
@ -1,25 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="MakefileBuildTargetsManager">
|
||||
<user-build-targets>
|
||||
<build-target name="tests/soak">
|
||||
<build-configurations>
|
||||
<build-configuration>
|
||||
<make-targets>
|
||||
<make-target>tests/soak</make-target>
|
||||
</make-targets>
|
||||
</build-configuration>
|
||||
</build-configurations>
|
||||
</build-target>
|
||||
<build-target name="clean">
|
||||
<build-configurations>
|
||||
<build-configuration>
|
||||
<make-targets>
|
||||
<make-target>clean</make-target>
|
||||
</make-targets>
|
||||
</build-configuration>
|
||||
</build-configurations>
|
||||
</build-target>
|
||||
</user-build-targets>
|
||||
</component>
|
||||
</project>
|
|
@ -8,7 +8,6 @@
|
|||
<sourceRoots>
|
||||
<file path="$PROJECT_DIR$/examples" />
|
||||
<file path="$PROJECT_DIR$/include" />
|
||||
<file path="$PROJECT_DIR$/lib" />
|
||||
<file path="$PROJECT_DIR$/src" />
|
||||
<file path="$PROJECT_DIR$/tests" />
|
||||
</sourceRoots>
|
||||
|
|
105
.idx/dev.nix
105
.idx/dev.nix
|
@ -1,105 +0,0 @@
|
|||
# To learn more about how to use Nix to configure your environment
|
||||
# see: https://developers.google.com/idx/guides/customize-idx-env
|
||||
{ pkgs, ... }: {
|
||||
# Which nixpkgs channel to use.
|
||||
channel = "stable-23.11"; # or "unstable"
|
||||
# Use https://search.nixos.org/packages to find packages
|
||||
packages = with pkgs; [
|
||||
act
|
||||
autoconf
|
||||
clang
|
||||
clang-tools
|
||||
cmake
|
||||
ed
|
||||
fira-code-nerdfont
|
||||
gcc
|
||||
gdb
|
||||
gettext
|
||||
glibc.out
|
||||
glibc.static
|
||||
gnumake
|
||||
graphviz-nox
|
||||
libbacktrace
|
||||
libtool
|
||||
lldb
|
||||
m4
|
||||
neovim
|
||||
ninja
|
||||
openssh
|
||||
perl
|
||||
pkg-config
|
||||
python3
|
||||
ripgrep
|
||||
# pkgs.python311
|
||||
# pkgs.python311Packages.pip
|
||||
];
|
||||
# Sets environment variables in the workspace
|
||||
env = {
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/id_ed25519 -F /dev/null";
|
||||
};
|
||||
idx = {
|
||||
# Search for the extensions you want on https://open-vsx.org/ and use "publisher.id"
|
||||
extensions = [
|
||||
"coolbear.systemd-unit-file"
|
||||
"dotjoshjohnson.xml"
|
||||
"editorconfig.editorconfig"
|
||||
"esbenp.prettier-vscode"
|
||||
"mads-hartmann.bash-ide-vscode"
|
||||
"ms-python.python"
|
||||
"ms-vscode.clangd"
|
||||
"ms-vscode.cmake-tools"
|
||||
"ms-vscode.cpptools"
|
||||
"ms-vscode.cpptools-extension-pack"
|
||||
"ms-vscode.makefile-tools"
|
||||
"ms-vsliveshare.vsliveshare"
|
||||
"redhat.vscode-yaml"
|
||||
"rogalmic.bash-debug"
|
||||
"ryu1kn.partial-diff"
|
||||
"streetsidesoftware.code-spell-checker"
|
||||
"timonwong.shellcheck"
|
||||
"twxs.cmake"
|
||||
"vadimcn.vscode-lldb"
|
||||
#"vscode-icons-team.vscode-icons"
|
||||
"yzhang.markdown-all-in-one"
|
||||
"znck.grammarly"
|
||||
#"llvm-vs-code-extensions.vscode-clangd"
|
||||
#"eamodio.gitlens"
|
||||
"asvetliakov.vscode-neovim"
|
||||
#"golang.go"
|
||||
#"jnoortheen.nix-ide"
|
||||
#"ms-python.vscode-pylance"
|
||||
#"mspython.debugpy"
|
||||
#"scala-lang.scala"
|
||||
#"scalameta.metals"
|
||||
#"vscodevim.vim"
|
||||
];
|
||||
# Enable previews
|
||||
previews = {
|
||||
enable = true;
|
||||
previews = {
|
||||
# web = {
|
||||
# # Example: run "npm run dev" with PORT set to IDX's defined port for previews,
|
||||
# # and show it in IDX's web preview panel
|
||||
# command = ["npm" "run" "dev"];
|
||||
# manager = "web";
|
||||
# env = {
|
||||
# # Environment variables to set for your server
|
||||
# PORT = "$PORT";
|
||||
# };
|
||||
# };
|
||||
};
|
||||
};
|
||||
# Workspace lifecycle hooks
|
||||
workspace = {
|
||||
# Runs when a workspace is first created
|
||||
onCreate = {
|
||||
# Example: install JS dependencies from NPM
|
||||
# npm-install = 'npm install';
|
||||
};
|
||||
onStart = {
|
||||
# Example: start a background task to watch and re-build backend code
|
||||
# watch-backend = "npm run watch-backend";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
|
@ -1,94 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.27)
|
||||
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
|
||||
message(FATAL_ERROR "Do not build in-source. Please remove CMakeCache.txt and the CMakeFiles/ directory. Then build out-of-source.")
|
||||
endif()
|
||||
|
||||
project(sparsemap LANGUAGES C)
|
||||
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_C_OUTPUT_EXTENSION .o)
|
||||
|
||||
# Set source and header file locations
|
||||
set(SOURCE_DIR src)
|
||||
set(HEADER_DIR include)
|
||||
|
||||
set(COMMON_CMAKE_C_FLAGS "-Wall -Wextra -Wpedantic")
|
||||
set(CMAKE_C_FLAGS_DEBUG "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -O0")
|
||||
set(CMAKE_C_FLAGS_PROFILE "-DSPARSEMAP_DIAGNOSTIC -DDEBUG -g -Og -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope")
|
||||
set(CMAKE_C_FLAGS_RELEASE "-Ofast")
|
||||
|
||||
# Include all header files from the header directory
|
||||
file(GLOB_RECURSE HEADERS CONFIGURE_FILES ${HEADER_DIR}/*.h)
|
||||
|
||||
# Configure library sources
|
||||
set(LIB_SRC
|
||||
${SOURCE_DIR}/sparsemap.c
|
||||
)
|
||||
|
||||
# Option to control building shared/static libraries
|
||||
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
|
||||
|
||||
# Add shared library
|
||||
add_library(sparsemap_SHARED SHARED ${LIB_SRC} ${HEADERS})
|
||||
# Set target properties for the shared library (adjust if needed)
|
||||
set_target_properties(sparsemap_SHARED PROPERTIES
|
||||
VERSION 1.0.0 # Set library version
|
||||
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" # Set output directory
|
||||
COMPILE_FLAGS "${CMAKE_C_FLAGS_${CMAKE_CURRENT_LIST_MODE}}"
|
||||
)
|
||||
target_include_directories(sparsemap_SHARED PRIVATE ${HEADER_DIR})
|
||||
|
||||
# Add static library
|
||||
add_library(sparsemap STATIC ${LIB_SRC} ${HEADERS})
|
||||
# Set target properties for static library (adjust if needed)
|
||||
set_target_properties(sparsemap PROPERTIES
|
||||
OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" # Set output directory
|
||||
)
|
||||
target_include_directories(sparsemap PRIVATE ${HEADER_DIR})
|
||||
|
||||
# Add ex_1 program
|
||||
add_executable(ex_1 examples/ex_1.c tests/munit.c lib/common.c)
|
||||
target_link_libraries(ex_1 PRIVATE sparsemap)
|
||||
target_include_directories(ex_1 PRIVATE ${HEADER_DIR})
|
||||
add_custom_target(run_ex_1 COMMAND ex_1 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add ex_2 program
|
||||
add_executable(ex_2 examples/ex_2.c tests/munit.c lib/common.c)
|
||||
target_link_libraries(ex_2 PRIVATE sparsemap)
|
||||
target_include_directories(ex_2 PRIVATE ${HEADER_DIR})
|
||||
add_custom_target(run_ex_2 COMMAND ex_2 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add ex_3 program
|
||||
add_executable(ex_3 examples/ex_3.c tests/munit.c lib/common.c)
|
||||
target_link_libraries(ex_3 PRIVATE sparsemap)
|
||||
target_include_directories(ex_3 PRIVATE ${HEADER_DIR})
|
||||
add_custom_target(run_ex_3 COMMAND ex_3 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add ex_4 program
|
||||
add_executable(ex_4 examples/ex_4.c tests/munit.c lib/common.c)
|
||||
target_link_libraries(ex_4 PRIVATE sparsemap)
|
||||
target_include_directories(ex_4 PRIVATE ${HEADER_DIR})
|
||||
add_custom_target(run_ex_4 COMMAND ex_4 WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add test program
|
||||
add_executable(test tests/test.c tests/munit.c lib/common.c)
|
||||
target_link_libraries(test PRIVATE sparsemap)
|
||||
target_include_directories(test PRIVATE ${HEADER_DIR})
|
||||
add_custom_target(run_test COMMAND test WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add soak program
|
||||
add_executable(soak tests/soak.c lib/common.c lib/tdigest.c lib/roaring.c)
|
||||
target_link_libraries(soak PRIVATE sparsemap)
|
||||
target_include_directories(soak PRIVATE ${HEADER_DIR} lib)
|
||||
target_link_libraries(soak PUBLIC m)
|
||||
add_custom_target(run_soak COMMAND soak WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# Add fuzzer program
|
||||
# add_executable(fuzzer tests/fuzzer.c)
|
||||
# target_link_libraries(fuzzer PRIVATE sparsemap)
|
||||
# target_include_directories(fuzzer PRIVATE ${HEADER_DIR} lib)
|
||||
# target_link_libraries(fuzzer PUBLIC m)
|
||||
# add_custom_target(run_fuzzer COMMAND fuzzer WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
69
Makefile
69
Makefile
|
@ -3,24 +3,16 @@ OBJS = sparsemap.o
|
|||
STATIC_LIB = libsparsemap.a
|
||||
SHARED_LIB = libsparsemap.so
|
||||
|
||||
LIBS = -lm
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
|
||||
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Ofast -g -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
|
||||
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
|
||||
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
|
||||
|
||||
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
||||
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
||||
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
||||
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
|
||||
TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
|
||||
|
||||
TESTS = tests/test tests/soak
|
||||
TEST_OBJS = tests/test.o lib/munit.o lib/tdigest.o lib/common.o
|
||||
LIB_OBJS = lib/munit.o lib/tdigest.o lib/common.o lib/roaring.o
|
||||
TESTS = tests/test
|
||||
TEST_OBJS = tests/test.o tests/munit.o tests/common.o
|
||||
EXAMPLES = examples/ex_1 examples/ex_2 examples/ex_3 examples/ex_4
|
||||
|
||||
.PHONY: all shared static clean test examples mls
|
||||
|
@ -37,23 +29,17 @@ $(STATIC_LIB): $(OBJS)
|
|||
$(SHARED_LIB): $(OBJS)
|
||||
$(CC) $(CFLAGS) -o $@ $? -shared
|
||||
|
||||
examples: $(STATIC_LIB) $(EXAMPLES) $(TEST_OBJS)
|
||||
examples: $(STATIC_LIB) $(EXAMPLES) examples/common.o
|
||||
|
||||
mls: examples/mls
|
||||
|
||||
tests: $(TESTS)
|
||||
test: $(TESTS)
|
||||
|
||||
test: tests
|
||||
check: test
|
||||
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
|
||||
|
||||
soak: tests
|
||||
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak
|
||||
|
||||
fuzzer: tests
|
||||
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/fuzzer ./crash.case
|
||||
|
||||
tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
tests/test: $(TEST_OBJS) $(STATIC_LIB)
|
||||
$(CC) $^ -o $@ $(TEST_FLAGS)
|
||||
|
||||
clean:
|
||||
rm -f $(OBJS)
|
||||
|
@ -63,44 +49,31 @@ clean:
|
|||
rm -f $(EXAMPLES) examples/*.o
|
||||
|
||||
format:
|
||||
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/soak.c tests/test.c tests/midl.c lib/common.c include/common.h
|
||||
clang-format -i src/sparsemap.c include/sparsemap.h examples/ex_*.c tests/test.c tests/common.c tests/common.h
|
||||
# clang-format -i include/*.h src/*.c tests/*.c tests/*.h examples/*.c
|
||||
|
||||
%.o: src/%.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $^
|
||||
|
||||
lib/%.o: tests/%.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $^
|
||||
|
||||
tests/%.o: tests/%.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $^
|
||||
|
||||
examples/%.o: examples/%.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $^
|
||||
|
||||
examples/ex_1: $(LIB_OBJS) examples/ex_1.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
examples/common.o: tests/common.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $^
|
||||
|
||||
examples/ex_2: $(LIB_OBJS) examples/ex_2.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
examples/ex_1: examples/common.o examples/ex_1.o $(STATIC_LIB)
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||
|
||||
examples/ex_3: $(LIB_OBJS) examples/ex_3.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
examples/ex_2: examples/common.o examples/ex_2.o $(STATIC_LIB)
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||
|
||||
examples/ex_4: $(LIB_OBJS) examples/ex_4.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
examples/ex_3: examples/common.o examples/ex_3.o $(STATIC_LIB)
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||
|
||||
tests/soak: $(LIB_OBJS) tests/soak.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
|
||||
|
||||
tests/fuzzer: $(LIB_OBJS) tests/fuzzer.o $(STATIC_LIB)
|
||||
$(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS) -DFUZZ_DEBUG
|
||||
|
||||
todo:
|
||||
rg -i 'todo|gsb|abort'
|
||||
examples/ex_4: examples/common.o examples/ex_4.o $(STATIC_LIB)
|
||||
$(CC) $^ -o $@ $(CFLAGS) $(TEST_FLAGS)
|
||||
|
||||
# cp src/sparsemap.c /tmp && clang-tidy src/sparsemap.c -fix -fix-errors -checks="readability-braces-around-statements" -- -DDEBUG -DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_ASSERT -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
|
||||
|
||||
# clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/test
|
||||
|
||||
# clear; make clean examples test && env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./examples/soak
|
||||
|
|
37
README.md
37
README.md
|
@ -1,12 +1,8 @@
|
|||
# Sparsemap
|
||||
|
||||
Bitsets, also called bitmaps, are commonly used as fast data structures.
|
||||
Unfortunately, they can use too much memory. To compensate, we often use
|
||||
compressed bitmaps.
|
||||
|
||||
`sparsemap` is a sparse, compressed bitmap. In the best case, it can store 2048
|
||||
bits in just 8 bytes. In the worst case, it stores the 2048 bits uncompressed and
|
||||
requires an additional 8 bytes of overhead.
|
||||
`sparsemap` is a sparse, compressed bitmap. In best case, it can store 2048
|
||||
bits in just 8 bytes. In worst case, it stores the 2048 bits uncompressed and
|
||||
requires additional 8 bytes of overhead.
|
||||
|
||||
The "best" case happens when large consecutive sequences of the bits are
|
||||
either set ("1") or not set ("0"). If your numbers are consecutive 64bit
|
||||
|
@ -18,9 +14,9 @@ On the lowest level stores bits in sm_bitvec_t's (a uint32_t or uint64_t).
|
|||
|
||||
Each sm_bitvec_t has an additional descriptor (2 bits). A single word prepended
|
||||
to each sm_bitvec_t describes its condition. The descriptor word and the
|
||||
sm_bitvec_t's have the same size. The descriptor of a sm_bitvec_t
|
||||
sm_bitvec_t's have the same size.) The descriptor of a sm_bitvec_t
|
||||
specifies whether the sm_bitvec_t consists only of set bits ("1"), unset
|
||||
bits ("0") or has a mixed payload. In the first and second cases, the
|
||||
bits ("0") or has a mixed payload. In the first and second case the
|
||||
sm_bitvec_t is not stored.
|
||||
|
||||
An example shows a sequence of 4 x 16 bits (here, each sm_bitvec_t and the
|
||||
|
@ -31,7 +27,7 @@ Descriptor word has 16 bits):
|
|||
^^ ^^ ^^ ^^-- sm_bitvec_t #0 - #3 are "0000000000000000"
|
||||
^^-- sm_bitvec_t #4 is "1111111111111111"
|
||||
^^-- sm_bitvec_t #5 is "0000000000000000"
|
||||
^^-- sm_bitvec_t #6 is "1111111111111111"
|
||||
^^-- sm_bitvec_t #7 is "1111111111111111"
|
||||
^^-- sm_bitvec_t #7 is "0110010101111001"
|
||||
|
||||
Since the first 7 sm_bitvec_t's are either all "1" or "0" they are not stored.
|
||||
|
@ -40,34 +36,29 @@ The actual memory sequence looks like this:
|
|||
0000000011001110 0110010101111001
|
||||
|
||||
Instead of storing 8 Words (16 bytes), we only store 2 Words (2 bytes): one
|
||||
for the descriptor, and one for the last sm_bitvec_t #7.
|
||||
for the descriptor, one for last sm_bitvec_t #7.
|
||||
|
||||
The sparsemap stores a list of chunk maps, and for each chunk map, it stores the
|
||||
The sparsemap stores a list of chunk maps, and for each chunk map it stores the
|
||||
absolute address (i.e. if the user sets bit 0 and bit 10000, and the chunk map
|
||||
capacity is 2048, the sparsemap creates two chunk maps; the first starts at
|
||||
offset 0, the second starts at offset 8192).
|
||||
|
||||
## Usage instructions
|
||||
|
||||
Copy the files `src/sparsemap.c` and `include/sparsemap.h` into your project.
|
||||
Review the `examples/*` and `tests/*` code.
|
||||
The file `examples/ex_1.c` has example code.
|
||||
|
||||
## Final words
|
||||
|
||||
This bitmap has efficient compression when used on long sequences of set (or
|
||||
unset) bits (i.e. with a word size of 64 bit and a payload of consecutive
|
||||
unset) bits (i.e. with a word size of 64bit, and a payload of consecutive
|
||||
numbers without gaps, the payload of 2048 x sizeof(uint64_t) = 16kb will occupy
|
||||
only 8 bytes!).
|
||||
only 8 bytes!
|
||||
|
||||
However, if the sequence is not consecutive and has gaps, it's possible that
|
||||
the compression is inefficient, and the size (in the worst case) is identical
|
||||
to an uncompressed bit vector (sometimes higher due to the bytes required for
|
||||
metadata). In such cases, other compression schemes are more efficient (i.e.
|
||||
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/). We
|
||||
include in `lib` the amalgamated (git `2dc8070`) and well-known
|
||||
[Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring/tree/master) and
|
||||
use it in the soak test to ensure our results are as accurate as theirs.
|
||||
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).
|
||||
|
||||
This library was originally created by [Christoph Rupp](https://crupp.de) in
|
||||
C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
|
||||
for use in LMDB and OpenLDAP.
|
||||
This library was originally created for hamsterdb [http://hamsterdb.com] in
|
||||
C++ and then translated to C99 code by Greg Burd <greg@burd.me>.
|
||||
|
|
14
cmake-it.sh
14
cmake-it.sh
|
@ -1,14 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
target=${1:-Debug}
|
||||
set targets="Debug Profile Release"
|
||||
case "$target" in
|
||||
$targets*) echo "Building ${target}..." ;;
|
||||
*) echo "Unknown target ${target}, exiting." ;;
|
||||
esac
|
||||
name=${target,,}
|
||||
echo $name
|
||||
rm -rf "./cmake-build-${name}-system" && \
|
||||
cmake -DCMAKE_BUILD_TYPE=${target} -DCMAKE_MAKE_PROGRAM=ninja -DCMAKE_C_COMPILER=clang -G Ninja -S "${PWD}" -B "${PWD}/cmake-build-${name}-system" && \
|
||||
(cd "${PWD}/cmake-build-${name}-system" && ninja)
|
||||
|
|
@ -1,7 +1,4 @@
|
|||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../include/sparsemap.h"
|
||||
|
@ -17,18 +14,16 @@
|
|||
|
||||
/* !!! Duplicated here for testing purposes. Keep in sync, or suffer. !!! */
|
||||
struct sparsemap {
|
||||
uint8_t *m_data;
|
||||
size_t m_capacity;
|
||||
size_t m_data_used;
|
||||
uint8_t *m_data;
|
||||
};
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
size_t size = 4;
|
||||
setvbuf(stdout, NULL, _IONBF, 0); // Disable buffering for stdout
|
||||
setvbuf(stderr, NULL, _IONBF, 0); // Disable buffering for stdout
|
||||
|
||||
setbuf(stderr, 0); // disable buffering
|
||||
__diag("Please wait a moment...");
|
||||
sparsemap_t mmap, *map = &mmap;
|
||||
uint8_t buffer[1024];
|
||||
|
@ -140,7 +135,7 @@ main()
|
|||
sparsemap_set(map, i, true);
|
||||
}
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
assert(sparsemap_select(map, i, true) == (unsigned)i);
|
||||
assert(sparsemap_select(map, i) == (unsigned)i);
|
||||
}
|
||||
|
||||
sparsemap_clear(map);
|
||||
|
@ -150,7 +145,7 @@ main()
|
|||
sparsemap_set(map, i, true);
|
||||
}
|
||||
for (int i = 1; i < 513; i++) {
|
||||
assert(sparsemap_select(map, i - 1, true) == (unsigned)i);
|
||||
assert(sparsemap_select(map, i - 1) == (unsigned)i);
|
||||
}
|
||||
|
||||
sparsemap_clear(map);
|
||||
|
@ -160,10 +155,10 @@ main()
|
|||
sparsemap_set(map, i * 10, true);
|
||||
}
|
||||
for (size_t i = 0; i < 8; i++) {
|
||||
assert(sparsemap_select(map, i, true) == (sparsemap_idx_t)i * 10);
|
||||
assert(sparsemap_select(map, i) == i * 10);
|
||||
}
|
||||
|
||||
// split and move, aligned to MiniMap capacity
|
||||
// split and move, aligned to chunk map capacity
|
||||
sparsemap_t _sm2, *sm2 = &_sm2;
|
||||
sparsemap_init(sm2, buffer2, sizeof(buffer2));
|
||||
sparsemap_clear(sm2);
|
||||
|
@ -186,17 +181,15 @@ main()
|
|||
sparsemap_clear(map);
|
||||
for (int i = 0; i < 2048 * 3; i++) {
|
||||
sparsemap_set(map, i, true);
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
}
|
||||
sparsemap_split(map, 64, sm2);
|
||||
for (int i = 0; i < 2048 * 3; i++) {
|
||||
if (i < 64) {
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
assert(sparsemap_is_set(sm2, i) == false);
|
||||
} else {
|
||||
assert(sparsemap_is_set(map, i) == false);
|
||||
assert(sparsemap_is_set(sm2, i) == true);
|
||||
}
|
||||
for (int i = 0; i < 64; i++) {
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
assert(sparsemap_is_set(sm2, i) == false);
|
||||
}
|
||||
for (int i = 64; i < 2048 * 3; i++) {
|
||||
assert(sparsemap_is_set(map, i) == false);
|
||||
assert(sparsemap_is_set(sm2, i) == true);
|
||||
}
|
||||
|
||||
fprintf(stderr, " ok\n");
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/sparsemap.h"
|
||||
|
||||
|
@ -14,32 +16,35 @@
|
|||
} while (0)
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#define SEED
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int i;
|
||||
int i = 0;
|
||||
|
||||
// disable buffering
|
||||
setvbuf(stdout, NULL, _IONBF, 0); // Disable buffering for stdout
|
||||
setvbuf(stderr, NULL, _IONBF, 0); // Disable buffering for stdout
|
||||
setbuf(stderr, 0);
|
||||
|
||||
// start with a 1KiB buffer, 1024 bits
|
||||
uint8_t *buf = calloc(1024, sizeof(uint8_t));
|
||||
|
||||
// create the sparse bitmap
|
||||
sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 1024);
|
||||
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024);
|
||||
map->resize = sparsemap_on_heap_resize_fn;
|
||||
|
||||
// Set every other bit (pathologically worst case) to see what happens
|
||||
// when the map is full.
|
||||
for (i = 0; i < 7744; i++) {
|
||||
if (!i % 2) {
|
||||
sparsemap_set(map, i, true);
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
}
|
||||
if (i % 2)
|
||||
continue;
|
||||
sparsemap_set(map, i, true);
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
}
|
||||
// On 1024 KiB of buffer with every other bit set the map holds 7744 bits
|
||||
// and then runs out of space. This next _set() call will fail.
|
||||
sparsemap_set(map, ++i, true);
|
||||
// and then runs out of space. This next _set() call will fail/abort.
|
||||
i++;
|
||||
sparsemap_set(map, i, true);
|
||||
assert(sparsemap_is_set(map, i) == true);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
#include <assert.h>
|
||||
#include <common.h>
|
||||
#include <sparsemap.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/sparsemap.h"
|
||||
#include "../tests/common.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int i;
|
||||
int i = 0;
|
||||
int array[1024] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
|
||||
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
|
||||
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
|
||||
|
@ -57,7 +60,7 @@ main(void)
|
|||
uint8_t *buf = calloc(1024, sizeof(uint8_t));
|
||||
|
||||
// create the sparse bitmap
|
||||
sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 1024);
|
||||
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024);
|
||||
|
||||
// set all the bits on in a random order
|
||||
for (i = 0; i < 1024; i++) {
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#include <assert.h>
|
||||
#include <common.h>
|
||||
#include <sparsemap.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/sparsemap.h"
|
||||
#include "../tests/common.h"
|
||||
|
||||
#define TEST_ARRAY_SIZE 1024
|
||||
|
||||
int
|
||||
|
@ -23,7 +24,7 @@ main(void)
|
|||
uint8_t *buf = calloc((size_t)3 * 1024, sizeof(uint8_t));
|
||||
|
||||
// create the sparse bitmap
|
||||
sparsemap_t *map = sparsemap_wrap(buf, sizeof(uint8_t) * 3 * 1024);
|
||||
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 3 * 1024);
|
||||
|
||||
// create an array of ints
|
||||
setup_test_array(array, TEST_ARRAY_SIZE, 1024 * 3);
|
||||
|
@ -59,7 +60,7 @@ main(void)
|
|||
assert(sparsemap_is_set(map, array[i]) == true);
|
||||
}
|
||||
has_span(map, array, TEST_ARRAY_SIZE, (int)len);
|
||||
size_t l = sparsemap_span(map, 0, len, true);
|
||||
size_t l = sparsemap_span(map, 0, len);
|
||||
if (l != (size_t)-1) {
|
||||
__diag("Found span in map starting at %lu of length %lu\n", l, len);
|
||||
__diag("is_span(%lu, %lu) == %s\n", l, len, is_span(array, TEST_ARRAY_SIZE, l, len) ? "yes" : "no");
|
||||
|
|
76
flake.lock
76
flake.lock
|
@ -1,43 +1,6 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1701282334,
|
||||
"narHash": "sha256-MxCVrXY6v4QmfTwIysjjaX0XUhqBbxTWWB4HXtDYsdk=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "057f9aecfb71c4437d2b27d3323df7f93c010b7e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "23.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs",
|
||||
"utils": "utils"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"utils": {
|
||||
"flake-utils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
|
@ -54,6 +17,43 @@
|
|||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1712192574,
|
||||
"narHash": "sha256-LbbVOliJKTF4Zl2b9salumvdMXuQBr2kuKP5+ZwbYq4=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "f480f9d09e4b4cf87ee6151eba068197125714de",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixpkgs-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
|
|
100
flake.nix
100
flake.nix
|
@ -1,57 +1,59 @@
|
|||
{
|
||||
description = "A sparse bitmapped index library in C.";
|
||||
description = "A Concurrent Skip List library for key/value pairs.";
|
||||
|
||||
inputs = {
|
||||
# nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/23.11";
|
||||
utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, ... }
|
||||
@inputs: inputs.utils.lib.eachSystem [
|
||||
"x86_64-linux" "i686-linux" "aarch64-linux" "x86_64-darwin"
|
||||
] (system:
|
||||
let pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
overlays = [];
|
||||
config.allowUnfree = true;
|
||||
};
|
||||
in {
|
||||
flake-utils.inputs.systems.follows = "system";
|
||||
devShell = pkgs.mkShell rec {
|
||||
name = "sparsemap";
|
||||
packages = with pkgs; [
|
||||
act
|
||||
autoconf
|
||||
clang
|
||||
cmake
|
||||
ed
|
||||
gcc
|
||||
gdb
|
||||
gettext
|
||||
graphviz-nox
|
||||
libtool
|
||||
m4
|
||||
ninja
|
||||
perl
|
||||
pkg-config
|
||||
python3
|
||||
ripgrep
|
||||
valgrind
|
||||
];
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
libbacktrace
|
||||
glibc.out
|
||||
glibc.static
|
||||
];
|
||||
|
||||
shellHook = let
|
||||
icon = "f121";
|
||||
in ''
|
||||
export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} (${name}) \\$ \[$(tput sgr0)\]"
|
||||
'';
|
||||
outputs =
|
||||
{ self
|
||||
, nixpkgs
|
||||
, flake-utils
|
||||
, ...
|
||||
}:
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
config = { allowUnfree = true; };
|
||||
};
|
||||
supportedSystems = [ "x86_64-linux" ];
|
||||
forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
|
||||
nixpkgsFor = forAllSystems (system: import nixpkgs {
|
||||
inherit system;
|
||||
overlays = [ self.overlay ];
|
||||
});
|
||||
in {
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
devShell = nixpkgs.legacyPackages.${system} {
|
||||
pkgs.mkShell = {
|
||||
nativeBuildInputs = with pkgs.buildPackages; [
|
||||
act
|
||||
autoconf
|
||||
clang
|
||||
ed
|
||||
gcc
|
||||
gdb
|
||||
gettext
|
||||
graphviz-nox
|
||||
libtool
|
||||
m4
|
||||
perl
|
||||
pkg-config
|
||||
python3
|
||||
ripgrep
|
||||
valgrind
|
||||
];
|
||||
buildInputs = with pkgs; [
|
||||
libbacktrace
|
||||
glibc.out
|
||||
glibc.static
|
||||
];
|
||||
};
|
||||
DOCKER_BUILDKIT = 1;
|
||||
};
|
||||
};
|
||||
DOCKER_BUILDKIT = 1;
|
||||
});
|
||||
}
|
||||
|
|
2908
include/roaring.h
2908
include/roaring.h
File diff suppressed because it is too large
Load diff
|
@ -69,14 +69,14 @@
|
|||
#ifndef SPARSEMAP_H
|
||||
#define SPARSEMAP_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* The public interface for a sparse bit-mapped index, a "sparse map".
|
||||
|
@ -88,299 +88,80 @@ extern "C" {
|
|||
*/
|
||||
|
||||
typedef struct sparsemap sparsemap_t;
|
||||
typedef size_t sparsemap_idx_t;
|
||||
#define SPARSEMAP_IDX_MAX SIZE_MAX
|
||||
#define SPARSEMAP_FOUND(x) ((x) != SPARSEMAP_IDX_MAX)
|
||||
#define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX)
|
||||
typedef uint32_t sm_idx_t;
|
||||
typedef uint64_t sm_bitvec_t;
|
||||
|
||||
/** @brief Allocate a new, empty sparsemap_t with a buffer of \b size on the
|
||||
* heap to use for storage of bitmap data.
|
||||
*
|
||||
* The buffer used for the bitmap is allocated in the same heap allocation as
|
||||
* the structure, this means that you only need to call free() on the returned
|
||||
* object to free all resources. Using this method allows you to grow the
|
||||
* buffer size by calling #sparsemap_set_data_size(). This function calls
|
||||
* #sparsemap_init().
|
||||
*
|
||||
* @param[in] size The starting size of the buffer used for the bitmap, default
|
||||
* is 1024 bytes.
|
||||
* @returns The newly allocated sparsemap reference.
|
||||
*/
|
||||
sparsemap_t *sparsemap(size_t size);
|
||||
typedef enum {
|
||||
/* return code for set(): needs to grow this __sm_chunk_t */
|
||||
SM_NEEDS_TO_GROW = 1,
|
||||
|
||||
/** @brief Allocate a new, copy of the \b other sparsemap_t.
|
||||
*
|
||||
* @param[in] other The sparsemap to copy.
|
||||
*/
|
||||
sparsemap_t *sparsemap_copy(sparsemap_t *other);
|
||||
/* return code for set(): needs to shrink this __sm_chunk_t */
|
||||
SM_NEEDS_TO_SHRINK = 2
|
||||
} sparsemap_adaptations_t;
|
||||
|
||||
/** @brief Allocate a new, empty sparsemap_t that references (wraps) the buffer
|
||||
* \b data of \b size bytes to use for storage of bitmap data.
|
||||
*
|
||||
* This function allocates a new sparsemap_t but not the buffer which is
|
||||
* provided by the caller as \b data which can be allocated on the stack or
|
||||
* heap. Caller is responsible for calling free() on the returned heap object
|
||||
* and releasing the memory used for \b data. Resizing the buffer is only
|
||||
* supported when the heap object for the map includes the buffer and the
|
||||
* \b data offset supplied is relative to the object (see #sparsemap()).
|
||||
*
|
||||
* @param[in] data A heap or stack memory buffer of \b size for use storing
|
||||
* bitmap data.
|
||||
* @param[in] size The size of the buffer \b data used for the bitmap.
|
||||
* @returns The newly allocated sparsemap reference.
|
||||
*/
|
||||
sparsemap_t *sparsemap_wrap(uint8_t *data, size_t size);
|
||||
typedef struct sparsemap {
|
||||
uint8_t *m_data; /* The serialized bitmap data */
|
||||
size_t m_data_size; /* The total size of m_data */
|
||||
size_t m_data_used; /* The used size of m_data */
|
||||
int (*resize)(struct sparsemap *, sparsemap_adaptations_t, size_t, size_t *);
|
||||
} sparsemap_t;
|
||||
|
||||
/** @brief Initialize an existing sparsemap_t by assigning \b data of \b size
|
||||
* bytes for storage of bitmap data.
|
||||
*
|
||||
* Given the address of an existing \b map allocated on the stack or heap this
|
||||
* function will initialize the data structure and use the provided \b data of
|
||||
* \b size for bitmap data. Caller is responsible for all memory management.
|
||||
* Resizing the buffer is not directly supported, you
|
||||
* may resize it and call #sparsemap_set_data_size() and then ensure that should
|
||||
* the address of the object changed you need to update it by calling #sparsemap_
|
||||
* m_data field.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] data A heap or stack memory buffer of \b size for use storing
|
||||
* bitmap data.
|
||||
* @param[in] size The size of the buffer \b data used for the bitmap.
|
||||
*/
|
||||
|
||||
/* Allocate on a sparsemap_t on the heap and initialize it. */
|
||||
sparsemap_t *sparsemap(uint8_t *data, size_t size);
|
||||
|
||||
/* Initialize sparsemap_t with data. */
|
||||
void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size);
|
||||
|
||||
/** @brief Opens, without initializing, an existing sparsemap contained within
|
||||
* the specified buffer.
|
||||
*
|
||||
* Given the address of an existing \b map this function will assign to the
|
||||
* provided data structure \b data of \b size for bitmap data. Caller is
|
||||
* responsible for all memory management. Use this when as a way to
|
||||
* "deserialize" bytes and make them ready for use as a bitmap.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] data A heap or stack memory buffer of \b size for use storing
|
||||
* bitmap data.
|
||||
* @param[in] size The size of the buffer \b data used for the bitmap.
|
||||
*/
|
||||
void sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size);
|
||||
|
||||
/** @brief Resets values and empties the buffer making it ready to accept new
|
||||
* data but does not free the memory.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
*/
|
||||
/* Clears the whole buffer. */
|
||||
void sparsemap_clear(sparsemap_t *map);
|
||||
|
||||
/** @brief Update the size of the buffer \b data used for storing the bitmap.
|
||||
*
|
||||
* When called with \b data NULL on a \b map that was created with #sparsemap()
|
||||
* this function will reallocate the storage for both the map and data possibly
|
||||
* changing the address of the map itself so it is important for the caller to
|
||||
* update all references to this map to the address returned in this scenario.
|
||||
* Access to stale references will result in memory violations and program
|
||||
* termination. Caller is not required to free() the old address, only the new
|
||||
* one should it have changed. This uses #realloc() under the covers, all
|
||||
* caveats apply here as well.
|
||||
*
|
||||
* When called referencing a \b map that was allocate by the caller this
|
||||
* function will only update the values within the data structure.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] size The desired size of the buffer \b data used for the bitmap.
|
||||
* @returns The -- potentially changed -- sparsemap reference, or NULL should a
|
||||
* #realloc() fail (\b ENOMEM)
|
||||
* @note The resizing of caller supplied allocated objects is not yet fully
|
||||
* supported.
|
||||
*/
|
||||
sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, uint8_t *data, size_t size);
|
||||
/* Opens an existing sparsemap at the specified buffer. */
|
||||
void sparsemap_open(sparsemap_t *, uint8_t *data, size_t data_size);
|
||||
|
||||
/** @brief Calculate remaining capacity, approaches 0 when full.
|
||||
*
|
||||
* Provides an estimate in the range [0.0, 100.0] of the remaining capacity of
|
||||
* the buffer storing bitmap data. This can change up or down as more data
|
||||
* is added/removed due to the method for compressed representation, do not
|
||||
* expect a smooth progression either direction. This is a rough estimate only
|
||||
* and may also jump in value after seemingly indiscriminate changes to the map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns an estimate for remaining capacity that approaches 0.0 when full or
|
||||
* 100.0 when empty
|
||||
*/
|
||||
/* Resizes the data range. */
|
||||
void sparsemap_set_data_size(sparsemap_t *map, size_t data_size);
|
||||
|
||||
/* Calculate remaining capacity, full when 0. */
|
||||
double sparsemap_capacity_remaining(sparsemap_t *map);
|
||||
|
||||
/** @brief Returns the capacity of the underlying byte array in bytes.
|
||||
*
|
||||
* Specifically, this returns the byte \b size provided for the underlying
|
||||
* buffer used to store bitmap data.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns byte size of the buffer used for storing bitmap data
|
||||
*/
|
||||
/* Returns the size of the underlying byte array. */
|
||||
size_t sparsemap_get_capacity(sparsemap_t *map);
|
||||
|
||||
/** @brief Returns the value of a bit at index \b idx, either true for "set" (1)
|
||||
* or \b false for "unset" (0).
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] idx The 0-based offset into the bitmap index to examine.
|
||||
* @returns either true or false
|
||||
*/
|
||||
bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx);
|
||||
/* Returns the value of a bit at index |idx|. */
|
||||
bool sparsemap_is_set(sparsemap_t *map, size_t idx);
|
||||
|
||||
/** @brief Sets the bit at index \b idx to \b value.
|
||||
*
|
||||
* A sparsemap has a fixed size buffer with a capacity that can be exhausted by
|
||||
* when calling this function. In such cases the return value is not equal to
|
||||
* the provided \b idx and errno is set to ENOSPC. In such situations it is
|
||||
* possible to grow the data size and retry the set() operation under certain
|
||||
* circumstances (see #sparsemap() and #sparsemap_set_data_size()).
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] idx The 0-based offset into the bitmap index to modify.
|
||||
* @returns the \b idx supplied on success or SPARSEMAP_IDX_MAX on error
|
||||
* with \b errno set to ENOSPC when the map is full.
|
||||
*/
|
||||
sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value);
|
||||
/* Sets the bit at index |idx| to true or false, depending on |value|. */
|
||||
int sparsemap_set(sparsemap_t *map, size_t idx, bool value);
|
||||
|
||||
/** @brief Returns the byte size of the data buffer that has been used thus far.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns the byte size of the data buffer that has been used thus far
|
||||
*/
|
||||
/* Returns the offset of the very first bit. */
|
||||
sm_idx_t sparsemap_get_start_offset(sparsemap_t *map);
|
||||
|
||||
/* Returns the used size in the data buffer. */
|
||||
size_t sparsemap_get_size(sparsemap_t *map);
|
||||
|
||||
/** @brief Returns a pointer to the data buffer used for the map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns a pointer to the data buffer used for the map
|
||||
*/
|
||||
void *sparsemap_get_data(sparsemap_t *map);
|
||||
/* Decompresses the whole bitmap; calls scanner for all bits. */
|
||||
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t), size_t skip);
|
||||
|
||||
/** @brief Returns the number of elements in the map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns the number of elements in the map
|
||||
*/
|
||||
size_t sparsemap_count(sparsemap_t *map);
|
||||
/* Appends all chunk maps from |map| starting at |sstart| to |other|, then
|
||||
reduces the chunk map-count appropriately. */
|
||||
void sparsemap_split(sparsemap_t *map, size_t sstart, sparsemap_t *other);
|
||||
|
||||
/** @brief Returns the offset of the first bit set in the map.
|
||||
*
|
||||
* This is the same as the value of the first set bit in the
|
||||
* map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns the offset of the first bit set in the map
|
||||
*/
|
||||
sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map);
|
||||
|
||||
/** @brief Returns the offset of the last bit set in the map.
|
||||
*
|
||||
* This is the same as the value of the last bit set in the
|
||||
* map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns the offset of the index bit set in the map
|
||||
*/
|
||||
sparsemap_idx_t sparsemap_get_ending_offset(sparsemap_t *map);
|
||||
|
||||
/** @brief Returns the percent of bits set in the map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @returns the percent of bits set.
|
||||
*/
|
||||
double sparsemap_fill_factor(sparsemap_t *map);
|
||||
|
||||
/** @brief Provides a method for a callback function to examine every bit set in
|
||||
* the index.
|
||||
*
|
||||
* This decompresses the whole bitmap and invokes #scanner() passing an array
|
||||
* of the positions of set bits in order from 0 index to the end of the map.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] skip Start the scan after \b skip position in the map.
|
||||
* @param[in] aux Auxiliary information passed to the scanner.
|
||||
*/
|
||||
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux);
|
||||
|
||||
/** @brief Merges the values from \b source into \b destination, \b source is unchanged.
|
||||
*
|
||||
* Efficiently adds all set bits from \b source into \b destination.
|
||||
*
|
||||
* @param[in] destination The sparsemap reference into which we will merge \b source.
|
||||
* @param[in] source The bitmap to merge into \b destination.
|
||||
* @returns 0 on success, or sets errno to ENOSPC and returns the amount of
|
||||
* additional space required to successfully merge the maps.
|
||||
*/
|
||||
int sparsemap_merge(sparsemap_t *destination, sparsemap_t *source);
|
||||
|
||||
/** @brief Splits the bitmap by assigning all bits starting at \b offset to the
|
||||
* \b other bitmap while removing them from \b map.
|
||||
*
|
||||
* The \b other bitmap is expected to be empty.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] offset The 0-based offset into the bitmap at which to split, if
|
||||
* set to SPARSEMAP_IDX_MAX then the bits will be evenly split.
|
||||
* @param[in] other The bitmap into which we place the split.
|
||||
* @returns the offset at which the map was split
|
||||
*/
|
||||
sparsemap_idx_t sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other);
|
||||
|
||||
/** @brief Finds the index of the \b n'th bit set to \b value.
|
||||
*
|
||||
* Locates the \b n'th bit either set, \b value is true, or unset, \b value is
|
||||
* false, from the start of the bitmap.
|
||||
* So, if your bit pattern is: ```1101 1110 1010 1101 1011 1110 1110 1111``` and
|
||||
* you request the first set bit the result is `0` (meaning the 1st bit in the
|
||||
* map which is index 0 because this is 0-based indexing). The first unset bit
|
||||
* is `2` (or the third bit in the pattern). When n is 3 and value is true the
|
||||
* result would be `3` (the fourth bit, or the third set bit which is at index
|
||||
* 3 when 0-based).
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] n Specifies how many bits to ignore (when n=2 return the position
|
||||
* of the third matching bit).
|
||||
* @param[in] value Determines if the search is to examine set (true) or unset
|
||||
* (false) bits in the bitmap index.
|
||||
* @returns the 0-based index of the located bit position within the map; when
|
||||
* not found either SPARSEMAP_IDX_MAX.
|
||||
*/
|
||||
sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value);
|
||||
|
||||
/** @brief Counts the bits matching \b value in the provided range, [\b x, \b
|
||||
* y].
|
||||
*
|
||||
* Counts the set, \b value is true, or unset, \b value is false, bits starting
|
||||
* at the \b idx'th bit (0-based) in the range [\b x, \b y] (inclusive on either
|
||||
* end). If range is [0, 0] this examines 1 bit, the first one in the map, and
|
||||
* returns 1 if value is true and the bit was set.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] x 0-based start of the inclusive range to examine.
|
||||
* @param[in] y 0-based end of the inclusive range to examine.
|
||||
* @param[in] value Determines if the scan is to count the set (true) or unset
|
||||
* (false) bits in the range.
|
||||
* @returns the count of bits found within the range that match the \b value
|
||||
*/
|
||||
size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value);
|
||||
|
||||
/** @brief Locates the first contiguous set of bits of \b len starting at \b idx
|
||||
* matching \b value in the bitmap.
|
||||
*
|
||||
* @param[in] map The sparsemap reference.
|
||||
* @param[in] start 0-based start of search within the bitmap.
|
||||
* @param[in] len The length of contiguous bits we're seeking.
|
||||
* @param[in] value Determines if the scan is to find all set (true) or unset
|
||||
* (false) bits of \b len.
|
||||
* @returns the index of the first bit matching the criteria; when not found
|
||||
* found SPARSEMAP_IDX_MAX
|
||||
*/
|
||||
size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t start, size_t len, bool value);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#if 0 // TODO
|
||||
/* Sets/clears bits starting at |ssize| in other in |map| possibly invoking the resize function. */
|
||||
void sparsemap_combine(sparsemap_t *map, size_t sstart, sparsemap_t *other);
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SPARSEMAP_H) */
|
||||
/* Returns the index of the n'th set bit; uses a 0-based index. */
|
||||
size_t sparsemap_select(sparsemap_t *map, size_t n);
|
||||
|
||||
/* Counts the set bits in the range [offset, idx]. */
|
||||
size_t sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx);
|
||||
|
||||
/* Returns the 0-based index of a span of the first set bits of at least |len| starting after |offset|. */
|
||||
size_t sparsemap_span(sparsemap_t *map, size_t offset, size_t len);
|
||||
|
||||
/* This isn't API, it's the default implementation for a resize function. */
|
||||
int sparsemap_on_heap_resize_fn(sparsemap_t *map, sparsemap_adaptations_t desire, size_t cur_size, size_t *new_size);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,258 +0,0 @@
|
|||
#pragma once
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* Adaptive histogram based on something like streaming k-means crossed with Q-digest.
|
||||
* The implementation is a direct descendent of MergingDigest
|
||||
* https://github.com/tdunning/t-digest/
|
||||
*
|
||||
* Copyright (c) 2021 Redis, All rights reserved.
|
||||
* Copyright (c) 2018 Andrew Werner, All rights reserved.
|
||||
*
|
||||
* The special characteristics of this algorithm are:
|
||||
*
|
||||
* - smaller summaries than Q-digest
|
||||
*
|
||||
* - provides part per million accuracy for extreme quantiles and typically <1000 ppm accuracy
|
||||
* for middle quantiles
|
||||
*
|
||||
* - fast
|
||||
*
|
||||
* - simple
|
||||
*
|
||||
* - easy to adapt for use with map-reduce
|
||||
*/
|
||||
|
||||
#define MM_PI 3.14159265358979323846
|
||||
|
||||
struct td_histogram {
|
||||
// compression is a setting used to configure the size of centroids when merged.
|
||||
double compression;
|
||||
|
||||
double min;
|
||||
double max;
|
||||
|
||||
// cap is the total size of nodes
|
||||
int cap;
|
||||
// merged_nodes is the number of merged nodes at the front of nodes.
|
||||
int merged_nodes;
|
||||
// unmerged_nodes is the number of buffered nodes.
|
||||
int unmerged_nodes;
|
||||
|
||||
// we run the merge in reverse every other merge to avoid left-to-right bias in merging
|
||||
long long total_compressions;
|
||||
|
||||
long long merged_weight;
|
||||
long long unmerged_weight;
|
||||
|
||||
double *nodes_mean;
|
||||
long long *nodes_weight;
|
||||
};
|
||||
|
||||
typedef struct td_histogram td_histogram_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Allocate the memory, initialise the t-digest, and return the histogram as output parameter.
|
||||
* @param compression The compression parameter.
|
||||
* 100 is a common value for normal uses.
|
||||
* 1000 is extremely large.
|
||||
* The number of centroids retained will be a smallish (usually less than 10) multiple of this
|
||||
* number.
|
||||
* @return the histogram on success, NULL if allocation failed.
|
||||
*/
|
||||
td_histogram_t *td_new(double compression);
|
||||
|
||||
/**
|
||||
* Allocate the memory and initialise the t-digest.
|
||||
*
|
||||
* @param compression The compression parameter.
|
||||
* 100 is a common value for normal uses.
|
||||
* 1000 is extremely large.
|
||||
* The number of centroids retained will be a smallish (usually less than 10) multiple of this
|
||||
* number.
|
||||
* @param result Output parameter to capture allocated histogram.
|
||||
* @return 0 on success, 1 if allocation failed.
|
||||
*/
|
||||
int td_init(double compression, td_histogram_t **result);
|
||||
|
||||
/**
|
||||
* Frees the memory associated with the t-digest.
|
||||
*
|
||||
* @param h The histogram you want to free.
|
||||
*/
|
||||
void td_free(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Reset a histogram to zero - empty out a histogram and re-initialise it
|
||||
*
|
||||
* If you want to re-use an existing histogram, but reset everything back to zero, this
|
||||
* is the routine to use.
|
||||
*
|
||||
* @param h The histogram you want to reset to empty.
|
||||
*
|
||||
*/
|
||||
void td_reset(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Adds a sample to a histogram.
|
||||
*
|
||||
* @param val The value to add.
|
||||
* @param weight The weight of this point.
|
||||
* @return 0 on success, EDOM if overflow was detected as a consequence of adding the provided
|
||||
* weight.
|
||||
*
|
||||
*/
|
||||
int td_add(td_histogram_t *h, double val, long long weight);
|
||||
|
||||
/**
|
||||
* Re-examines a t-digest to determine whether some centroids are redundant. If your data are
|
||||
* perversely ordered, this may be a good idea. Even if not, this may save 20% or so in space.
|
||||
*
|
||||
* The cost is roughly the same as adding as many data points as there are centroids. This
|
||||
* is typically < 10 * compression, but could be as high as 100 * compression.
|
||||
* This is a destructive operation that is not thread-safe.
|
||||
*
|
||||
* @param h The histogram you want to compress.
|
||||
* @return 0 on success, EDOM if overflow was detected as a consequence of adding the provided
|
||||
* weight. If overflow is detected the histogram is not changed.
|
||||
*
|
||||
*/
|
||||
int td_compress(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Merges all of the values from 'from' to 'this' histogram.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
* @param from Histogram to copy values from.
|
||||
* * @return 0 on success, EDOM if overflow was detected as a consequence of merging the the
|
||||
* provided histogram. If overflow is detected the original histogram is not detected.
|
||||
*/
|
||||
int td_merge(td_histogram_t *h, td_histogram_t *from);
|
||||
|
||||
/**
|
||||
* Returns the fraction of all points added which are ≤ x.
|
||||
*
|
||||
* @param x The cutoff for the cdf.
|
||||
* @return The fraction of all data which is less or equal to x.
|
||||
*/
|
||||
double td_cdf(td_histogram_t *h, double x);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the cutoff such that a specified fraction of the data
|
||||
* added to this TDigest would be less than or equal to the cutoff.
|
||||
*
|
||||
* @param q The desired fraction
|
||||
* @return The value x such that cdf(x) == q;
|
||||
*/
|
||||
double td_quantile(td_histogram_t *h, double q);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the cutoff such that a specified fraction of the data
|
||||
* added to this TDigest would be less than or equal to the cutoffs.
|
||||
*
|
||||
* @param quantiles The ordered percentiles array to get the values for.
|
||||
* @param values Destination array containing the values at the given quantiles.
|
||||
* The values array should be allocated by the caller.
|
||||
* @return 0 on success, ENOMEM if the provided destination array is null.
|
||||
*/
|
||||
int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, size_t length);
|
||||
|
||||
/**
|
||||
* Returns the trimmed mean ignoring values outside given cutoff upper and lower limits.
|
||||
*
|
||||
* @param leftmost_cut Fraction to cut off of the left tail of the distribution.
|
||||
* @param rightmost_cut Fraction to cut off of the right tail of the distribution.
|
||||
* @return The trimmed mean ignoring values outside given cutoff upper and lower limits;
|
||||
*/
|
||||
double td_trimmed_mean(td_histogram_t *h, double leftmost_cut, double rightmost_cut);
|
||||
|
||||
/**
|
||||
* Returns the trimmed mean ignoring values outside given a symmetric cutoff limits.
|
||||
*
|
||||
* @param proportion_to_cut Fraction to cut off of the left and right tails of the distribution.
|
||||
* @return The trimmed mean ignoring values outside given cutoff upper and lower limits;
|
||||
*/
|
||||
double td_trimmed_mean_symmetric(td_histogram_t *h, double proportion_to_cut);
|
||||
|
||||
/**
|
||||
* Returns the current compression factor.
|
||||
*
|
||||
* @return The compression factor originally used to set up the TDigest.
|
||||
*/
|
||||
int td_compression(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Returns the number of points that have been added to this TDigest.
|
||||
*
|
||||
* @return The sum of the weights on all centroids.
|
||||
*/
|
||||
long long td_size(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Returns the number of centroids being used by this TDigest.
|
||||
*
|
||||
* @return The number of centroids being used.
|
||||
*/
|
||||
int td_centroid_count(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Get minimum value from the histogram. Will return __DBL_MAX__ if the histogram
|
||||
* is empty.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
*/
|
||||
double td_min(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Get maximum value from the histogram. Will return - __DBL_MAX__ if the histogram
|
||||
* is empty.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
*/
|
||||
double td_max(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Get the full centroids weight array for 'this' histogram.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
*
|
||||
* @return The full centroids weight array.
|
||||
*/
|
||||
const long long *td_centroids_weight(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Get the full centroids mean array for 'this' histogram.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
*
|
||||
* @return The full centroids mean array.
|
||||
*/
|
||||
const double *td_centroids_mean(td_histogram_t *h);
|
||||
|
||||
/**
|
||||
* Get the centroid weight for 'this' histogram and 'pos'.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
* @param pos centroid position.
|
||||
*
|
||||
* @return The centroid weight.
|
||||
*/
|
||||
long long td_centroids_weight_at(td_histogram_t *h, int pos);
|
||||
|
||||
/**
|
||||
* Get the centroid mean for 'this' histogram and 'pos'.
|
||||
*
|
||||
* @param h "This" pointer
|
||||
* @param pos centroid position.
|
||||
*
|
||||
* @return The centroid mean.
|
||||
*/
|
||||
double td_centroids_mean_at(td_histogram_t *h, int pos);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
25883
lib/roaring.c
25883
lib/roaring.c
File diff suppressed because it is too large
Load diff
680
lib/tdigest.c
680
lib/tdigest.c
|
@ -1,680 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "tdigest.h"
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef TD_MALLOC_INCLUDE
|
||||
#define TD_MALLOC_INCLUDE "td_malloc.h"
|
||||
#endif
|
||||
|
||||
#ifndef TD_ALLOC_H
|
||||
#define TD_ALLOC_H
|
||||
#define __td_malloc malloc
|
||||
#define __td_calloc calloc
|
||||
#define __td_realloc realloc
|
||||
#define __td_free free
|
||||
#endif
|
||||
|
||||
#define __td_max(x, y) (((x) > (y)) ? (x) : (y))
|
||||
#define __td_min(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
static inline double weighted_average_sorted(double x1, double w1, double x2, double w2) {
|
||||
const double x = (x1 * w1 + x2 * w2) / (w1 + w2);
|
||||
return __td_max(x1, __td_min(x, x2));
|
||||
}
|
||||
|
||||
static inline bool _tdigest_long_long_add_safe(long long a, long long b) {
|
||||
if (b < 0) {
|
||||
return (a >= __LONG_LONG_MAX__ - b);
|
||||
} else {
|
||||
return (a <= __LONG_LONG_MAX__ - b);
|
||||
}
|
||||
}
|
||||
|
||||
static inline double weighted_average(double x1, double w1, double x2, double w2) {
|
||||
if (x1 <= x2) {
|
||||
return weighted_average_sorted(x1, w1, x2, w2);
|
||||
} else {
|
||||
return weighted_average_sorted(x2, w2, x1, w1);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void swap(double *arr, int i, int j) {
|
||||
const double temp = arr[i];
|
||||
arr[i] = arr[j];
|
||||
arr[j] = temp;
|
||||
}
|
||||
|
||||
static inline void swap_l(long long *arr, int i, int j) {
|
||||
const long long temp = arr[i];
|
||||
arr[i] = arr[j];
|
||||
arr[j] = temp;
|
||||
}
|
||||
|
||||
static unsigned int partition(double *means, long long *weights, unsigned int start,
|
||||
unsigned int end, unsigned int pivot_idx) {
|
||||
const double pivotMean = means[pivot_idx];
|
||||
swap(means, pivot_idx, end);
|
||||
swap_l(weights, pivot_idx, end);
|
||||
|
||||
int i = start - 1;
|
||||
|
||||
for (unsigned int j = start; j < end; j++) {
|
||||
// If current element is smaller than the pivot
|
||||
if (means[j] < pivotMean) {
|
||||
// increment index of smaller element
|
||||
i++;
|
||||
swap(means, i, j);
|
||||
swap_l(weights, i, j);
|
||||
}
|
||||
}
|
||||
swap(means, i + 1, end);
|
||||
swap_l(weights, i + 1, end);
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
/**
 * Standard quick sort except that sorting rearranges parallel arrays
 *
 * @param means Values to sort on
 * @param weights The auxiliary values rearranged alongside `means`
 * @param start The index of the first value to sort
 * @param end The index of the last value to sort (inclusive)
 */
static void td_qsort(double *means, long long *weights, unsigned int start, unsigned int end) {
    if (start < end) {
        // two elements can be directly compared
        if ((end - start) == 1) {
            if (means[start] > means[end]) {
                swap(means, start, end);
                swap_l(weights, start, end);
            }
            return;
        }
        // generating a random number as a pivot was very expensive vs the array size
        // const unsigned int pivot_idx = start + rand()%(end - start + 1);
        const unsigned int pivot_idx = (end + start) / 2; // central pivot
        const unsigned int new_pivot_idx = partition(means, weights, start, end, pivot_idx);
        // new_pivot_idx is unsigned: only recurse left when new_pivot_idx - 1
        // cannot wrap below start
        if (new_pivot_idx > start) {
            td_qsort(means, weights, start, new_pivot_idx - 1);
        }
        td_qsort(means, weights, new_pivot_idx + 1, end);
    }
}
|
||||
|
||||
static inline size_t cap_from_compression(double compression) {
|
||||
if ((size_t)compression > ((SIZE_MAX / sizeof(double) / 6) - 10)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return (6 * (size_t)(compression)) + 10;
|
||||
}
|
||||
|
||||
/* True when the scratch buffer is (nearly) full, i.e. it is time to merge
 * the unmerged tail into the sorted centroid prefix. */
static inline bool should_td_compress(td_histogram_t *h) {
    return (h->cap - 1) <= (h->merged_nodes + h->unmerged_nodes);
}
|
||||
|
||||
/* Index of the next free node slot (merged prefix plus unmerged tail). */
static inline int next_node(td_histogram_t *h) {
    return h->unmerged_nodes + h->merged_nodes;
}
|
||||
|
||||
int td_compress(td_histogram_t *h);
|
||||
|
||||
/* Return EDOM when a double-precision value has overflowed to +infinity
 * (e.g. an accumulated weight or normalizer), 0 otherwise. */
static inline int _check_overflow(const double v) {
    return (v == INFINITY) ? EDOM : 0;
}
|
||||
|
||||
static inline int _check_td_overflow(const double new_unmerged_weight,
|
||||
const double new_total_weight) {
|
||||
// double-precision overflow detected on h->unmerged_weight
|
||||
if (new_unmerged_weight == INFINITY) {
|
||||
return EDOM;
|
||||
}
|
||||
if (new_total_weight == INFINITY) {
|
||||
return EDOM;
|
||||
}
|
||||
const double denom = 2 * MM_PI * new_total_weight * log(new_total_weight);
|
||||
if (denom == INFINITY) {
|
||||
return EDOM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Number of centroids currently stored (merged plus not-yet-merged). */
int td_centroid_count(td_histogram_t *h) { return h->merged_nodes + h->unmerged_nodes; }
|
||||
|
||||
/* Clear all state so the histogram is empty again; the node arrays and
 * capacity are kept. A NULL histogram is tolerated as a no-op. */
void td_reset(td_histogram_t *h) {
    if (h == NULL) {
        return;
    }
    h->merged_nodes = 0;
    h->merged_weight = 0;
    h->unmerged_nodes = 0;
    h->unmerged_weight = 0;
    h->total_compressions = 0;
    // Sentinel extremes: min starts at +DBL_MAX and max at -DBL_MAX so the
    // first td_add() replaces both.
    h->min = __DBL_MAX__;
    h->max = -h->min;
}
|
||||
|
||||
/**
 * Allocate and initialize a t-digest histogram.
 *
 * @param compression The compression factor; node capacity is 6*compression + 10.
 * @param result Out-parameter receiving the new histogram on success.
 * @return 0 on success; 1 on invalid compression or allocation failure
 *         (*result is left untouched on failure).
 */
int td_init(double compression, td_histogram_t **result) {

    const size_t capacity = cap_from_compression(compression);
    if (capacity < 1) {
        return 1;
    }
    td_histogram_t *histogram;
    histogram = (td_histogram_t *)__td_malloc(sizeof(td_histogram_t));
    if (!histogram) {
        return 1;
    }
    histogram->cap = capacity;
    histogram->compression = (double)compression;
    // The struct comes from a plain (non-zeroing) malloc: clear both array
    // pointers before any td_free() on a partial-failure path below can read
    // — and free — an uninitialized pointer.
    histogram->nodes_mean = NULL;
    histogram->nodes_weight = NULL;
    td_reset(histogram);
    histogram->nodes_mean = (double *)__td_calloc(capacity, sizeof(double));
    if (!histogram->nodes_mean) {
        td_free(histogram);
        return 1;
    }
    histogram->nodes_weight = (long long *)__td_calloc(capacity, sizeof(long long));
    if (!histogram->nodes_weight) {
        td_free(histogram);
        return 1;
    }
    *result = histogram;

    return 0;
}
|
||||
|
||||
/* Convenience wrapper around td_init(); returns NULL on failure. */
td_histogram_t *td_new(double compression) {
    td_histogram_t *h = NULL;
    (void)td_init(compression, &h);
    return h;
}
|
||||
|
||||
/**
 * Release a histogram and its node arrays.
 * Safe to call with NULL (a no-op), matching free() and td_reset() semantics;
 * previously a NULL argument dereferenced the pointer.
 */
void td_free(td_histogram_t *histogram) {
    if (!histogram) {
        return;
    }
    if (histogram->nodes_mean) {
        __td_free((void *)(histogram->nodes_mean));
    }
    if (histogram->nodes_weight) {
        __td_free((void *)(histogram->nodes_weight));
    }
    __td_free((void *)(histogram));
}
|
||||
|
||||
/* Fold every centroid of `from` into `into`.
 * Both digests are compressed first so their centroid arrays are canonical.
 * Returns 0 on success or EDOM when a compression or add overflows.
 * NOTE(review): merging a digest into itself is not handled specially —
 * confirm callers never pass into == from. */
int td_merge(td_histogram_t *into, td_histogram_t *from) {
    if (td_compress(into) != 0)
        return EDOM;
    if (td_compress(from) != 0)
        return EDOM;
    // After compression unmerged_nodes is 0, so pos == from->merged_nodes.
    const int pos = from->merged_nodes + from->unmerged_nodes;
    for (int i = 0; i < pos; i++) {
        const double mean = from->nodes_mean[i];
        const long long weight = from->nodes_weight[i];
        if (td_add(into, mean, weight) != 0) {
            return EDOM;
        }
    }
    return 0;
}
|
||||
|
||||
/* Total observation weight held by the digest (merged plus pending). */
long long td_size(td_histogram_t *h) { return h->unmerged_weight + h->merged_weight; }
|
||||
|
||||
/**
 * Approximate CDF: the fraction of observed weight that is <= val.
 *
 * @param h The histogram (compressed in place before evaluation).
 * @param val The value at which to evaluate the CDF.
 * @return A value in [0, 1], or NAN when the digest is empty.
 */
double td_cdf(td_histogram_t *h, double val) {
    td_compress(h);
    // no data to examine
    if (h->merged_nodes == 0) {
        return NAN;
    }
    // below lower bound
    if (val < h->min) {
        return 0;
    }
    // above upper bound
    if (val > h->max) {
        return 1;
    }
    if (h->merged_nodes == 1) {
        // exactly one centroid, should have max==min
        const double width = h->max - h->min;
        if (val - h->min <= width) {
            // min and max are too close together to do any viable interpolation
            return 0.5;
        } else {
            // interpolate if somehow we have weight > 0 and max != min
            return (val - h->min) / width;
        }
    }
    const int n = h->merged_nodes;
    // check for the left tail
    const double left_centroid_mean = h->nodes_mean[0];
    const double left_centroid_weight = (double)h->nodes_weight[0];
    const double merged_weight_d = (double)h->merged_weight;
    if (val < left_centroid_mean) {
        // note that this is different than h->nodes_mean[0] > min
        // ... this guarantees we divide by non-zero number and interpolation works
        const double width = left_centroid_mean - h->min;
        if (width > 0) {
            // must be a sample exactly at min
            if (val == h->min) {
                return 0.5 / merged_weight_d;
            } else {
                return (1 + (val - h->min) / width * (left_centroid_weight / 2 - 1)) /
                       merged_weight_d;
            }
        } else {
            // this should be redundant with the check val < h->min
            return 0;
        }
    }
    // and the right tail
    const double right_centroid_mean = h->nodes_mean[n - 1];
    const double right_centroid_weight = (double)h->nodes_weight[n - 1];
    if (val > right_centroid_mean) {
        const double width = h->max - right_centroid_mean;
        if (width > 0) {
            if (val == h->max) {
                return 1 - 0.5 / merged_weight_d;
            } else {
                // there has to be a single sample exactly at max
                const double dq = (1 + (h->max - val) / width * (right_centroid_weight / 2 - 1)) /
                                  merged_weight_d;
                return 1 - dq;
            }
        } else {
            return 1;
        }
    }
    // we know that there are at least two centroids and mean[0] < x < mean[n-1]
    // that means that there are either one or more consecutive centroids all at exactly x
    // or there are consecutive centroids, c0 < x < c1
    double weightSoFar = 0;
    for (int it = 0; it < n - 1; it++) {
        // weightSoFar does not include weight[it] yet
        if (h->nodes_mean[it] == val) {
            // we have one or more centroids == x, treat them as one
            // dw will accumulate the weight of all of the centroids at x
            double dw = 0;
            while (it < n && h->nodes_mean[it] == val) {
                dw += (double)h->nodes_weight[it];
                it++;
            }
            return (weightSoFar + dw / 2) / (double)h->merged_weight;
        } else if (h->nodes_mean[it] <= val && val < h->nodes_mean[it + 1]) {
            const double node_weight = (double)h->nodes_weight[it];
            const double node_weight_next = (double)h->nodes_weight[it + 1];
            const double node_mean = h->nodes_mean[it];
            const double node_mean_next = h->nodes_mean[it + 1];
            // landed between centroids ... check for floating point madness
            if (node_mean_next - node_mean > 0) {
                // note how we handle singleton centroids here
                // the point is that for singleton centroids, we know that their entire
                // weight is exactly at the centroid and thus shouldn't be involved in
                // interpolation
                double leftExcludedW = 0;
                double rightExcludedW = 0;
                if (node_weight == 1) {
                    if (node_weight_next == 1) {
                        // two singletons means no interpolation
                        // left singleton is in, right is out
                        return (weightSoFar + 1) / merged_weight_d;
                    } else {
                        leftExcludedW = 0.5;
                    }
                } else if (node_weight_next == 1) {
                    rightExcludedW = 0.5;
                }
                double dw = (node_weight + node_weight_next) / 2;

                // adjust endpoints for any singleton
                double dwNoSingleton = dw - leftExcludedW - rightExcludedW;

                double base = weightSoFar + node_weight / 2 + leftExcludedW;
                return (base + dwNoSingleton * (val - node_mean) / (node_mean_next - node_mean)) /
                       merged_weight_d;
            } else {
                // this is simply caution against floating point madness
                // it is conceivable that the centroids will be different
                // but too near to allow safe interpolation
                double dw = (node_weight + node_weight_next) / 2;
                return (weightSoFar + dw) / merged_weight_d;
            }
        } else {
            weightSoFar += (double)h->nodes_weight[it];
        }
    }
    // val must sit inside the right-most centroid: everything but half of
    // its weight is below val
    return 1 - 0.5 / merged_weight_d;
}
|
||||
|
||||
/* Map a cumulative "index" (a position in the virtual sorted sample of size
 * merged_weight) to an interpolated value. Shared by td_quantile() and
 * td_quantiles(); *weightSoFar and *node_pos carry the scan state across
 * calls so a batch of ascending indices can resume where it left off.
 * NOTE(review): resuming the scan is only correct when successive calls use
 * non-decreasing indices — confirm that contract with callers. */
static double td_internal_iterate_centroids_to_index(const td_histogram_t *h, const double index,
                                                     const double left_centroid_weight,
                                                     const int total_centroids, double *weightSoFar,
                                                     int *node_pos) {
    if (left_centroid_weight > 1 && index < left_centroid_weight / 2) {
        // there is a single sample at min so we interpolate with less weight
        return h->min + (index - 1) / (left_centroid_weight / 2 - 1) * (h->nodes_mean[0] - h->min);
    }

    // usually the last centroid will have unit weight so this test will make it moot
    if (index > h->merged_weight - 1) {
        return h->max;
    }

    // if the right-most centroid has more than one sample, we still know
    // that one sample occurred at max so we can do some interpolation
    const double right_centroid_weight = (double)h->nodes_weight[total_centroids - 1];
    const double right_centroid_mean = h->nodes_mean[total_centroids - 1];
    if (right_centroid_weight > 1 &&
        (double)h->merged_weight - index <= right_centroid_weight / 2) {
        return h->max - ((double)h->merged_weight - index - 1) / (right_centroid_weight / 2 - 1) *
                            (h->max - right_centroid_mean);
    }

    // scan forward until centroids *node_pos and *node_pos+1 bracket index
    for (; *node_pos < total_centroids - 1; (*node_pos)++) {
        const int i = *node_pos;
        const double node_weight = (double)h->nodes_weight[i];
        const double node_weight_next = (double)h->nodes_weight[i + 1];
        const double node_mean = h->nodes_mean[i];
        const double node_mean_next = h->nodes_mean[i + 1];
        const double dw = (node_weight + node_weight_next) / 2;
        if (*weightSoFar + dw > index) {
            // centroids i and i+1 bracket our current point
            // check for unit weight
            double leftUnit = 0;
            if (node_weight == 1) {
                if (index - *weightSoFar < 0.5) {
                    // within the singleton's sphere
                    return node_mean;
                } else {
                    leftUnit = 0.5;
                }
            }
            double rightUnit = 0;
            if (node_weight_next == 1) {
                if (*weightSoFar + dw - index <= 0.5) {
                    // no interpolation needed near singleton
                    return node_mean_next;
                }
                rightUnit = 0.5;
            }
            const double z1 = index - *weightSoFar - leftUnit;
            const double z2 = *weightSoFar + dw - index - rightUnit;
            return weighted_average(node_mean, z2, node_mean_next, z1);
        }
        *weightSoFar += dw;
    }

    // weightSoFar = totalWeight - weight[total_centroids-1]/2 (very nearly)
    // so we interpolate out to max value ever seen
    const double z1 = index - h->merged_weight - right_centroid_weight / 2.0;
    const double z2 = right_centroid_weight / 2 - z1;
    return weighted_average(right_centroid_mean, z1, h->max, z2);
}
|
||||
|
||||
/**
 * Approximate value at quantile q (inverse CDF).
 *
 * @param h The histogram (compressed in place first).
 * @param q Desired quantile in [0, 1].
 * @return The interpolated value, or NAN for an empty digest or q outside [0, 1].
 */
double td_quantile(td_histogram_t *h, double q) {
    td_compress(h);
    // q should be in [0,1]
    if (q < 0.0 || q > 1.0 || h->merged_nodes == 0) {
        return NAN;
    }
    // with one data point, all quantiles lead to Rome
    if (h->merged_nodes == 1) {
        return h->nodes_mean[0];
    }

    // if values were stored in a sorted array, index would be the offset we are interested in
    const double index = q * (double)h->merged_weight;

    // beyond the boundaries, we return min or max
    // usually, the first centroid will have unit weight so this will make it moot
    if (index < 1) {
        return h->min;
    }

    // we know that there are at least two centroids now
    const int n = h->merged_nodes;

    // if the left centroid has more than one sample, we still know
    // that one sample occurred at min so we can do some interpolation
    const double left_centroid_weight = (double)h->nodes_weight[0];

    // in between extremes we interpolate between centroids
    double weightSoFar = left_centroid_weight / 2;
    int i = 0;
    return td_internal_iterate_centroids_to_index(h, index, left_centroid_weight, n, &weightSoFar,
                                                  &i);
}
|
||||
|
||||
/**
 * Batch quantile lookup: values[i] receives the value at quantiles[i].
 *
 * @param h The histogram (compressed in place first).
 * @param quantiles Requested quantiles. NOTE(review): the centroid-scan state
 *        (weightSoFar/node_pos) is shared across lookups, which is only
 *        correct for ascending quantiles — confirm callers sort them.
 * @param values Output array of the same length.
 * @param length Number of entries in both arrays.
 * @return 0 on success, EINVAL when either array pointer is NULL.
 */
int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, size_t length) {
    td_compress(h);

    if (NULL == quantiles || NULL == values) {
        return EINVAL;
    }

    const int n = h->merged_nodes;
    if (n == 0) {
        // empty digest: every quantile is undefined
        for (size_t i = 0; i < length; i++) {
            values[i] = NAN;
        }
        return 0;
    }
    if (n == 1) {
        for (size_t i = 0; i < length; i++) {
            const double requested_quantile = quantiles[i];

            // q should be in [0,1]
            if (requested_quantile < 0.0 || requested_quantile > 1.0) {
                values[i] = NAN;
            } else {
                // with one data point, all quantiles lead to Rome
                values[i] = h->nodes_mean[0];
            }
        }
        return 0;
    }

    // we know that there are at least two centroids now
    // if the left centroid has more than one sample, we still know
    // that one sample occurred at min so we can do some interpolation
    const double left_centroid_weight = (double)h->nodes_weight[0];

    // in between extremes we interpolate between centroids
    double weightSoFar = left_centroid_weight / 2;
    int node_pos = 0;

    // to avoid allocations we use the values array for intermediate computation
    // i.e. to store the expected cumulative count at each percentile
    for (size_t qpos = 0; qpos < length; qpos++) {
        const double index = quantiles[qpos] * (double)h->merged_weight;
        values[qpos] = td_internal_iterate_centroids_to_index(h, index, left_centroid_weight, n,
                                                              &weightSoFar, &node_pos);
    }
    return 0;
}
|
||||
|
||||
/* Weighted mean of the centroid mass whose cumulative weight falls inside
 * [leftmost_weight, rightmost_weight]; a centroid straddling a boundary
 * contributes only its in-range fraction. */
static double td_internal_trimmed_mean(const td_histogram_t *h, const double leftmost_weight,
                                       const double rightmost_weight) {
    double count_done = 0;
    double trimmed_sum = 0;
    double trimmed_count = 0;
    for (int i = 0; i < h->merged_nodes; i++) {

        const double n_weight = (double)h->nodes_weight[i];
        // Assume the whole centroid falls into the range
        double count_add = n_weight;

        // If we haven't reached the low threshold yet, skip appropriate part of the centroid.
        count_add -= __td_min(__td_max(0, leftmost_weight - count_done), count_add);

        // If we have reached the upper threshold, ignore the overflowing part of the centroid.

        count_add = __td_min(__td_max(0, rightmost_weight - count_done), count_add);

        // consider the whole centroid processed
        count_done += n_weight;

        // increment the sum / count
        trimmed_sum += h->nodes_mean[i] * count_add;
        trimmed_count += count_add;

        // break once we cross the high threshold
        if (count_done >= rightmost_weight)
            break;
    }

    // NOTE(review): trimmed_count can be 0 when the cut range is empty,
    // yielding NaN via 0/0 — confirm that is the intended result.
    return trimmed_sum / trimmed_count;
}
|
||||
|
||||
/* Mean of the central mass after symmetrically trimming proportion_to_cut of
 * the weight from each tail. Returns NAN for an empty digest or a cut
 * outside [0, 1]. */
double td_trimmed_mean_symmetric(td_histogram_t *h, double proportion_to_cut) {
    td_compress(h);
    const bool bad_cut = proportion_to_cut < 0.0 || proportion_to_cut > 1.0;
    if (bad_cut || h->merged_nodes == 0) {
        return NAN;
    }
    if (h->merged_nodes == 1) {
        // a single data point is its own mean
        return h->nodes_mean[0];
    }

    /* translate the percentiles to counts */
    const double total = (double)h->merged_weight;
    const double lo = floor(total * proportion_to_cut);
    const double hi = ceil(total * (1.0 - proportion_to_cut));

    return td_internal_trimmed_mean(h, lo, hi);
}
|
||||
|
||||
/* Mean of the mass between the leftmost_cut and rightmost_cut quantiles.
 * Returns NAN for an empty digest or cuts outside [0, 1]. */
double td_trimmed_mean(td_histogram_t *h, double leftmost_cut, double rightmost_cut) {
    td_compress(h);
    const bool bad_range = leftmost_cut < 0.0 || leftmost_cut > 1.0 ||
                           rightmost_cut < 0.0 || rightmost_cut > 1.0;
    if (bad_range || h->merged_nodes == 0) {
        return NAN;
    }
    if (h->merged_nodes == 1) {
        // a single data point is its own mean
        return h->nodes_mean[0];
    }

    /* translate the percentiles to counts */
    const double total = (double)h->merged_weight;
    const double lo = floor(total * leftmost_cut);
    const double hi = ceil(total * rightmost_cut);

    return td_internal_trimmed_mean(h, lo, hi);
}
|
||||
|
||||
/**
 * Record `weight` observations of value `mean`.
 *
 * @return 0 on success; EDOM when the buffer is full after a failed compress
 *         or when a weight counter would overflow (long long or double).
 */
int td_add(td_histogram_t *h, double mean, long long weight) {
    if (should_td_compress(h)) {
        const int overflow_res = td_compress(h);
        if (overflow_res != 0)
            return overflow_res;
    }
    const int pos = next_node(h);
    if (pos >= h->cap)
        return EDOM;
    // reject additions whose running long-long weights would overflow
    if (_tdigest_long_long_add_safe(h->unmerged_weight, weight) == false)
        return EDOM;
    const long long new_unmerged_weight = h->unmerged_weight + weight;
    if (_tdigest_long_long_add_safe(new_unmerged_weight, h->merged_weight) == false)
        return EDOM;
    const long long new_total_weight = new_unmerged_weight + h->merged_weight;
    // double-precision overflow detected
    const int overflow_res =
        _check_td_overflow((double)new_unmerged_weight, (double)new_total_weight);
    if (overflow_res != 0)
        return overflow_res;

    // track exact extremes for tail interpolation
    if (mean < h->min) {
        h->min = mean;
    }
    if (mean > h->max) {
        h->max = mean;
    }
    // append to the unmerged tail; td_compress() folds it in later
    h->nodes_mean[pos] = mean;
    h->nodes_weight[pos] = weight;
    h->unmerged_nodes++;
    h->unmerged_weight = new_unmerged_weight;
    return 0;
}
|
||||
|
||||
/**
 * Merge the unmerged tail into the sorted, compressed centroid prefix.
 * Sorts all nodes by mean, then greedily merges neighbors whose combined
 * weight keeps the k-scale bound z <= q*(1-q) at both edges of the merge.
 *
 * @return 0 on success (including the nothing-to-do case), EDOM when
 *         double-precision arithmetic overflows.
 */
int td_compress(td_histogram_t *h) {
    if (h->unmerged_nodes == 0) {
        return 0;
    }
    int N = h->merged_nodes + h->unmerged_nodes;
    // td_qsort's end parameter is inclusive
    td_qsort(h->nodes_mean, h->nodes_weight, 0, N - 1);
    const double total_weight = (double)h->merged_weight + (double)h->unmerged_weight;
    // double-precision overflow detected
    const int overflow_res = _check_td_overflow((double)h->unmerged_weight, (double)total_weight);
    if (overflow_res != 0)
        return overflow_res;
    if (total_weight <= 1)
        return 0;
    const double denom = 2 * MM_PI * total_weight * log(total_weight);
    if (_check_overflow(denom) != 0)
        return EDOM;

    // Compute the normalizer given compression and number of points.
    const double normalizer = h->compression / denom;
    if (_check_overflow(normalizer) != 0)
        return EDOM;
    // cur is the index of the centroid currently being accumulated into
    int cur = 0;
    double weight_so_far = 0;

    for (int i = 1; i < N; i++) {
        const double proposed_weight = (double)h->nodes_weight[cur] + (double)h->nodes_weight[i];
        const double z = proposed_weight * normalizer;
        // quantile up to cur
        const double q0 = weight_so_far / total_weight;
        // quantile up to cur + i
        const double q2 = (weight_so_far + proposed_weight) / total_weight;
        // Convert a quantile to the k-scale
        const bool should_add = (z <= (q0 * (1 - q0))) && (z <= (q2 * (1 - q2)));
        // next point will fit
        // so merge into existing centroid
        if (should_add) {
            h->nodes_weight[cur] += h->nodes_weight[i];
            // incremental weighted-mean update of the accumulating centroid
            const double delta = h->nodes_mean[i] - h->nodes_mean[cur];
            const double weighted_delta = (delta * h->nodes_weight[i]) / h->nodes_weight[cur];
            h->nodes_mean[cur] += weighted_delta;
        } else {
            weight_so_far += h->nodes_weight[cur];
            cur++;
            h->nodes_weight[cur] = h->nodes_weight[i];
            h->nodes_mean[cur] = h->nodes_mean[i];
        }
        // zero out the consumed slot so stale data never leaks
        if (cur != i) {
            h->nodes_weight[i] = 0;
            h->nodes_mean[i] = 0.0;
        }
    }
    h->merged_nodes = cur + 1;
    h->merged_weight = total_weight;
    h->unmerged_nodes = 0;
    h->unmerged_weight = 0;
    h->total_compressions++;
    return 0;
}
|
||||
|
||||
/* Smallest value ever added (meaningful only when the digest is non-empty). */
double td_min(td_histogram_t *h) { return h->min; }

/* Largest value ever added (meaningful only when the digest is non-empty). */
double td_max(td_histogram_t *h) { return h->max; }

/* Compression factor, truncated to int (stored internally as a double). */
int td_compression(td_histogram_t *h) { return h->compression; }

/* Raw centroid weight array; valid entries are [0, td_centroid_count). */
const long long *td_centroids_weight(td_histogram_t *h) { return h->nodes_weight; }

/* Raw centroid mean array; valid entries are [0, td_centroid_count). */
const double *td_centroids_mean(td_histogram_t *h) { return h->nodes_mean; }

/* Weight of centroid `pos`; bounds are NOT checked, unlike td_centroids_mean_at. */
long long td_centroids_weight_at(td_histogram_t *h, int pos) { return h->nodes_weight[pos]; }
|
||||
|
||||
/**
 * Mean of merged centroid `pos`, or NAN when pos is out of range.
 * Valid positions are [0, merged_nodes): the previous `pos > merged_nodes`
 * check was off by one and let pos == merged_nodes read the slot just past
 * the last merged centroid.
 */
double td_centroids_mean_at(td_histogram_t *h, int pos) {
    if (pos < 0 || pos >= h->merged_nodes) {
        return NAN;
    }
    return h->nodes_mean[pos];
}
|
1628
src/sparsemap.c
1628
src/sparsemap.c
File diff suppressed because it is too large
Load diff
|
@ -1,25 +1,14 @@
|
|||
#define _POSIX_C_SOURCE 199309L
|
||||
#define X86_INTRIN
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <pthread.h> // If using threads
|
||||
#include <pthread.h>
|
||||
#include <sparsemap.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#ifdef __x86_64__ // Check if running on x86_64 architecture
|
||||
#ifdef X86_INTRIN
|
||||
#include <errno.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "../include/common.h"
|
||||
#include "../include/sparsemap.h"
|
||||
#include "common.h"
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvariadic-macros"
|
||||
|
@ -33,69 +22,95 @@
|
|||
uint64_t
|
||||
tsc(void)
|
||||
{
|
||||
#ifdef __x86_64__ // Check if running on x86_64 architecture
|
||||
#ifdef X86_INTRIN
|
||||
return __rdtsc();
|
||||
#else
|
||||
uint32_t low, high;
|
||||
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
|
||||
return ((uint64_t)high << 32) | low;
|
||||
#endif
|
||||
#ifdef __arm__ // Check if compiling for ARM architecture
|
||||
uint64_t result;
|
||||
__asm__ volatile("mrs %0, pmccntr_el0" : "=r"(result));
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
// get microsecond timestamp
|
||||
uint64_t
|
||||
msts()
|
||||
static uint64_t
|
||||
get_tsc_frequency()
|
||||
{
|
||||
#ifdef _SC_MONOTONIC_CLOCK
|
||||
struct timespec ts;
|
||||
if (sysconf(_SC_MONOTONIC_CLOCK) > 0) {
|
||||
/* A monotonic clock presents */
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
|
||||
return (uint64_t)(ts.tv_sec * 1000000 + ts.tv_nsec / 1000);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
struct timeval tv;
|
||||
if (gettimeofday(&tv, NULL) == 0)
|
||||
return (uint64_t)(tv.tv_sec * 1000000 + tv.tv_usec);
|
||||
else
|
||||
return 0;
|
||||
#endif
|
||||
uint32_t high, low;
|
||||
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
|
||||
__asm__ volatile("rdtsc");
|
||||
return ((uint64_t)high << 32) | low;
|
||||
}
|
||||
|
||||
double
|
||||
nsts(void)
|
||||
tsc_ticks_to_ns(uint64_t tsc_ticks)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
if (clock_gettime(CLOCK_REALTIME, &ts) == -1) {
|
||||
perror("clock_gettime");
|
||||
return -1.0; // Return -1.0 on error
|
||||
static uint64_t tsc_freq = 0;
|
||||
if (tsc_freq == 0) {
|
||||
tsc_freq = get_tsc_frequency();
|
||||
}
|
||||
return (double)tsc_ticks / (double)tsc_freq * 1e9;
|
||||
}
|
||||
|
||||
void
|
||||
est_sift_up(uint64_t *heap, int child_index)
|
||||
{
|
||||
while (child_index > 0) {
|
||||
int parent_index = (child_index - 1) / 2;
|
||||
if (heap[parent_index] > heap[child_index]) {
|
||||
// Swap parent and child
|
||||
uint64_t temp = heap[parent_index];
|
||||
heap[parent_index] = heap[child_index];
|
||||
heap[child_index] = temp;
|
||||
child_index = parent_index;
|
||||
} else {
|
||||
break; // Heap property satisfied
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
est_sift_down(uint64_t *heap, int heap_size, int parent_index)
|
||||
{
|
||||
int child_index = 2 * parent_index + 1; // Left child
|
||||
while (child_index < heap_size) {
|
||||
// Right child exists and is smaller than left child
|
||||
if (child_index + 1 < heap_size && heap[child_index + 1] < heap[child_index]) {
|
||||
child_index++;
|
||||
}
|
||||
// If the smallest child is smaller than the parent, swap them
|
||||
if (heap[child_index] < heap[parent_index]) {
|
||||
uint64_t temp = heap[child_index];
|
||||
heap[child_index] = heap[parent_index];
|
||||
heap[parent_index] = temp;
|
||||
parent_index = child_index;
|
||||
child_index = 2 * parent_index + 1;
|
||||
} else {
|
||||
break; // Heap property satisfied
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
est_insert_value(uint64_t *heap, int heap_max_size, int *heap_size, uint64_t value)
|
||||
{
|
||||
if (*heap_size < heap_max_size) { // Heap not full, insert value
|
||||
heap[*heap_size] = value;
|
||||
est_sift_up(heap, *heap_size);
|
||||
(*heap_size)++;
|
||||
} else {
|
||||
// Heap is full, replace root with new value with a certain probability
|
||||
// This is a very naive approach to maintain a sample of the input
|
||||
if (rand() % 2) {
|
||||
heap[0] = value;
|
||||
est_sift_down(heap, heap_max_size, 0);
|
||||
}
|
||||
}
|
||||
return ts.tv_sec + ts.tv_nsec / 1e9;
|
||||
}
|
||||
|
||||
int __xorshift32_state = 0;
|
||||
|
||||
// Xorshift algorithm for PRNG
|
||||
uint32_t
|
||||
xorshift32(void)
|
||||
xorshift32()
|
||||
{
|
||||
uint32_t x = __xorshift32_state;
|
||||
if (x == 0) {
|
||||
if (x == 0)
|
||||
x = 123456789;
|
||||
}
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 5;
|
||||
|
@ -104,13 +119,13 @@ xorshift32(void)
|
|||
}
|
||||
|
||||
void
|
||||
xorshift32_seed(void)
|
||||
xorshift32_seed()
|
||||
{
|
||||
__xorshift32_state = XORSHIFT_SEED_VALUE;
|
||||
}
|
||||
|
||||
void
|
||||
shuffle(int *array, size_t n)
|
||||
shuffle(int *array, size_t n) // TODO working?
|
||||
{
|
||||
for (size_t i = n - 1; i > 0; --i) {
|
||||
size_t j = xorshift32() % (i + 1);
|
||||
|
@ -155,7 +170,7 @@ has_sequential_set(int a[], int l, int r)
|
|||
int
|
||||
ensure_sequential_set(int a[], int l, int r)
|
||||
{
|
||||
if (!a || l == 0 || r < 1 || r > l - 1) {
|
||||
if (!a || l == 0 || r < 1 || r > l) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -175,8 +190,7 @@ ensure_sequential_set(int a[], int l, int r)
|
|||
// Generate a random value between min_value and max_value
|
||||
int value = random_uint32() % (max_value - min_value - r + 1);
|
||||
// Generate a random location between 0 and l - r
|
||||
int d = l - r - 1;
|
||||
offset = d == 0 ? 0 : random_uint32() % d;
|
||||
offset = random_uint32() % (l - r - 1);
|
||||
|
||||
// Adjust the array to include a sequential set of 'r' integers at the random offset
|
||||
for (int i = 0; i < r; ++i) {
|
||||
|
@ -185,6 +199,21 @@ ensure_sequential_set(int a[], int l, int r)
|
|||
return value;
|
||||
}
|
||||
|
||||
int
|
||||
create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r)
|
||||
{
|
||||
int placed_at;
|
||||
if (s >= r + 1) {
|
||||
placed_at = 0;
|
||||
} else {
|
||||
placed_at = random_uint32() % (s - r - 1);
|
||||
}
|
||||
for (int i = placed_at; i < placed_at + r; i++) {
|
||||
sparsemap_set(map, i, true);
|
||||
}
|
||||
return placed_at;
|
||||
}
|
||||
|
||||
void
|
||||
print_array(int *array, int l)
|
||||
{
|
||||
|
@ -293,7 +322,7 @@ bool
|
|||
is_set(const int array[], int bit)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++) {
|
||||
if (array[i] == bit) {
|
||||
if (array[i] == (int)bit) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -301,44 +330,28 @@ is_set(const int array[], int bit)
|
|||
}
|
||||
|
||||
int
|
||||
whats_set_uint64(uint64_t number, int pos[64])
|
||||
is_unique(int a[], int l, int value)
|
||||
{
|
||||
int length = 0;
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (number & ((uint64_t)1 << i)) {
|
||||
pos[length++] = i;
|
||||
for (int i = 0; i < l; ++i) {
|
||||
if (a[i] == value) {
|
||||
return 0; // Not unique
|
||||
}
|
||||
}
|
||||
|
||||
return length;
|
||||
return 1; // Unique
|
||||
}
|
||||
|
||||
/** @brief Fills an array with unique random values between 0 and max_value.
|
||||
*
|
||||
* @param[in] a The array to fill.
|
||||
* @param[in] l The length of the array to fill.
|
||||
* @param[in] max_value The maximum value for the random numbers.
|
||||
*/
|
||||
void
|
||||
setup_test_array(int a[], int l, int max_value)
|
||||
{
|
||||
if (a == NULL || max_value < 0)
|
||||
return; // Basic error handling and validation
|
||||
|
||||
// Create a set to store the unique values.
|
||||
int unique_values[max_value + 1];
|
||||
for (int i = 0; i <= max_value; ++i) {
|
||||
unique_values[i] = 0;
|
||||
}
|
||||
|
||||
// Keep generating random numbers until we have l unique values.
|
||||
int count = 0;
|
||||
while (count < l) {
|
||||
int random_number = random_uint32() % (max_value + 1);
|
||||
if (unique_values[random_number] == 0) {
|
||||
unique_values[random_number] = 1;
|
||||
a[count] = random_number;
|
||||
count++;
|
||||
}
|
||||
for (int i = 0; i < l; ++i) {
|
||||
int candidate;
|
||||
do {
|
||||
candidate = random_uint32() % (max_value + 1); // Generate a new value within the specified range
|
||||
} while (!is_unique(a, i, candidate)); // Repeat until a unique value is found
|
||||
a[i] = candidate; // Assign the unique value to the array
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -351,6 +364,15 @@ bitmap_from_uint32(sparsemap_t *map, uint32_t number)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
bitmap_from_uint64(sparsemap_t *map, uint64_t number)
|
||||
{
|
||||
for (int i = 0; i < 64; i++) {
|
||||
bool bit = number & (1 << i);
|
||||
sparsemap_set(map, i, bit);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
rank_uint64(uint64_t number, int n, int p)
|
||||
{
|
||||
|
@ -378,96 +400,28 @@ rank_uint64(uint64_t number, int n, int p)
|
|||
return count;
|
||||
}
|
||||
|
||||
void
|
||||
print_bits(char *name, uint64_t value)
|
||||
int
|
||||
whats_set_uint64(uint64_t number, int pos[64])
|
||||
{
|
||||
if (name) {
|
||||
printf("%s\t", name);
|
||||
}
|
||||
for (int i = 63; i >= 0; i--) {
|
||||
printf("%lu", (value >> i) & 1);
|
||||
if (i % 8 == 0) {
|
||||
printf(" "); // Add space for better readability
|
||||
int length = 0;
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (number & ((uint64_t)1 << i)) {
|
||||
pos[length++] = i;
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
void
|
||||
sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number)
|
||||
whats_set(sparsemap_t *map, int m)
|
||||
{
|
||||
for (int i = offset; i < 64; i++) {
|
||||
bool bit = number & ((uint64_t)1 << i);
|
||||
sparsemap_set(map, i, bit);
|
||||
}
|
||||
}
|
||||
|
||||
sparsemap_idx_t
|
||||
sm_add_span(sparsemap_t *map, int map_size, int span_length)
|
||||
{
|
||||
int attempts = map_size / span_length;
|
||||
sparsemap_idx_t placed_at;
|
||||
do {
|
||||
placed_at = random_uint32() % (map_size - span_length - 1);
|
||||
if (sm_occupied(map, placed_at, span_length, true)) {
|
||||
attempts--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (attempts);
|
||||
for (sparsemap_idx_t i = placed_at; i < placed_at + span_length; i++) {
|
||||
if (sparsemap_set(map, i, true) != i) {
|
||||
return placed_at; // TODO error?
|
||||
}
|
||||
}
|
||||
return placed_at;
|
||||
}
|
||||
|
||||
void
|
||||
sm_whats_set(sparsemap_t *map, int off, int len)
|
||||
{
|
||||
printf("what's set in the range [%d, %d): ", off, off + len);
|
||||
for (int i = off; i < off + len; i++) {
|
||||
logf("what's set in the range [0, %d): ", m);
|
||||
for (int i = 0; i < m; i++) {
|
||||
if (sparsemap_is_set(map, i)) {
|
||||
printf("%d ", i);
|
||||
logf("%d ", i);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
bool
|
||||
sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value)
|
||||
{
|
||||
for (sparsemap_idx_t i = m; i < m + len; i++) {
|
||||
if (sparsemap_is_set(map, i) != value) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value)
|
||||
{
|
||||
for (sparsemap_idx_t i = m; i < (sparsemap_idx_t)len; i++) {
|
||||
if (sparsemap_is_set(map, i) == value) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
char *
|
||||
bytes_as(double bytes, char *s, size_t size)
|
||||
{
|
||||
const char *units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
|
||||
size_t i = 0;
|
||||
|
||||
while (bytes >= 1024 && i < sizeof(units) / sizeof(units[0]) - 1) {
|
||||
bytes /= 1024;
|
||||
i++;
|
||||
}
|
||||
|
||||
snprintf(s, size, "%.2f %s", bytes, units[i]);
|
||||
return s;
|
||||
logf("\n");
|
||||
}
|
|
@ -1,6 +1,4 @@
|
|||
|
||||
#include "../include/sparsemap.h"
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvariadic-macros"
|
||||
#define __diag(...) \
|
||||
|
@ -25,12 +23,23 @@
|
|||
#define XORSHIFT_SEED_VALUE ((unsigned int)time(NULL) ^ getpid())
|
||||
#endif
|
||||
|
||||
#define EST_MEDIAN_DECL(decl, size) \
|
||||
uint64_t heap_##decl[size] = { 0 }; \
|
||||
int heap_##decl##_max_size = size; \
|
||||
int heap_##decl##_size = 0;
|
||||
|
||||
#define EST_MEDIAN_ADD(decl, value) est_insert_value(heap_##decl, heap_##decl##_max_size, &heap_##decl##_size, (value));
|
||||
|
||||
#define EST_MEDIAN_GET(decl) heap_##decl[0]
|
||||
|
||||
uint64_t tsc(void);
|
||||
double tsc_ticks_to_ns(uint64_t tsc_ticks);
|
||||
double nsts(void);
|
||||
void est_sift_up(uint64_t *heap, int child_index);
|
||||
void est_sift_down(uint64_t *heap, int heap_size, int parent_index);
|
||||
void est_insert_value(uint64_t *heap, int heap_max_size, int *heap_size, uint64_t value);
|
||||
|
||||
void xorshift32_seed(void);
|
||||
uint32_t xorshift32(void);
|
||||
void xorshift32_seed();
|
||||
uint32_t xorshift32();
|
||||
|
||||
void print_array(int *array, int l);
|
||||
void print_spans(int *array, int n);
|
||||
|
@ -43,18 +52,11 @@ int is_unique(int a[], int l, int value);
|
|||
void setup_test_array(int a[], int l, int max_value);
|
||||
void shuffle(int *array, size_t n);
|
||||
int ensure_sequential_set(int a[], int l, int r);
|
||||
sparsemap_idx_t sm_add_span(sparsemap_t *map, int map_size, int span_length);
|
||||
|
||||
void print_bits(char *name, uint64_t value);
|
||||
int create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r);
|
||||
|
||||
void bitmap_from_uint32(sparsemap_t *map, uint32_t number);
|
||||
void sm_bitmap_from_uint64(sparsemap_t *map, int offset, uint64_t number);
|
||||
void bitmap_from_uint64(sparsemap_t *map, uint64_t number);
|
||||
uint32_t rank_uint64(uint64_t number, int n, int p);
|
||||
int whats_set_uint64(uint64_t number, int bitPositions[64]);
|
||||
|
||||
void sm_whats_set(sparsemap_t *map, int off, int len);
|
||||
|
||||
bool sm_is_span(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
|
||||
bool sm_occupied(sparsemap_t *map, sparsemap_idx_t m, int len, bool value);
|
||||
|
||||
char *bytes_as(double bytes, char *s, size_t size);
|
||||
void whats_set(sparsemap_t *map, int m);
|
417
tests/midl.c
417
tests/midl.c
|
@ -1,417 +0,0 @@
|
|||
/** @defgroup idls ID List Management
|
||||
* @{
|
||||
*/
|
||||
/** A generic unsigned ID number. These were entryIDs in back-bdb.
|
||||
* Preferably it should have the same size as a pointer.
|
||||
*/
|
||||
typedef size_t MDB_ID;
|
||||
|
||||
/** An IDL is an ID List, a sorted array of IDs. The first
|
||||
* element of the array is a counter for how many actual
|
||||
* IDs are in the list. In the original back-bdb code, IDLs are
|
||||
* sorted in ascending order. For libmdb IDLs are sorted in
|
||||
* descending order.
|
||||
*/
|
||||
typedef MDB_ID *MDB_IDL;
|
||||
|
||||
/* IDL sizes - likely should be even bigger
|
||||
* limiting factors: sizeof(ID), thread stack size
|
||||
*/
|
||||
#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
|
||||
#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
|
||||
#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))
|
||||
|
||||
#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
|
||||
#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)
|
||||
|
||||
#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
|
||||
#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
|
||||
#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
|
||||
#define MDB_IDL_FIRST(ids) ((ids)[1])
|
||||
#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])
|
||||
|
||||
/** Current max length of an #mdb_midl_alloc()ed IDL */
|
||||
#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])
|
||||
|
||||
/** Append ID to IDL. The IDL must be big enough. */
|
||||
#define mdb_midl_xappend(idl, id) \
|
||||
do { \
|
||||
MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
|
||||
xidl[xlen] = (id); \
|
||||
} while (0)
|
||||
|
||||
/** Search for an ID in an IDL.
|
||||
* @param[in] ids The IDL to search.
|
||||
* @param[in] id The ID to search for.
|
||||
* @return The index of the first ID greater than or equal to \b id.
|
||||
*/
|
||||
unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);
|
||||
|
||||
/** Allocate an IDL.
|
||||
* Allocates memory for an IDL of the given size.
|
||||
* @return IDL on success, NULL on failure.
|
||||
*/
|
||||
MDB_IDL mdb_midl_alloc(int num);
|
||||
|
||||
/** Free an IDL.
|
||||
* @param[in] ids The IDL to free.
|
||||
*/
|
||||
void mdb_midl_free(MDB_IDL ids);
|
||||
|
||||
/** Shrink an IDL.
|
||||
* Return the IDL to the default size if it has grown larger.
|
||||
* @param[in,out] idp Address of the IDL to shrink.
|
||||
*/
|
||||
void mdb_midl_shrink(MDB_IDL *idp);
|
||||
|
||||
/** Shrink an IDL to a specific size.
|
||||
* Resize the IDL to \b size if it is larger.
|
||||
* @param[in,out] idp Address of the IDL to shrink.
|
||||
* @param[in] size Capacity to have once resized.
|
||||
*/
|
||||
void mdb_midl_shrink(MDB_IDL *idp);
|
||||
|
||||
/** Make room for num additional elements in an IDL.
|
||||
* @param[in,out] idp Address of the IDL.
|
||||
* @param[in] num Number of elements to make room for.
|
||||
* @return 0 on success, ENOMEM on failure.
|
||||
*/
|
||||
int mdb_midl_need(MDB_IDL *idp, unsigned num);
|
||||
|
||||
/** Append an ID onto an IDL.
|
||||
* @param[in,out] idp Address of the IDL to append to.
|
||||
* @param[in] id The ID to append.
|
||||
* @return 0 on success, ENOMEM if the IDL is too large.
|
||||
*/
|
||||
int mdb_midl_append(MDB_IDL *idp, MDB_ID id);
|
||||
|
||||
/** Append an IDL onto an IDL.
|
||||
* @param[in,out] idp Address of the IDL to append to.
|
||||
* @param[in] app The IDL to append.
|
||||
* @return 0 on success, ENOMEM if the IDL is too large.
|
||||
*/
|
||||
int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);
|
||||
|
||||
/** Append an ID range onto an IDL.
|
||||
* @param[in,out] idp Address of the IDL to append to.
|
||||
* @param[in] id The lowest ID to append.
|
||||
* @param[in] n Number of IDs to append.
|
||||
* @return 0 on success, ENOMEM if the IDL is too large.
|
||||
*/
|
||||
int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);
|
||||
|
||||
/** Merge an IDL onto an IDL. The destination IDL must be big enough.
|
||||
* @param[in] idl The IDL to merge into.
|
||||
* @param[in] merge The IDL to merge.
|
||||
*/
|
||||
void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);
|
||||
|
||||
/** Sort an IDL.
|
||||
* @param[in,out] ids The IDL to sort.
|
||||
*/
|
||||
void mdb_midl_sort(MDB_IDL ids);
|
||||
|
||||
/* midl.c ------------------------------------------------------------------ */
|
||||
/** @defgroup idls ID List Management
|
||||
* @{
|
||||
*/
|
||||
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))
|
||||
|
||||
unsigned
|
||||
mdb_midl_search(MDB_IDL ids, MDB_ID id)
|
||||
{
|
||||
/*
|
||||
* binary search of id in ids
|
||||
* if found, returns position of id
|
||||
* if not found, returns first position greater than id
|
||||
*/
|
||||
unsigned base = 0;
|
||||
unsigned cursor = 1;
|
||||
int val = 0;
|
||||
unsigned n = ids[0];
|
||||
|
||||
while (0 < n) {
|
||||
unsigned pivot = n >> 1;
|
||||
cursor = base + pivot + 1;
|
||||
val = CMP(ids[cursor], id);
|
||||
|
||||
if (val < 0) {
|
||||
n = pivot;
|
||||
|
||||
} else if (val > 0) {
|
||||
base = cursor;
|
||||
n -= pivot + 1;
|
||||
|
||||
} else {
|
||||
return cursor;
|
||||
}
|
||||
}
|
||||
|
||||
if (val > 0) {
|
||||
++cursor;
|
||||
}
|
||||
return cursor;
|
||||
}
|
||||
|
||||
int
|
||||
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
|
||||
{
|
||||
unsigned x, i;
|
||||
|
||||
x = mdb_midl_search(ids, id);
|
||||
assert(x > 0);
|
||||
|
||||
if (x < 1) {
|
||||
/* internal error */
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (x <= ids[0] && ids[x] == id) {
|
||||
/* duplicate */
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (++ids[0] >= MDB_IDL_DB_MAX) {
|
||||
/* no room */
|
||||
--ids[0];
|
||||
return -2;
|
||||
|
||||
} else {
|
||||
/* insert id */
|
||||
for (i = ids[0]; i > x; i--)
|
||||
ids[i] = ids[i - 1];
|
||||
ids[x] = id;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline void
|
||||
mdb_midl_pop_n(MDB_IDL ids, unsigned n)
|
||||
{
|
||||
ids[0] = ids[0] - n;
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
|
||||
{
|
||||
for (int i = idx - 1; idx < ids[0] - 1;)
|
||||
ids[++i] = ids[++idx];
|
||||
ids[0] = ids[0] - 1;
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
|
||||
{
|
||||
unsigned idx = mdb_midl_search(ids, id);
|
||||
if (idx <= ids[0] && ids[idx] == id)
|
||||
mdb_midl_remove_at(ids, idx);
|
||||
}
|
||||
|
||||
MDB_IDL
|
||||
mdb_midl_alloc(int num)
|
||||
{
|
||||
MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID));
|
||||
if (ids) {
|
||||
*ids++ = num;
|
||||
*ids = 0;
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_free(MDB_IDL ids)
|
||||
{
|
||||
if (ids)
|
||||
free(ids - 1);
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_shrink(MDB_IDL *idp)
|
||||
{
|
||||
MDB_IDL ids = *idp;
|
||||
if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
|
||||
*ids++ = MDB_IDL_UM_MAX;
|
||||
*idp = ids;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
|
||||
{
|
||||
MDB_IDL ids = *idp;
|
||||
if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) {
|
||||
*ids++ = size;
|
||||
*idp = ids;
|
||||
*idp[0] = *idp[0] > size ? size : *idp[0];
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mdb_midl_grow(MDB_IDL *idp, int num)
|
||||
{
|
||||
MDB_IDL idn = *idp - 1;
|
||||
/* grow it */
|
||||
idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
|
||||
if (!idn)
|
||||
return ENOMEM;
|
||||
*idn++ += num;
|
||||
*idp = idn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mdb_midl_need(MDB_IDL *idp, unsigned num)
|
||||
{
|
||||
MDB_IDL ids = *idp;
|
||||
num += ids[0];
|
||||
if (num > ids[-1]) {
|
||||
num = (num + num / 4 + (256 + 2)) & -256;
|
||||
if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
|
||||
return ENOMEM;
|
||||
*ids++ = num - 2;
|
||||
*idp = ids;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mdb_midl_append(MDB_IDL *idp, MDB_ID id)
|
||||
{
|
||||
MDB_IDL ids = *idp;
|
||||
/* Too big? */
|
||||
if (ids[0] >= ids[-1]) {
|
||||
if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
|
||||
return ENOMEM;
|
||||
ids = *idp;
|
||||
}
|
||||
ids[0]++;
|
||||
ids[ids[0]] = id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
|
||||
{
|
||||
MDB_IDL ids = *idp;
|
||||
/* Too big? */
|
||||
if (ids[0] + app[0] >= ids[-1]) {
|
||||
if (mdb_midl_grow(idp, app[0]))
|
||||
return ENOMEM;
|
||||
ids = *idp;
|
||||
}
|
||||
memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
|
||||
ids[0] += app[0];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
|
||||
{
|
||||
MDB_ID *ids = *idp, len = ids[0];
|
||||
/* Too big? */
|
||||
if (len + n > ids[-1]) {
|
||||
if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
|
||||
return ENOMEM;
|
||||
ids = *idp;
|
||||
}
|
||||
ids[0] = len + n;
|
||||
ids += len;
|
||||
while (n)
|
||||
ids[n--] = id++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
|
||||
{
|
||||
MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
|
||||
idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
|
||||
old_id = idl[j];
|
||||
while (i) {
|
||||
merge_id = merge[i--];
|
||||
for (; old_id < merge_id; old_id = idl[--j])
|
||||
idl[k--] = old_id;
|
||||
idl[k--] = merge_id;
|
||||
}
|
||||
idl[0] = total;
|
||||
}
|
||||
|
||||
/* Quicksort + Insertion sort for small arrays */
|
||||
|
||||
#define SMALL 8
|
||||
#define MIDL_SWAP(a, b) \
|
||||
{ \
|
||||
itmp = (a); \
|
||||
(a) = (b); \
|
||||
(b) = itmp; \
|
||||
}
|
||||
|
||||
void
|
||||
mdb_midl_sort(MDB_IDL ids)
|
||||
{
|
||||
/* Max possible depth of int-indexed tree * 2 items/level */
|
||||
int istack[sizeof(int) * CHAR_BIT * 2];
|
||||
int i, j, k, l, ir, jstack;
|
||||
MDB_ID a, itmp;
|
||||
|
||||
ir = (int)ids[0];
|
||||
l = 1;
|
||||
jstack = 0;
|
||||
for (;;) {
|
||||
if (ir - l < SMALL) { /* Insertion sort */
|
||||
for (j = l + 1; j <= ir; j++) {
|
||||
a = ids[j];
|
||||
for (i = j - 1; i >= 1; i--) {
|
||||
if (ids[i] >= a)
|
||||
break;
|
||||
ids[i + 1] = ids[i];
|
||||
}
|
||||
ids[i + 1] = a;
|
||||
}
|
||||
if (jstack == 0)
|
||||
break;
|
||||
ir = istack[jstack--];
|
||||
l = istack[jstack--];
|
||||
} else {
|
||||
k = (l + ir) >> 1; /* Choose median of left, center, right */
|
||||
MIDL_SWAP(ids[k], ids[l + 1]);
|
||||
if (ids[l] < ids[ir]) {
|
||||
MIDL_SWAP(ids[l], ids[ir]);
|
||||
}
|
||||
if (ids[l + 1] < ids[ir]) {
|
||||
MIDL_SWAP(ids[l + 1], ids[ir]);
|
||||
}
|
||||
if (ids[l] < ids[l + 1]) {
|
||||
MIDL_SWAP(ids[l], ids[l + 1]);
|
||||
}
|
||||
i = l + 1;
|
||||
j = ir;
|
||||
a = ids[l + 1];
|
||||
for (;;) {
|
||||
do
|
||||
i++;
|
||||
while (ids[i] > a);
|
||||
do
|
||||
j--;
|
||||
while (ids[j] < a);
|
||||
if (j < i)
|
||||
break;
|
||||
MIDL_SWAP(ids[i], ids[j]);
|
||||
}
|
||||
ids[l + 1] = ids[j];
|
||||
ids[j] = a;
|
||||
jstack += 2;
|
||||
if (ir - i + 1 >= j - l) {
|
||||
istack[jstack] = ir;
|
||||
istack[jstack - 1] = i;
|
||||
ir = j - 1;
|
||||
} else {
|
||||
istack[jstack] = j - 1;
|
||||
istack[jstack - 1] = l;
|
||||
l = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
1485
tests/soak.c
1485
tests/soak.c
File diff suppressed because it is too large
Load diff
1254
tests/test.c
1254
tests/test.c
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue