sparsemap/tests/soak.c

1286 lines
34 KiB
C
Raw Normal View History

#include <assert.h>
#include <errno.h>
2024-04-28 16:26:31 +00:00
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2024-05-17 16:06:12 +00:00
#include <unistd.h>
2024-05-03 19:15:39 +00:00
#include "../include/common.h"
#include "../include/roaring.h"
#include "../include/sparsemap.h"
2024-05-03 19:15:39 +00:00
#include "../include/tdigest.h"
2024-05-17 16:06:12 +00:00
#include "midl.c"
2024-05-17 16:06:12 +00:00
/* Page number type used by every container wrapper in this file. */
typedef size_t pgno_t;
2024-05-17 16:06:12 +00:00
/**
 * Format a byte count as a human-readable string using 1024-based units,
 * e.g. 1536 becomes "1.50 KiB".
 *
 * @param bytes  The quantity to format.
 * @param s      Destination buffer.
 * @param size   Capacity of `s`; snprintf() truncates the output to fit.
 * @return `s`, so the call can be used directly as a printf argument.
 */
char *
bytes_as(double bytes, char *s, size_t size)
{
  static const char *const units[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
  const size_t last = sizeof(units) / sizeof(units[0]) - 1;
  size_t i = 0;

  /* Scale down until the value fits the unit or we run out of units. */
  while (bytes >= 1024 && i < last) {
    bytes /= 1024;
    i++;
  }

  snprintf(s, size, "%.2f %s", bytes, units[i]);
  return s;
}
2024-05-17 16:06:12 +00:00
/**
 * A biased "coin toss" used to pick small values more often than large ones.
 * One random draw `r` in [0, 1] is taken and the result is the number of
 * halvings of 0.5 that still exceed `r`, capped at `max`.  With a uniform
 * draw this returns 0 with probability 0.5, 1 with 0.25, 2 with 0.125, and
 * so on; the cap `max` absorbs the remaining 0.5^max.
 *
 * @param max  Largest value that may be returned.
 * @return A level in [0, max].
 */
static int
toss(size_t max)
{
  /* NOTE(review): xorshift32() is declared elsewhere and is assumed to return
   * a full-range 32-bit unsigned value -- confirm.  Dividing by RAND_MAX
   * (often 2^31 - 1) would let the ratio exceed 1 and skew the distribution
   * towards 0, so scale by 2^32 - 1 instead. */
  const double xorshift32_max = 4294967295.0;
  size_t level = 0;
  double probability = 0.5;
  double random_value = (double)xorshift32() / xorshift32_max;

  while (random_value < probability && level < max) {
    level++;
    probability *= 0.5;
  }

  return (int)level;
}
/* When true, the record_*_mutation() helpers write each mutation to their
 * output stream; when false they do nothing. */
bool recording = true;
2024-05-17 19:52:42 +00:00
static void
2024-05-17 16:06:12 +00:00
record_set_mutation(FILE *out, pgno_t pg)
{
if (recording) {
fprintf(out, "set %lu\n", pg);
}
}
2024-05-17 19:52:42 +00:00
static void
2024-05-17 16:06:12 +00:00
record_clear_mutation(FILE *out, pgno_t pg)
2024-04-28 16:26:31 +00:00
{
2024-05-17 16:06:12 +00:00
if (recording) {
fprintf(out, "clear %lu\n", pg);
2024-04-28 16:26:31 +00:00
}
2024-05-17 16:06:12 +00:00
}
2024-04-28 16:26:31 +00:00
2024-05-17 19:52:42 +00:00
static void
2024-05-17 16:06:12 +00:00
record_take_span_mutation(FILE *out, pgno_t pg, unsigned len)
{
if (recording) {
fprintf(out, "take %lu %u\n", pg, len);
2024-04-28 16:26:31 +00:00
}
2024-05-17 16:06:12 +00:00
}
2024-04-28 16:26:31 +00:00
2024-05-17 19:52:42 +00:00
static void
2024-05-17 16:06:12 +00:00
record_release_span_mutation(FILE *out, pgno_t pg, unsigned len)
{
if (recording) {
fprintf(out, "release %lu %u\n", pg, len);
2024-04-28 16:26:31 +00:00
}
}
2024-05-17 19:52:42 +00:00
/**
 * Scan callback: print every value in `v` to the stream passed through
 * `aux`, each followed by a single space.
 *
 * @param v    Array of values handed over by the scanner.
 * @param n    Number of entries in `v`.
 * @param aux  The FILE * to write to.
 */
static void
__scan_record_offsets(sm_idx_t v[], size_t n, void *aux)
{
  FILE *sink = (FILE *)aux;
  size_t remaining = n;
  sm_idx_t *cursor = v;

  while (remaining-- > 0) {
    fprintf(sink, "%u ", *cursor++);
  }
}
2024-05-17 16:06:12 +00:00
2024-05-17 19:52:42 +00:00
static void
record_merge_mutation(FILE *out, void *handle)
2024-04-28 16:26:31 +00:00
{
2024-05-17 19:52:42 +00:00
if (recording) {
sparsemap_t *map = (sparsemap_t *)handle;
fprintf(out, "merge %zu ", sparsemap_get_ending_offset(map));
sparsemap_scan(map, __scan_record_offsets, 0, (void *)out);
fprintf(out, "\n");
}
2024-04-28 16:26:31 +00:00
}
2024-05-17 19:52:42 +00:00
/* sparsemap ------------------------------------------------------------- */
2024-05-17 16:06:12 +00:00
/**
 * Set or clear one bit, growing the map by 64 bytes and retrying whenever
 * sparsemap_set() reports ENOSPC.
 *
 * @param map    In/out: the map pointer; replaced when the map is resized.
 * @param idx    Bit index to change.
 * @param value  New value for the bit.
 * @return `idx` once the bit has been stored.
 */
static sparsemap_idx_t
_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
{
  for (;;) {
    /* Start from a clean errno so a stale ENOSPC left by an earlier call
     * cannot be mistaken for an out-of-space report from this one. */
    errno = 0;
    sparsemap_idx_t l = sparsemap_set(*map, idx, value);
    if (l == idx) {
      return l;
    }
    if (errno != ENOSPC) {
      /* Any other failure is fatal for the soak test; abort() keeps NDEBUG
       * builds from spinning here forever. */
      assert(false);
      abort();
    }
    *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64);
    assert(*map != NULL);
    errno = 0;
  }
}
2024-05-17 16:06:12 +00:00
static void *
__sm_alloc(size_t capacity)
2024-04-28 16:26:31 +00:00
{
2024-05-17 20:45:22 +00:00
sparsemap_t *map = sparsemap(capacity);
assert(map != NULL);
return map;
}
2024-05-17 16:06:12 +00:00
static void
__sm_free(void *handle)
{
2024-05-17 16:06:12 +00:00
sparsemap_t *map = (sparsemap_t *)handle;
free(map);
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: set the bit for page `pg`, growing the map if needed.
 *
 * @param handle  Address of the sparsemap_t * (may be replaced on resize).
 * @param pg      Page number to set.
 * @return The value returned by _sparsemap_set(), i.e. `pg` on success.
 */
static pgno_t
__sm_set(void **handle, pgno_t pg)
{
  sparsemap_t **map = (sparsemap_t **)handle;
  return (pgno_t)_sparsemap_set(map, pg, true);
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether the bit for page `pg` is set.
 *
 * @param handle  The sparsemap_t * to query.
 * @param pg      Page number to test.
 */
static bool
__sm_is_set(void *handle, pgno_t pg)
{
  sparsemap_t *map = (sparsemap_t *)handle;
  return sparsemap_is_set(map, pg);
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: clear the bit for page `pg`, growing the map if needed.
 *
 * @param handle  Address of the sparsemap_t * (may be replaced on resize).
 * @param pg      Page number to clear.
 * @return The value returned by _sparsemap_set(), i.e. `pg` on success.
 */
static pgno_t
__sm_clear(void **handle, pgno_t pg)
{
  sparsemap_t **map = (sparsemap_t **)handle;
  return (pgno_t)_sparsemap_set(map, pg, false);
}
2024-05-17 16:06:12 +00:00
static pgno_t
__sm_find_span(void *handle, unsigned len)
{
sparsemap_t *map = (sparsemap_t *)handle;
pgno_t pgno = (pgno_t)sparsemap_span(map, 0, len, true);
assert(SPARSEMAP_NOT_FOUND(pgno) == false);
return pgno;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: take the span [pg, pg + len) by clearing each of its bits.
 *
 * @param handle  Address of the sparsemap_t * (may be replaced on resize).
 * @param pg      First page of the span.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__sm_take_span(void **handle, pgno_t pg, unsigned len)
{
  sparsemap_t **map = (sparsemap_t **)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    /* Keep the mutation outside assert() so it still happens when the
     * file is compiled with NDEBUG. */
    sparsemap_idx_t at = _sparsemap_set(map, i, false);
    assert(at == i);
    (void)at;
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: release the span [pg, pg + len) by setting each of its
 * bits.
 *
 * @param handle  Address of the sparsemap_t * (may be replaced on resize).
 * @param pg      First page of the span.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__sm_release_span(void **handle, pgno_t pg, unsigned len)
{
  sparsemap_t **map = (sparsemap_t **)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    /* Keep the mutation outside assert() so it still happens when the
     * file is compiled with NDEBUG. */
    sparsemap_idx_t at = _sparsemap_set(map, i, true);
    assert(at == i);
    (void)at;
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether every bit in [pg, pg + len) is set.
 *
 * @param handle  The sparsemap_t * to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__sm_is_span(void *handle, pgno_t pg, unsigned len)
{
  sparsemap_t *map = (sparsemap_t *)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    if (sparsemap_is_set(map, i) != true) {
      return false;
    }
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether every bit in [pg, pg + len) is unset.
 *
 * @param handle  The sparsemap_t * to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__sm_is_empty(void *handle, pgno_t pg, unsigned len)
{
  sparsemap_t *map = (sparsemap_t *)handle;
  for (pgno_t i = 0; i < len; i++) {
    if (sparsemap_is_set(map, pg + i) != false) {
      return false;
    }
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether `pg` is the lowest index at which a run of
 * `len` consecutive set bits begins.  The map is probed bit by bit from
 * index 0 up to (but not including) pg + len.
 *
 * @param handle  The sparsemap_t * to query.
 * @param pg      Candidate start of the first run.
 * @param len     Required run length.
 * @return true when the first run found starts exactly at `pg`; false when
 *         an earlier run exists or no run is found in the probed range.
 */
static bool
__sm_is_first(void *handle, pgno_t pg, unsigned len)
{
  sparsemap_t *map = (sparsemap_t *)handle;
  for (sparsemap_idx_t i = 0; i < pg + len; i++) {
    sparsemap_idx_t j = 0;
    /* Test the bound first so the map is never probed past the run. */
    while (j < len && sparsemap_is_set(map, i + j) == true) {
      j++;
    }
    if (j == len) {
      return i == pg;
    }
  }
  return false;
}
2024-05-17 19:52:42 +00:00
static bool
__sm_merge(void **handle, void *other_handle)
{
sparsemap_t **map = (sparsemap_t **)handle;
sparsemap_t *other = (sparsemap_t *)other_handle;
do {
int retval = sparsemap_merge(*map, other);
if (retval != 0) {
if (errno == ENOSPC) {
size_t new_size = retval + (64 - (retval % 64)) + 64;
*map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
assert(*map != NULL);
errno = 0;
} else {
assert(false);
}
} else {
break;
}
} while (true);
return true;
}
/**
 * Container hook: return sparsemap_get_size() for the map behind `handle`.
 */
static size_t
__sm_size(void *handle)
{
  return sparsemap_get_size((sparsemap_t *)handle);
}
/**
 * Container hook: return the rank of set bits over [0, SPARSEMAP_IDX_MAX]
 * as computed by sparsemap_rank().
 */
static size_t
__sm_count(void *handle)
{
  return sparsemap_rank((sparsemap_t *)handle, 0, SPARSEMAP_IDX_MAX, true);
}
2024-05-17 16:06:12 +00:00
/* midl ------------------------------------------------------------------ */
2024-05-17 16:06:12 +00:00
/**
 * Container hook: allocate an IDL via mdb_midl_alloc(capacity).
 *
 * @param capacity  Value forwarded to mdb_midl_alloc().
 * @return The new list as an opaque handle; asserts that allocation
 *         succeeded.
 */
static void *
__midl_alloc(size_t capacity)
{
  MDB_IDL list = mdb_midl_alloc(capacity);
  assert(list != NULL);
  return (void *)list;
}
2024-05-17 16:06:12 +00:00
static void
__midl_free(void *handle)
{
2024-05-17 16:06:12 +00:00
MDB_IDL list = (MDB_IDL)handle;
mdb_midl_free(list);
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: insert page `pg` into the IDL, asking mdb_midl_need() for
 * more room first when the used count (list[0]) is one short of the value
 * stored in list[-1].
 *
 * @param handle  Address of the MDB_IDL (may be replaced when it grows).
 * @param pg      Page number to insert.
 * @return `pg`.
 */
static pgno_t
__midl_set(void **handle, pgno_t pg)
{
  MDB_IDL *listp = (MDB_IDL *)handle;
  MDB_IDL list = *listp;

  if (list[0] + 1 == list[-1]) {
    mdb_midl_need(listp, list[-1] + 1);
    /* mdb_midl_need() may have moved the list; reload the pointer. */
    list = *listp;
  }
  mdb_midl_insert(list, pg);
  return pg;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether page `pg` is present in the IDL.
 *
 * @param handle  The MDB_IDL to query.
 * @param pg      Page number to look up.
 */
static bool
__midl_is_set(void *handle, pgno_t pg)
{
  MDB_IDL ids = (MDB_IDL)handle;
  pgno_t pos = mdb_midl_search(ids, pg);

  /* A position past the used count means the page is not in the list. */
  if (pos > ids[0]) {
    return false;
  }
  return ids[pos] == pg;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: remove page `pg` from the IDL if it is present.
 *
 * The page is located with mdb_midl_search(); it is a page *value*, not a
 * position in the list.  Later entries are shifted down by one, the vacated
 * tail slot is zeroed and the used count in list[0] is decremented.  A page
 * that is not in the list leaves the list untouched.
 *
 * @param handle  Address of the MDB_IDL.
 * @param pg      Page number to remove.
 * @return `pg`, matching the other containers' clear hooks.
 */
static pgno_t
__midl_clear(void **handle, pgno_t pg)
{
  MDB_IDL list = *(MDB_IDL *)handle;
  pgno_t used = list[0];
  pgno_t idx = mdb_midl_search(list, pg);

  if (idx > used || list[idx] != pg) {
    return pg;
  }
  for (pgno_t j = idx; j < used; j++) {
    list[j] = list[j + 1];
  }
  list[used] = 0;
  list[0] = used - 1;
  return pg;
}
2024-05-17 16:06:12 +00:00
static pgno_t
__midl_find_span(void *handle, unsigned len)
{
MDB_IDL list = (MDB_IDL)handle;
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
int retry = 1;
unsigned i = 0;
pgno_t pgno = 0, *mop = list;
unsigned n2 = len, mop_len = mop[0];
do {
if (mop_len > n2) {
i = mop_len;
do {
pgno = mop[i];
if (mop[i - n2] == pgno + n2)
goto search_done;
} while (--i > n2);
if (--retry < 0)
break;
}
} while (1);
search_done:;
return pgno;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: remove the `len` entries that end at page `pg`'s position
 * from the IDL.
 *
 * `pg` is located either at the tail (fast path) or via mdb_midl_search();
 * the entries after it are then moved down over the removed span and the
 * used count in list[0] is reduced by `len`.  Slots past the new end are
 * left as they were (not zeroed).
 *
 * NOTE(review): no check is made that `pg` is actually present or that the
 * `len` entries ending at it are contiguous -- callers appear to verify this
 * with is_span beforehand; confirm.
 *
 * @param handle  Address of the MDB_IDL.
 * @param pg      Lowest page number of the span to take.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__midl_take_span(void **handle, pgno_t pg, unsigned len)
{
  pgno_t *mop = *(MDB_IDL *)handle;
  unsigned mop_len = mop[0];
  unsigned i = (mop[mop_len] == pg) ? mop_len : mdb_midl_search(mop, pg);
  unsigned j;

  mop[0] = mop_len -= len;
  /* Move any stragglers down */
  for (j = i - len; j < mop_len;) {
    mop[++j] = mop[++i];
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: add every page in [pg, pg + len) to the IDL, growing it
 * first when the used count plus `len` reaches the value in list[-1], and
 * sort the list afterwards.
 *
 * @param handle  Address of the MDB_IDL (may be replaced when it grows).
 * @param pg      First page of the span.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__midl_release_span(void **handle, pgno_t pg, unsigned len)
{
  MDB_IDL *listp = (MDB_IDL *)handle;
  MDB_IDL list = *listp;

  if (list[0] + len >= list[-1]) {
    mdb_midl_need(listp, list[-1] + len);
    /* mdb_midl_need() may have moved the list; reload the pointer. */
    list = *listp;
  }
  for (size_t i = pg; i < pg + len; i++) {
    mdb_midl_insert(list, i);
  }
  mdb_midl_sort(list);
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether every page in [pg, pg + len) is present in
 * the IDL.
 *
 * Each page is looked up individually with mdb_midl_search(), the same
 * membership test __midl_is_set() uses, so the answer does not depend on
 * where the span sits in the list.
 *
 * @param handle  The MDB_IDL to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__midl_is_span(void *handle, pgno_t pg, unsigned len)
{
  MDB_IDL list = (MDB_IDL)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    pgno_t idx = mdb_midl_search(list, i);
    if (idx > list[0] || list[idx] != i) {
      return false;
    }
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether no page in [pg, pg + len) is present in
 * the IDL.
 *
 * @param handle  The MDB_IDL to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__midl_is_empty(void *handle, pgno_t pg, unsigned len)
{
  MDB_IDL list = (MDB_IDL)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    /* Look up the page being visited (i), not just the first one (pg). */
    pgno_t idx = mdb_midl_search(list, i);
    bool found = idx <= list[0] && list[idx] == i;
    if (found) {
      return false;
    }
  }
  return true;
}
2024-05-17 19:52:42 +00:00
static bool
__midl_merge(void **handle, void *other_handle)
{
MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
MDB_IDL other = (MDB_IDL)other_handle;
2024-05-17 20:45:22 +00:00
if (list[0] + other[0] >= list[-1]) {
mdb_midl_need(_list, list[-1] + other[0]);
list = *_list;
}
2024-05-17 19:52:42 +00:00
mdb_midl_append_list(_list, other);
2024-05-17 20:45:22 +00:00
list = *_list;
2024-05-17 19:52:42 +00:00
mdb_midl_sort(list);
return true;
}
/**
 * Container hook: bytes occupied by the IDL's entries, i.e. the used count
 * in list[0] multiplied by sizeof(pgno_t).
 */
static size_t
__midl_size(void *handle)
{
  MDB_IDL ids = (MDB_IDL)handle;
  size_t used = ids[0];

  return used * sizeof(pgno_t);
}
static size_t
__midl_count(void *handle)
{
MDB_IDL list = (MDB_IDL)handle;
return list[0];
}
2024-05-17 16:06:12 +00:00
static bool
__midl_validate(void *handle)
2024-05-03 19:15:39 +00:00
{
2024-05-17 16:06:12 +00:00
MDB_IDL list = (MDB_IDL)handle;
pgno_t id = 1;
while (id < list[0]) {
if (list[id] >= list[id + 1])
2024-05-03 19:15:39 +00:00
return false;
2024-05-17 16:06:12 +00:00
id++;
2024-05-03 19:15:39 +00:00
}
return true;
}
2024-05-17 16:06:12 +00:00
/* roaring --------------------------------------------------------------- */
static void *
__roar_alloc(size_t capacity)
{
2024-05-17 20:45:22 +00:00
roaring_bitmap_t *map = roaring_bitmap_create();
assert(map != NULL);
return map;
2024-05-17 16:06:12 +00:00
}
/**
 * Container hook: destroy a roaring bitmap.
 *
 * roaring_bitmap_free() is the destructor paired with
 * roaring_bitmap_create(); roaring_free() is only the library's raw memory
 * deallocation hook and would leave the bitmap's containers allocated.
 *
 * @param handle  The roaring_bitmap_t * returned by __roar_alloc().
 */
static void
__roar_free(void *handle)
{
  roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle;
  roaring_bitmap_free(rbm);
}
/**
 * Container hook: add page `pg` to the bitmap and assert that it was not
 * already present.
 *
 * @param handle  Address of the roaring_bitmap_t *.
 * @param pg      Page number to add.
 * @return `pg`.
 */
static pgno_t
__roar_set(void **handle, pgno_t pg)
{
  roaring_bitmap_t *rbm = *(roaring_bitmap_t **)handle;
  /* Keep the mutation outside assert() so it still happens when the file
   * is compiled with NDEBUG. */
  bool added = roaring_bitmap_add_checked(rbm, pg);
  assert(added == true);
  (void)added;
  return pg;
}
/**
 * Container hook: report whether page `pg` is in the bitmap.
 */
static bool
__roar_is_set(void *handle, pgno_t pg)
{
  return roaring_bitmap_contains((roaring_bitmap_t *)handle, pg);
}
/**
 * Container hook: remove page `pg` from the bitmap.
 *
 * @param handle  Address of the roaring_bitmap_t *.
 * @param pg      Page number to remove.
 * @return `pg`.
 */
static pgno_t
__roar_clear(void **handle, pgno_t pg)
{
  roaring_bitmap_t **slot = (roaring_bitmap_t **)handle;

  roaring_bitmap_remove(*slot, pg);
  return pg;
}
/**
 * Container hook: find the first offset at which `len` consecutive values
 * are present in the bitmap.
 *
 * Starting at the bitmap's minimum, each candidate offset is tested by
 * comparing the cardinality of [offset, offset + len) with `len`.  For
 * len == 1 the minimum itself is returned without further checks.
 *
 * @param handle  The roaring_bitmap_t * to search.
 * @param len     Required run length.
 * @return The first matching offset, or a value greater than the bitmap's
 *         maximum when no run exists.
 */
static pgno_t
__roar_find_span(void *handle, unsigned len)
{
  roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle;
  uint64_t max = roaring_bitmap_maximum(rbm);
  uint64_t offset = roaring_bitmap_minimum(rbm);

  do {
    if (len == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + len) == len) {
      break;
    }
    offset++;
  } while (offset <= max);
  return offset;
}
/**
 * Container hook: take a span by removing the range starting at `pg` and
 * ending at `pg + len` from the bitmap with roaring_bitmap_remove_range().
 *
 * @param handle  Address of the roaring_bitmap_t *.
 * @param pg      First page of the span.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__roar_take_span(void **handle, pgno_t pg, unsigned len)
{
  roaring_bitmap_t **slot = (roaring_bitmap_t **)handle;

  roaring_bitmap_remove_range(*slot, pg, pg + len);
  return true;
}
/**
 * Container hook: release the span [pg, pg + len) by adding each page to the
 * bitmap, asserting that none of them was already present.
 *
 * @param handle  Address of the roaring_bitmap_t *.
 * @param pg      First page of the span.
 * @param len     Number of pages in the span.
 * @return Always true.
 */
static bool
__roar_release_span(void **handle, pgno_t pg, unsigned len)
{
  roaring_bitmap_t *rbm = *(roaring_bitmap_t **)handle;
  for (size_t i = pg; i < pg + len; i++) {
    /* Keep the mutation outside assert() so it still happens when the
     * file is compiled with NDEBUG. */
    bool added = roaring_bitmap_add_checked(rbm, i);
    assert(added == true);
    (void)added;
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether every page in [pg, pg + len) is in the
 * bitmap.
 *
 * @param handle  The roaring_bitmap_t * to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__roar_is_span(void *handle, pgno_t pg, unsigned len)
{
  roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle;
  for (pgno_t i = pg; i < pg + len; i++) {
    if (roaring_bitmap_contains(rbm, i) != true) {
      return false;
    }
  }
  return true;
}
2024-05-17 16:06:12 +00:00
/**
 * Container hook: report whether no page in [pg, pg + len) is in the bitmap.
 *
 * @param handle  The roaring_bitmap_t * to query.
 * @param pg      First page of the range.
 * @param len     Number of pages in the range.
 */
static bool
__roar_is_empty(void *handle, pgno_t pg, unsigned len)
{
  roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle;
  for (pgno_t i = 0; i < len; i++) {
    if (roaring_bitmap_contains(rbm, pg + i) != false) {
      return false;
    }
  }
  return true;
}
2024-05-17 19:52:42 +00:00
static bool
__roar_merge(void **handle, void *other_handle)
{
roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm;
roaring_bitmap_t *other = (roaring_bitmap_t *)other_handle;
roaring_bitmap_or_inplace(rbm, other);
return true;
}
static size_t
__roar_size(void *handle)
{
// TODO
return 0;
}
static size_t
__roar_count(void *handle)
{
// TODO
return 0;
}
2024-05-17 16:06:12 +00:00
static bool
__roar_validate(void *handle)
2024-05-02 18:55:04 +00:00
{
2024-05-17 16:06:12 +00:00
roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle;
roaring_bitmap_run_optimize(rbm);
return true;
2024-05-02 18:55:04 +00:00
}
2024-05-17 16:06:12 +00:00
/* ----------------------------------------------------------------------- */
/* Index of each implementation in containers[]: sparsemap, midl, roaring. */
typedef enum { SM, ML, RB } container_impl_t;

/*
 * Uniform interface over the page-set implementations under test.  Hooks
 * that may replace the underlying object take the address of the handle
 * (void **); read-only hooks take the handle itself.  A hook may be NULL
 * when an implementation does not provide it.
 */
typedef struct container {
  const char *name;                                             /* display name */
  void *(*alloc)(size_t capacity);                              /* create a container */
  void (*free)(void *handle);                                   /* destroy a container */
  pgno_t (*set)(void **handle, pgno_t pg);                      /* add one page */
  bool (*is_set)(void *handle, pgno_t pg);                      /* is the page present? */
  pgno_t (*clear)(void **handle, pgno_t pg);                    /* remove one page */
  pgno_t (*find_span)(void *handle, unsigned len);              /* locate len contiguous pages */
  bool (*take_span)(void **handle, pgno_t pg, unsigned len);    /* remove a span */
  bool (*release_span)(void **handle, pgno_t pg, unsigned len); /* add a span */
  bool (*is_span)(void *handle, pgno_t pg, unsigned len);       /* are all pages present? */
  bool (*is_empty)(void *handle, pgno_t pg, unsigned len);      /* are all pages absent? */
  bool (*is_first)(void *handle, pgno_t pg, unsigned len);      /* is pg the first such span? */
  bool (*merge)(void **handle, void *other_handle);             /* merge other into handle */
  size_t (*size)(void *handle);                                 /* size in bytes */
  size_t (*count)(void *handle);                                /* number of pages present */
  bool (*validate)(void *handle);                               /* implementation self-check */
} container_t;
// clang-format off
container_t containers[] = {
{ "sparsemap",
.alloc = __sm_alloc,
.free = __sm_free,
.set = __sm_set,
.is_set = __sm_is_set,
.clear = __sm_clear,
.find_span = __sm_find_span,
.take_span = __sm_take_span,
.release_span = __sm_release_span,
.is_span = __sm_is_span,
.is_empty = __sm_is_empty,
.is_first = __sm_is_first,
2024-05-17 19:52:42 +00:00
.merge = __sm_merge,
.size = __sm_size,
.count = __sm_count,
2024-05-17 16:06:12 +00:00
.validate = NULL
},
{ "midl",
.alloc = __midl_alloc,
.free = __midl_free,
.set = __midl_set,
.is_set = __midl_is_set,
.clear = __midl_clear,
.find_span = __midl_find_span,
.take_span = __midl_take_span,
.release_span = __midl_release_span,
.is_span = __midl_is_span,
.is_empty = __midl_is_empty,
.is_first = NULL,
2024-05-17 19:52:42 +00:00
.merge = __midl_merge,
.size = __midl_size,
.count = __midl_count,
2024-05-17 16:06:12 +00:00
.validate = __midl_validate
},
{ "roaring",
.alloc = __roar_alloc,
.free = __roar_free,
.set = __roar_set,
.is_set = __roar_is_set,
.clear = __roar_clear,
.find_span = __roar_find_span,
.take_span = __roar_take_span,
.release_span = __roar_release_span,
.is_span = __roar_is_span,
.is_empty = __roar_is_empty,
.is_first = NULL,
2024-05-17 19:52:42 +00:00
.merge = __roar_merge,
.size = __roar_size,
.count = __roar_count,
2024-05-17 16:06:12 +00:00
.validate = __roar_validate,
},
};
// clang-format on
2024-05-17 19:52:42 +00:00
/* One live handle per container implementation, indexed like containers[]. */
void *handles[(sizeof((containers)) / sizeof((containers)[0]))];
/* Scratch handles for the temporary containers built before a merge. */
void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))];
/* Stream that the record_*_mutation() helpers write to. */
FILE *fp;

/* Create a container of the given type.  Expands to an expression, so the
 * caller supplies the terminating semicolon. */
#define alloc(type, size) containers[(type)].alloc((size))

/* Call an optional hook on handles[type] if the implementation provides it,
 * discarding any result.  Wrapped in do/while so it is a single statement. */
#define cast(type, fn, ...)                                \
  do {                                                     \
    if (containers[(type)].fn)                             \
      containers[(type)].fn(handles[(type)], ##__VA_ARGS__); \
  } while (0)

/* Call a read-only hook on handles[type] and yield its result. */
#define invoke(type, fn, ...) containers[(type)].fn(handles[(type)], __VA_ARGS__)

/* Call a mutating hook on handles[type] and yield its result.  For type 0
 * (the sparsemap) the mutation is first logged via record_<fn>_mutation().
 * The arguments are expanded twice, so they must be free of side effects.
 * The whole expansion is parenthesised so it can be used as an operand or
 * an initializer. */
#define mutate(type, fn, ...) \
  ((((type) == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0), containers[(type)].fn(&handles[(type)], __VA_ARGS__))

/* Iterate `type` over every index of the array `set`. */
#define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++)

/* Verify every other container against the sparsemap (index 0). */
#define compare(set)                                                           \
  for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \
    verify_eq(0, handles[0], type, handles[type]);                             \
  }
2024-05-03 19:15:39 +00:00
bool
2024-05-17 16:06:12 +00:00
verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm)
2024-05-03 19:15:39 +00:00
{
uint64_t max = roaring_bitmap_maximum(rbm);
roaring_uint32_iterator_t iter;
roaring_iterator_init(rbm, &iter);
for (uint64_t i = 0; i <= max; i++) {
if (i == iter.current_value) {
assert(sparsemap_is_set(map, i) == true);
roaring_uint32_iterator_advance(&iter);
} else {
assert(sparsemap_is_set(map, i) == false);
}
}
return true;
}
/**
 * Check that the sparsemap agrees with the IDL: every page in the list must
 * be set in the map, and every page strictly between two neighbouring list
 * entries must be unset.  The list is walked from index 1 and is expected to
 * be in descending order.
 *
 * @param map   The sparsemap to check.
 * @param list  The IDL used as the reference.
 * @return true when they agree; otherwise the offending page is reported
 *         through __diag() and false is returned.
 */
bool
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
{
  for (MDB_ID i = 1; i <= list[0]; i++) {
    pgno_t pg = list[i];

    /* Pages in the gap between the previous (larger) entry and this one
     * are absent from the list, so they must be unset in the map.  The
     * guard skips equal or out-of-order neighbours, which have no gap. */
    if (i > 1 && list[i - 1] > pg) {
      for (pgno_t gap = list[i - 1] - 1; gap > pg; gap--) {
        if (sparsemap_is_set(map, gap) != false) {
          __diag("%zu\n", gap);
          return false;
        }
      }
    }
    if (sparsemap_is_set(map, pg) != true) {
      __diag("%zu\n", pg);
      return false;
    }
  }
  return true;
}
2024-05-17 16:06:12 +00:00
bool
verify_eq(unsigned a, void *ad, unsigned b, void *bd)
2024-05-03 20:07:46 +00:00
{
2024-05-17 16:06:12 +00:00
bool ret = true;
// 'a' should always be a Sparsemap
switch (b) {
case ML:
assert((ret = verify_sm_eq_ml((sparsemap_t *)ad, (MDB_IDL)bd)) == true);
break;
case RB:
assert((ret = verify_sm_eq_rb((sparsemap_t *)ad, (roaring_bitmap_t *)bd)) == true);
break;
default:
break;
}
return ret;
2024-05-03 20:07:46 +00:00
}
2024-04-28 16:26:31 +00:00
/* Timing histograms reported by stats().  The l_* histograms feed the idl_*
 * columns and the b_* histograms feed the sm_* columns, for the locate,
 * take and merge measurements respectively. */
td_histogram_t *l_span_loc;
td_histogram_t *b_span_loc;
td_histogram_t *l_span_take;
td_histogram_t *b_span_take;
td_histogram_t *l_span_merge;
td_histogram_t *b_span_merge;
2024-04-28 16:26:31 +00:00
/**
 * Print the CSV header line for the rows emitted by stats(): seven leading
 * columns followed by five percentile columns for each of the six
 * histograms.
 */
void
stats_header(void)
{
  fputs("timestamp,iterations,idl_cap,idl_used,idl_bytes,sm_cap,sm_used,"
        "idl_loc_p50,idl_loc_p75,idl_loc_p90,idl_loc_p99,idl_loc_p999,"
        "sm_loc_p50,sm_loc_p75,sm_loc_p90,sm_loc_p99,sm_loc_p999,"
        "idl_take_p50,idl_take_p75,idl_take_p90,idl_take_p99,idl_take_p999,"
        "sm_take_p50,sm_take_p75,sm_take_p90,sm_take_p99,sm_take_p999,"
        "idl_merge_p50,idl_merge_p75,idl_merge_p90,idl_merge_p99,idl_merge_p999,"
        "sm_merge_p50,sm_merge_p75,sm_merge_p90,sm_merge_p99,sm_merge_p999\n",
    stdout);
}
/**
 * Print one CSV row matching stats_header(): a timestamp, the iteration
 * count, IDL and sparsemap size figures, then the p50/p75/p90/p99/p99.9
 * quantiles of each of the six timing histograms.  Nothing is printed for
 * the first ten iterations.
 *
 * @param iterations  Number of iterations completed so far.
 * @param map         The sparsemap whose capacity and size are reported.
 * @param list        The IDL whose list[-1], list[0] and byte size are
 *                    reported.
 */
void
stats(size_t iterations, sparsemap_t *map, MDB_IDL list)
{
  static const double quantiles[] = { .5, .75, .90, .99, .999 };
  /* Order must match the column order in stats_header(). */
  td_histogram_t *histograms[] = { l_span_loc, b_span_loc, l_span_take, b_span_take, l_span_merge, b_span_merge };
  const size_t n_histograms = sizeof(histograms) / sizeof(histograms[0]);
  const size_t n_quantiles = sizeof(quantiles) / sizeof(quantiles[0]);

  if (iterations < 10) {
    return;
  }

  for (size_t h = 0; h < n_histograms; h++) {
    td_compress(histograms[h]);
  }

  printf("%f,%zu,%zu,%zu,%zu,%zu,%zu", nsts(), iterations, list[-1], list[0], MDB_IDL_SIZEOF(list), sparsemap_get_capacity(map),
    sparsemap_get_size(map));
  for (size_t h = 0; h < n_histograms; h++) {
    for (size_t q = 0; q < n_quantiles; q++) {
      printf(",%.10f", td_quantile(histograms[h], quantiles[q]));
    }
  }
  printf("\n");
}
/* Default number of pages each container starts with.  Parenthesised so the
 * macro expands safely inside larger expressions. */
#define INITIAL_AMOUNT (1024 * 2)

/* Short-option string in getopt(3) format. */
#define SHORT_OPT "r:fa:bh"
/* Long option names, for reference only: this is NOT a getopt(3) option
 * string and must not be passed to getopt(). */
#define LONG_OPT "record:,force,amount:,buffer,help"
/**
 * Print the command-line help text to stdout.
 *
 * @param program_name  Name shown in the "Usage:" line.
 */
void
print_usage(const char *program_name)
{
  static const char *const option_help[] = {
    " -r, --record <file> Path to the file for recording (optional)\n",
    " -f, --force Force overwrite of existing file (optional)\n",
    " -b, --buffer Disable buffering writes to stdout/err (optional)\n",
    " -a, --amount <number> Specify the number of entries to record (must be positive, optional)\n",
    " -h, --help Print this help message\n",
  };

  printf("Usage: %s [OPTIONS]\n", program_name);
  for (size_t k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++) {
    fputs(option_help[k], stdout);
  }
}
int
main(int argc, char *argv[])
{
int opt;
const char *record_file = NULL;
int force_flag = 0;
2024-05-17 19:52:42 +00:00
size_t left, amt = INITIAL_AMOUNT;
2024-05-17 16:06:12 +00:00
bool buffer = true;
fp = stdout;
while ((opt = getopt(argc, argv, SHORT_OPT LONG_OPT)) != -1) {
switch (opt) {
case 'r':
record_file = optarg;
break;
case 'f':
force_flag = 1;
break;
case 'b':
buffer = false;
break;
case 'a':
amt = atoi(optarg);
if (amt <= 0) {
fprintf(stderr, "Error: Invalid amount. Amount must be a positive number.\n");
return 1;
}
break;
case 'h':
print_usage(argv[0]);
return 0;
case '?':
fprintf(stderr, "Unknown option: %c\n", optopt);
return 1;
default:
break;
}
}
// Check if record file is specified
if (record_file == NULL) {
2024-05-18 01:35:45 +00:00
recording = true;//TODO
2024-05-17 16:06:12 +00:00
} else {
// Check for existing file without force flag
if (access(record_file, F_OK) == 0 && !force_flag) {
fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file);
return 1;
}
// Open the file for writing (truncate if force flag is set)
fp = fopen(record_file, force_flag ? "w" : "a");
if (fp == NULL) {
perror("Error opening file");
return 1;
}
}
// disable buffering
if (!buffer) {
setvbuf(stdout, NULL, _IONBF, 0);
2024-05-17 19:52:42 +00:00
setvbuf(fp, NULL, _IONBF, 0);
2024-05-17 16:06:12 +00:00
}
unsigned types[] = { SM, ML, RB };
unsigned num_types = (sizeof((types)) / sizeof((types)[0]));
/* Setup: add an amt of bits to each container. */
foreach(types)
{
handles[type] = alloc(type, amt);
for (size_t i = 0; i < amt; i++) {
assert(invoke(type, is_set, i) == false);
assert(mutate(type, set, i) == i);
assert(invoke(type, is_set, i) == true);
}
cast(type, validate);
}
compare(types);
2024-05-17 19:52:42 +00:00
left = amt;
2024-05-17 16:06:12 +00:00
while (true) {
// the an amount [1, 16] of pages to find preferring smaller sizes
unsigned len = toss(15) + 1;
pgno_t loc[num_types];
foreach(types)
{
loc[type] = invoke(type, find_span, len);
}
for (unsigned n = 0; n < num_types; n++) {
foreach(types)
{
assert(invoke(type, is_span, loc[n], len));
}
}
foreach(types)
{
cast(type, validate);
}
unsigned which_loc = (unsigned)xorshift32() % num_types;
foreach(types)
{
assert(mutate(type, take_span, loc[which_loc], len));
cast(type, validate);
}
compare(types);
2024-05-17 19:52:42 +00:00
left -= len;
// Once we've used 1/10th of the free list, let's replenish it a bit.
if (amt - left > amt / 10) {
do {
pgno_t pgno;
size_t len, retries = amt;
// Find a hole in the map to replenish.
do {
len = toss(15) + 1;
pgno = sparsemap_span(handles[SM], 0, len, false);
} while (SPARSEMAP_NOT_FOUND(pgno) && --retries);
if (retries == 0) {
goto larger_please;
}
if (SPARSEMAP_FOUND(pgno)) {
foreach(types)
{
assert(invoke(type, is_empty, pgno, len));
}
compare(types);
foreach(types)
{
assert(invoke(type, is_span, pgno, len) == false);
assert(mutate(type, release_span, pgno, len));
assert(invoke(type, is_span, pgno, len) == true);
cast(type, validate);
}
compare(types);
left += len;
}
} while (amt - left > amt / 100);
}
2024-05-18 01:35:45 +00:00
if (toss(1000) > 800) {
size_t new_offset, new_amt;
2024-05-17 19:52:42 +00:00
pgno_t max;
larger_please:
new_amt = 1024 + (xorshift32() % 2048) + toss(1024);
2024-05-18 01:35:45 +00:00
new_offset = xorshift32() % 4096 + 1024;
2024-05-17 19:52:42 +00:00
max = sparsemap_get_ending_offset(handles[SM]);
// Build a new container to merge with the existing one.
foreach(types)
{
new_handles[type] = alloc(type, new_amt);
for (size_t i = 0; i < new_amt; i++) {
// We don't want to record and we're using new_handles not
// handles, so call fn directly.
2024-05-18 01:35:45 +00:00
assert(containers[type].is_set(new_handles[type], i + new_offset) == false);
containers[type].set(&new_handles[type], i + new_offset);
assert(containers[type].is_set(new_handles[type], i + new_offset) == true);
2024-05-17 19:52:42 +00:00
}
}
foreach(types)
{
assert(mutate(type, merge, new_handles[type]));
cast(type, validate);
}
compare(types);
left += new_amt;
amt += new_amt;
foreach(types)
{
containers[type].free(new_handles[type]);
}
}
2024-05-17 16:06:12 +00:00
}
return 0;
}
#if 0
/*
* A "soak test" that tries to replicate behavior in LMDB for page allocation.
*/
int
2024-05-02 12:55:38 +00:00
main(void)
{
2024-04-28 16:26:31 +00:00
size_t replenish = 0, iterations = 0;
// disable buffering
2024-04-28 16:28:58 +00:00
#ifdef DEBUG
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
2024-04-28 16:28:58 +00:00
#endif
2024-04-28 16:26:31 +00:00
l_span_loc = td_new(100);
b_span_loc = td_new(100);
l_span_take = td_new(100);
b_span_take = td_new(100);
2024-04-30 17:58:35 +00:00
l_span_merge = td_new(100);
b_span_merge = td_new(100);
2024-04-28 16:26:31 +00:00
stats_header();
2024-04-28 16:26:31 +00:00
sparsemap_idx_t amt = INITIAL_AMOUNT;
MDB_IDL list = mdb_midl_alloc(amt);
2024-04-28 16:26:31 +00:00
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
2024-05-03 19:15:39 +00:00
roaring_bitmap_t *rbm = roaring_bitmap_create();
// start with 2GiB of 4KiB free pages to track:
// - MDB_IDL requires one int for each free page
// - Sparsemap will compress the set bits using less memory
mdb_midl_need(&list, amt);
2024-04-28 16:26:31 +00:00
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
// We list every free (unallocated) page in the IDL, while...
mdb_midl_xappend(list, pg);
2024-05-03 19:15:39 +00:00
// ... true (unset in the bitmap) indicates free in the bitmap, ...
assert(_sparsemap_set(&map, pg, true) == pg);
2024-05-03 19:15:39 +00:00
assert(roaring_bitmap_add_checked(rbm, pg));
}
mdb_midl_sort(list);
2024-05-03 19:15:39 +00:00
roaring_bitmap_run_optimize(rbm);
assert(verify_sm_eq_ml(map, list));
2024-05-03 19:15:39 +00:00
assert(verify_sm_eq_rm(map, rbm));
2024-04-28 16:26:31 +00:00
double b, e;
while (1) {
unsigned mi;
2024-05-03 19:15:39 +00:00
pgno_t ml, sl, rl;
// get an amount [1, 16] of pages to find preferring smaller sizes
unsigned n = toss(15) + 1;
// find a set of pages using the MDB_IDL
{
2024-04-28 16:26:31 +00:00
b = nsts();
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
int retry = 1;
2024-04-28 16:26:31 +00:00
unsigned i = 0;
pgno_t pgno = 0, *mop = list;
2024-05-17 16:06:12 +00:00
unsigned n2 = len, mop_len = mop[0];
if (mop_len > n2) {
i = mop_len;
do {
pgno = mop[i];
if (mop[i - n2] == pgno + n2)
goto search_done;
} while (--i > n2);
if (--retry < 0)
break;
}
search_done:;
ml = pgno;
mi = i;
2024-04-28 16:26:31 +00:00
e = nsts();
td_add(l_span_loc, e - b, 1);
}
assert(verify_span_midl(list, ml, n));
assert(verify_span_sparsemap(map, ml, n));
2024-05-03 19:15:39 +00:00
assert(verify_span_roaring(rbm, ml, n));
// find a set of pages using the Sparsemap
{
2024-04-28 16:26:31 +00:00
b = nsts();
pgno_t pgno = sparsemap_span(map, 0, n, true);
assert(SPARSEMAP_NOT_FOUND(pgno) == false);
sl = pgno;
2024-04-28 16:26:31 +00:00
e = nsts();
td_add(b_span_loc, e - b, 1);
2024-05-02 18:55:04 +00:00
assert(verify_sm_is_first_available_span(map, pgno, n, true));
}
assert(verify_span_midl(list, sl, n));
assert(verify_span_sparsemap(map, sl, n));
2024-05-03 19:15:39 +00:00
assert(verify_span_roaring(rbm, sl, n));
// find a set of pages using the Roaring Bitmap
{
b = nsts();
uint64_t max = roaring_bitmap_maximum(rbm);
uint64_t offset = roaring_bitmap_minimum(rbm);
do {
if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) {
break;
}
offset++;
} while (offset <= max);
rl = offset;
e = nsts();
}
/*
if (rl != sl) {
assert(verify_span_midl(list, rl, n));
assert(verify_span_sparsemap(map, rl, n));
assert(verify_span_roaring(rbm, rl, n));
}
*/
assert(rl == sl);
2024-05-03 19:15:39 +00:00
bool prefer_mdb_idl_loc = (bool)xorshift32() % 2;
// acquire the set of pages within the list
2024-05-03 19:15:39 +00:00
if (prefer_mdb_idl_loc) {
2024-04-28 16:26:31 +00:00
b = nsts();
unsigned j, num = n;
int i = mi;
pgno_t *mop = list;
unsigned mop_len = mop[0];
mop[0] = mop_len -= num;
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
2024-04-28 16:26:31 +00:00
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
2024-04-28 16:26:31 +00:00
td_add(l_span_take, e - b, 1);
} else {
2024-04-28 16:26:31 +00:00
b = nsts();
unsigned j, num = n;
int i = mdb_midl_search(list, sl) + num;
pgno_t *mop = list;
unsigned mop_len = mop[0];
mop[0] = mop_len -= num;
/* Move any stragglers down */
for (j = i - num; j < mop_len;)
mop[++j] = mop[++i];
2024-04-28 16:26:31 +00:00
e = nsts();
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
td_add(l_span_take, e - b, 1);
}
// acquire the set of pages within the sparsemap
2024-05-03 19:15:39 +00:00
if (prefer_mdb_idl_loc) {
2024-04-28 16:26:31 +00:00
b = nsts();
for (pgno_t i = ml; i < ml + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
2024-04-28 16:26:31 +00:00
e = nsts();
td_add(b_span_take, e - b, 1);
} else {
2024-04-28 16:26:31 +00:00
b = nsts();
for (pgno_t i = sl; i <= sl + n; i++) {
assert(_sparsemap_set(&map, i, false) == i);
}
2024-04-28 16:26:31 +00:00
e = nsts();
td_add(b_span_take, e - b, 1);
}
2024-05-03 19:15:39 +00:00
// acquire the set of pages within the roaring bitmap
if (prefer_mdb_idl_loc) {
b = nsts();
roaring_bitmap_remove_range(rbm, ml, ml + n);
e = nsts();
} else {
b = nsts();
roaring_bitmap_remove_range(rbm, sl, sl + n);
e = nsts();
}
roaring_bitmap_run_optimize(rbm);
assert(verify_sm_eq_ml(map, list));
2024-05-03 19:15:39 +00:00
assert(verify_sm_eq_rm(map, rbm));
2024-05-03 01:13:17 +00:00
// Once the free list has dropped below a tenth of `amt` entries, replenish it a bit.
if (list[0] < amt / 10) {
do {
2024-05-02 18:55:04 +00:00
pgno_t pgno;
size_t len, retries = amt;
do {
len = toss(15) + 1;
2024-05-02 18:55:04 +00:00
pgno = sparsemap_span(map, 0, len, false);
assert(verify_sm_is_first_available_span(map, pgno, n, false));
2024-04-28 16:26:31 +00:00
//__diag("%zu\t%zu,%zu\n", iterations, replenish, retries);
2024-05-02 18:55:04 +00:00
} while (SPARSEMAP_NOT_FOUND(pgno) && --retries);
2024-04-28 16:26:31 +00:00
if (retries == 0) {
goto larger_please;
}
2024-05-02 18:55:04 +00:00
if (SPARSEMAP_FOUND(pgno)) {
assert(verify_empty_midl(list, pgno, len));
assert(verify_empty_sparsemap(map, pgno, len));
2024-05-03 19:15:39 +00:00
assert(verify_empty_roaring(rbm, pgno, len));
2024-04-28 16:26:31 +00:00
assert(verify_sm_eq_ml(map, list));
2024-05-03 19:15:39 +00:00
assert(verify_sm_eq_rm(map, rbm));
2024-04-28 16:26:31 +00:00
if (list[-1] - list[0] < len) {
mdb_midl_need(&list, list[-1] + len);
2024-04-28 16:26:31 +00:00
}
2024-05-02 18:55:04 +00:00
for (size_t i = pgno; i < pgno + len; i++) {
assert(verify_midl_contains(list, i) == false);
2024-04-28 16:26:31 +00:00
assert(sparsemap_is_set(map, i) == false);
mdb_midl_insert(list, i);
2024-04-28 16:26:31 +00:00
assert(verify_midl_contains(list, i) == true);
assert(_sparsemap_set(&map, i, true) == i);
2024-04-28 16:26:31 +00:00
assert(sparsemap_is_set(map, i) == true);
2024-05-03 19:15:39 +00:00
assert(roaring_bitmap_add_checked(rbm, i) == true);
}
mdb_midl_sort(list);
assert(verify_midl_nodups(list));
2024-05-02 18:55:04 +00:00
assert(verify_span_midl(list, pgno, len));
assert(verify_span_sparsemap(map, pgno, len));
2024-05-03 19:15:39 +00:00
assert(verify_span_roaring(rbm, pgno, len));
}
2024-04-28 16:26:31 +00:00
assert(verify_sm_eq_ml(map, list));
2024-05-03 19:15:39 +00:00
assert(verify_sm_eq_rm(map, rbm));
2024-04-28 16:26:31 +00:00
replenish++;
} while (list[0] < amt - 32);
}
2024-04-28 16:26:31 +00:00
replenish = 0;
2024-04-28 16:26:31 +00:00
// every so often, either ...
if (iterations % 1000 == 0) {
larger_please:;
2024-05-03 01:13:17 +00:00
size_t COUNT = xorshift32() % 3586 + 513;
2024-04-30 18:40:23 +00:00
// ... add some amount of 4KiB pages, or
2024-04-30 17:58:35 +00:00
size_t len = COUNT;
// The largest page is at list[1] because this is a reverse sorted list.
pgno_t pg = list[0] ? list[1] + 1 : 0;
2024-05-03 19:15:39 +00:00
if (true) { // disable shrinking for now... (toss(6) + 1 < 7)
2024-04-30 17:58:35 +00:00
MDB_IDL new_list = mdb_midl_alloc(len);
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
2024-05-03 19:15:39 +00:00
roaring_bitmap_t *new_rbm = roaring_bitmap_create();
2024-04-30 17:58:35 +00:00
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
new_list[i + 1] = gp;
new_list[0]++;
assert(verify_midl_contains(new_list, gp) == true);
assert(_sparsemap_set(&new_map, gp, true) == gp);
assert(sparsemap_is_set(new_map, gp));
2024-05-03 19:15:39 +00:00
assert(roaring_bitmap_add_checked(new_rbm, gp));
assert(roaring_bitmap_contains(new_rbm, gp));
2024-04-30 17:58:35 +00:00
}
assert(verify_sm_eq_ml(new_map, new_list));
2024-05-03 19:15:39 +00:00
assert(verify_sm_eq_rm(new_map, new_rbm));
2024-04-30 17:58:35 +00:00
{
b = nsts();
mdb_midl_append_list(&list, new_list);
mdb_midl_sort(list);
e = nsts();
td_add(l_span_merge, e - b, 1);
}
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(verify_midl_contains(list, gp) == true);
}
{
b = nsts();
2024-05-03 20:07:46 +00:00
_sparsemap_merge(&map, new_map);
2024-04-30 17:58:35 +00:00
e = nsts();
td_add(b_span_merge, e - b, 1);
2024-04-28 16:26:31 +00:00
}
2024-04-30 17:58:35 +00:00
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(sparsemap_is_set(map, gp));
}
2024-04-30 17:58:35 +00:00
free(new_map);
2024-05-03 19:15:39 +00:00
{
b = nsts();
roaring_bitmap_or_inplace(rbm, new_rbm);
e = nsts();
}
for (size_t i = 0; i < len; i++) {
pgno_t gp = (pg + len) - i;
assert(roaring_bitmap_contains(rbm, gp));
}
roaring_free(new_rbm);
} else {
if (list[-1] > INITIAL_AMOUNT) {
2024-04-28 16:26:31 +00:00
// ... a fraction of the time, remove COUNT of the 4KiB pages.
2024-05-03 19:15:39 +00:00
{
pgno_t pg;
for (size_t i = 0; i < COUNT; i++) {
pg = list[list[0] - i];
assert(sparsemap_is_set(map, pg) == true);
assert(_sparsemap_set(&map, pg, false) == pg);
}
}
{
roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]);
}
{
mdb_midl_shrink_to(&list, list[0] - COUNT);
}
2024-04-28 16:26:31 +00:00
assert(list[list[0]] != pg);
assert(verify_midl_nodups(list));
verify_sm_eq_ml(map, list);
2024-05-03 19:15:39 +00:00
verify_sm_eq_rm(map, rbm);
}
}
}
2024-04-28 16:26:31 +00:00
stats(iterations, map, list);
2024-05-03 01:13:17 +00:00
// printf("\033[K%zu\r", iterations);
iterations++;
}
return 0;
}
2024-05-17 16:06:12 +00:00
#endif