2024-04-26 20:25:17 +00:00
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
2024-04-28 16:26:31 +00:00
|
|
|
#include <stdbool.h>
|
2024-04-26 20:25:17 +00:00
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2024-05-03 19:15:39 +00:00
|
|
|
#include "../include/common.h"
|
|
|
|
#include "../include/roaring.h"
|
2024-04-26 20:25:17 +00:00
|
|
|
#include "../include/sparsemap.h"
|
2024-05-03 19:15:39 +00:00
|
|
|
#include "../include/tdigest.h"
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
/* midl.h ------------------------------------------------------------------ */
|
|
|
|
/** @defgroup idls ID List Management
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
/** A generic unsigned ID number. These were entryIDs in back-bdb.
|
|
|
|
* Preferably it should have the same size as a pointer.
|
|
|
|
*/
|
|
|
|
typedef size_t MDB_ID;
|
|
|
|
|
|
|
|
/** An IDL is an ID List, a sorted array of IDs. The first
|
|
|
|
* element of the array is a counter for how many actual
|
|
|
|
* IDs are in the list. In the original back-bdb code, IDLs are
|
|
|
|
* sorted in ascending order. For libmdb IDLs are sorted in
|
|
|
|
* descending order.
|
|
|
|
*/
|
|
|
|
typedef MDB_ID *MDB_IDL;
|
|
|
|
|
|
|
|
/* IDL sizes - likely should be even bigger
|
|
|
|
* limiting factors: sizeof(ID), thread stack size
|
|
|
|
*/
|
|
|
|
#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
|
|
|
|
#define MDB_IDL_DB_SIZE (1 << MDB_IDL_LOGN)
|
|
|
|
#define MDB_IDL_UM_SIZE (1 << (MDB_IDL_LOGN + 1))
|
|
|
|
|
|
|
|
#define MDB_IDL_DB_MAX (MDB_IDL_DB_SIZE - 1)
|
|
|
|
#define MDB_IDL_UM_MAX (MDB_IDL_UM_SIZE - 1)
|
|
|
|
|
|
|
|
#define MDB_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(MDB_ID))
|
|
|
|
#define MDB_IDL_IS_ZERO(ids) ((ids)[0] == 0)
|
|
|
|
#define MDB_IDL_CPY(dst, src) (memcpy(dst, src, MDB_IDL_SIZEOF(src)))
|
|
|
|
#define MDB_IDL_FIRST(ids) ((ids)[1])
|
|
|
|
#define MDB_IDL_LAST(ids) ((ids)[(ids)[0]])
|
|
|
|
|
|
|
|
/** Current max length of an #mdb_midl_alloc()ed IDL */
|
|
|
|
#define MDB_IDL_ALLOCLEN(ids) ((ids)[-1])
|
|
|
|
|
|
|
|
/** Append ID to IDL. The IDL must be big enough. */
|
|
|
|
#define mdb_midl_xappend(idl, id) \
|
|
|
|
do { \
|
|
|
|
MDB_ID *xidl = (idl), xlen = ++(xidl[0]); \
|
|
|
|
xidl[xlen] = (id); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
/** Search for an ID in an IDL.
|
|
|
|
* @param[in] ids The IDL to search.
|
|
|
|
* @param[in] id The ID to search for.
|
|
|
|
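 * @return	The index of \b id if present; otherwise the index of the first ID smaller than \b id (the list is sorted descending), or one past the last element if there is none.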
|
|
|
|
*/
|
|
|
|
unsigned mdb_midl_search(MDB_IDL ids, MDB_ID id);
|
|
|
|
|
|
|
|
/** Allocate an IDL.
|
|
|
|
* Allocates memory for an IDL of the given size.
|
|
|
|
* @return IDL on success, NULL on failure.
|
|
|
|
*/
|
|
|
|
MDB_IDL mdb_midl_alloc(int num);
|
|
|
|
|
|
|
|
/** Free an IDL.
|
|
|
|
* @param[in] ids The IDL to free.
|
|
|
|
*/
|
|
|
|
void mdb_midl_free(MDB_IDL ids);
|
|
|
|
|
|
|
|
/** Shrink an IDL.
|
|
|
|
* Return the IDL to the default size if it has grown larger.
|
|
|
|
* @param[in,out] idp Address of the IDL to shrink.
|
|
|
|
*/
|
|
|
|
void mdb_midl_shrink(MDB_IDL *idp);
|
|
|
|
|
|
|
|
/** Shrink an IDL to a specific size.
|
|
|
|
* Resize the IDL to \b size if it is larger.
|
|
|
|
* @param[in,out] idp Address of the IDL to shrink.
|
|
|
|
* @param[in] size Capacity to have once resized.
|
|
|
|
*/
|
|
|
|
void mdb_midl_shrink(MDB_IDL *idp);
|
|
|
|
|
|
|
|
/** Make room for num additional elements in an IDL.
|
|
|
|
* @param[in,out] idp Address of the IDL.
|
|
|
|
* @param[in] num Number of elements to make room for.
|
|
|
|
* @return 0 on success, ENOMEM on failure.
|
|
|
|
*/
|
|
|
|
int mdb_midl_need(MDB_IDL *idp, unsigned num);
|
|
|
|
|
|
|
|
/** Append an ID onto an IDL.
|
|
|
|
* @param[in,out] idp Address of the IDL to append to.
|
|
|
|
* @param[in] id The ID to append.
|
|
|
|
* @return 0 on success, ENOMEM if the IDL is too large.
|
|
|
|
*/
|
|
|
|
int mdb_midl_append(MDB_IDL *idp, MDB_ID id);
|
|
|
|
|
|
|
|
/** Append an IDL onto an IDL.
|
|
|
|
* @param[in,out] idp Address of the IDL to append to.
|
|
|
|
* @param[in] app The IDL to append.
|
|
|
|
* @return 0 on success, ENOMEM if the IDL is too large.
|
|
|
|
*/
|
|
|
|
int mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app);
|
|
|
|
|
|
|
|
/** Append an ID range onto an IDL.
|
|
|
|
* @param[in,out] idp Address of the IDL to append to.
|
|
|
|
* @param[in] id The lowest ID to append.
|
|
|
|
* @param[in] n Number of IDs to append.
|
|
|
|
* @return 0 on success, ENOMEM if the IDL is too large.
|
|
|
|
*/
|
|
|
|
int mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n);
|
|
|
|
|
|
|
|
/** Merge an IDL onto an IDL. The destination IDL must be big enough.
|
|
|
|
* @param[in] idl The IDL to merge into.
|
|
|
|
* @param[in] merge The IDL to merge.
|
|
|
|
*/
|
|
|
|
void mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge);
|
|
|
|
|
|
|
|
/** Sort an IDL.
|
|
|
|
* @param[in,out] ids The IDL to sort.
|
|
|
|
*/
|
|
|
|
void mdb_midl_sort(MDB_IDL ids);
|
|
|
|
|
|
|
|
/* midl.c ------------------------------------------------------------------ */
|
|
|
|
/** @defgroup idls ID List Management
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y))
|
|
|
|
|
|
|
|
unsigned
|
|
|
|
mdb_midl_search(MDB_IDL ids, MDB_ID id)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* binary search of id in ids
|
|
|
|
* if found, returns position of id
|
|
|
|
* if not found, returns first position greater than id
|
|
|
|
*/
|
|
|
|
unsigned base = 0;
|
|
|
|
unsigned cursor = 1;
|
|
|
|
int val = 0;
|
|
|
|
unsigned n = ids[0];
|
|
|
|
|
|
|
|
while (0 < n) {
|
|
|
|
unsigned pivot = n >> 1;
|
|
|
|
cursor = base + pivot + 1;
|
|
|
|
val = CMP(ids[cursor], id);
|
|
|
|
|
|
|
|
if (val < 0) {
|
|
|
|
n = pivot;
|
|
|
|
|
|
|
|
} else if (val > 0) {
|
|
|
|
base = cursor;
|
|
|
|
n -= pivot + 1;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (val > 0) {
|
|
|
|
++cursor;
|
|
|
|
}
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
int
|
|
|
|
mdb_midl_insert(MDB_IDL ids, MDB_ID id)
|
|
|
|
{
|
|
|
|
unsigned x, i;
|
|
|
|
|
|
|
|
x = mdb_midl_search(ids, id);
|
|
|
|
assert(x > 0);
|
|
|
|
|
|
|
|
if (x < 1) {
|
|
|
|
/* internal error */
|
|
|
|
return -2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (x <= ids[0] && ids[x] == id) {
|
|
|
|
/* duplicate */
|
|
|
|
assert(0);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (++ids[0] >= MDB_IDL_DB_MAX) {
|
|
|
|
/* no room */
|
|
|
|
--ids[0];
|
|
|
|
return -2;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
/* insert id */
|
|
|
|
for (i = ids[0]; i > x; i--)
|
|
|
|
ids[i] = ids[i - 1];
|
|
|
|
ids[x] = id;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline void
|
2024-04-30 17:58:35 +00:00
|
|
|
mdb_midl_pop_n(MDB_IDL ids, unsigned n)
|
2024-04-28 16:26:31 +00:00
|
|
|
{
|
|
|
|
ids[0] = ids[0] - n;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_remove_at(MDB_IDL ids, unsigned idx)
|
2024-04-26 20:25:17 +00:00
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
for (int i = idx - 1; idx < ids[0] - 1;)
|
|
|
|
ids[++i] = ids[++idx];
|
|
|
|
ids[0] = ids[0] - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_remove(MDB_IDL ids, MDB_ID id)
|
|
|
|
{
|
|
|
|
unsigned idx = mdb_midl_search(ids, id);
|
|
|
|
if (idx <= ids[0] && ids[idx] == id)
|
|
|
|
mdb_midl_remove_at(ids, idx);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
MDB_IDL
|
|
|
|
mdb_midl_alloc(int num)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = malloc((num + 2) * sizeof(MDB_ID));
|
|
|
|
if (ids) {
|
|
|
|
*ids++ = num;
|
|
|
|
*ids = 0;
|
|
|
|
}
|
|
|
|
return ids;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_free(MDB_IDL ids)
|
|
|
|
{
|
|
|
|
if (ids)
|
|
|
|
free(ids - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_shrink(MDB_IDL *idp)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = *idp;
|
|
|
|
if (*(--ids) > MDB_IDL_UM_MAX && (ids = realloc(ids, (MDB_IDL_UM_MAX + 2) * sizeof(MDB_ID)))) {
|
|
|
|
*ids++ = MDB_IDL_UM_MAX;
|
|
|
|
*idp = ids;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_shrink_to(MDB_IDL *idp, size_t size)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = *idp;
|
|
|
|
if (*(--ids) > size && (ids = realloc(ids, (size + 2) * sizeof(MDB_ID)))) {
|
|
|
|
*ids++ = size;
|
|
|
|
*idp = ids;
|
|
|
|
*idp[0] = *idp[0] > size ? size : *idp[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
mdb_midl_grow(MDB_IDL *idp, int num)
|
|
|
|
{
|
|
|
|
MDB_IDL idn = *idp - 1;
|
|
|
|
/* grow it */
|
|
|
|
idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID));
|
|
|
|
if (!idn)
|
|
|
|
return ENOMEM;
|
|
|
|
*idn++ += num;
|
|
|
|
*idp = idn;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
mdb_midl_need(MDB_IDL *idp, unsigned num)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = *idp;
|
|
|
|
num += ids[0];
|
|
|
|
if (num > ids[-1]) {
|
|
|
|
num = (num + num / 4 + (256 + 2)) & -256;
|
|
|
|
if (!(ids = realloc(ids - 1, num * sizeof(MDB_ID))))
|
|
|
|
return ENOMEM;
|
|
|
|
*ids++ = num - 2;
|
|
|
|
*idp = ids;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
mdb_midl_append(MDB_IDL *idp, MDB_ID id)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = *idp;
|
|
|
|
/* Too big? */
|
|
|
|
if (ids[0] >= ids[-1]) {
|
|
|
|
if (mdb_midl_grow(idp, MDB_IDL_UM_MAX))
|
|
|
|
return ENOMEM;
|
|
|
|
ids = *idp;
|
|
|
|
}
|
|
|
|
ids[0]++;
|
|
|
|
ids[ids[0]] = id;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
mdb_midl_append_list(MDB_IDL *idp, MDB_IDL app)
|
|
|
|
{
|
|
|
|
MDB_IDL ids = *idp;
|
|
|
|
/* Too big? */
|
|
|
|
if (ids[0] + app[0] >= ids[-1]) {
|
|
|
|
if (mdb_midl_grow(idp, app[0]))
|
|
|
|
return ENOMEM;
|
|
|
|
ids = *idp;
|
|
|
|
}
|
|
|
|
memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(MDB_ID));
|
|
|
|
ids[0] += app[0];
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
mdb_midl_append_range(MDB_IDL *idp, MDB_ID id, unsigned n)
|
|
|
|
{
|
|
|
|
MDB_ID *ids = *idp, len = ids[0];
|
|
|
|
/* Too big? */
|
|
|
|
if (len + n > ids[-1]) {
|
|
|
|
if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX))
|
|
|
|
return ENOMEM;
|
|
|
|
ids = *idp;
|
|
|
|
}
|
|
|
|
ids[0] = len + n;
|
|
|
|
ids += len;
|
|
|
|
while (n)
|
|
|
|
ids[n--] = id++;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_xmerge(MDB_IDL idl, MDB_IDL merge)
|
|
|
|
{
|
|
|
|
MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k;
|
|
|
|
idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */
|
|
|
|
old_id = idl[j];
|
|
|
|
while (i) {
|
|
|
|
merge_id = merge[i--];
|
|
|
|
for (; old_id < merge_id; old_id = idl[--j])
|
|
|
|
idl[k--] = old_id;
|
|
|
|
idl[k--] = merge_id;
|
|
|
|
}
|
|
|
|
idl[0] = total;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Quicksort + Insertion sort for small arrays */
|
|
|
|
|
|
|
|
#define SMALL 8
|
|
|
|
#define MIDL_SWAP(a, b) \
|
|
|
|
{ \
|
|
|
|
itmp = (a); \
|
|
|
|
(a) = (b); \
|
|
|
|
(b) = itmp; \
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mdb_midl_sort(MDB_IDL ids)
|
|
|
|
{
|
|
|
|
/* Max possible depth of int-indexed tree * 2 items/level */
|
|
|
|
int istack[sizeof(int) * CHAR_BIT * 2];
|
|
|
|
int i, j, k, l, ir, jstack;
|
|
|
|
MDB_ID a, itmp;
|
|
|
|
|
|
|
|
ir = (int)ids[0];
|
|
|
|
l = 1;
|
|
|
|
jstack = 0;
|
|
|
|
for (;;) {
|
|
|
|
if (ir - l < SMALL) { /* Insertion sort */
|
|
|
|
for (j = l + 1; j <= ir; j++) {
|
|
|
|
a = ids[j];
|
|
|
|
for (i = j - 1; i >= 1; i--) {
|
|
|
|
if (ids[i] >= a)
|
|
|
|
break;
|
|
|
|
ids[i + 1] = ids[i];
|
|
|
|
}
|
|
|
|
ids[i + 1] = a;
|
|
|
|
}
|
|
|
|
if (jstack == 0)
|
|
|
|
break;
|
|
|
|
ir = istack[jstack--];
|
|
|
|
l = istack[jstack--];
|
|
|
|
} else {
|
|
|
|
k = (l + ir) >> 1; /* Choose median of left, center, right */
|
|
|
|
MIDL_SWAP(ids[k], ids[l + 1]);
|
|
|
|
if (ids[l] < ids[ir]) {
|
|
|
|
MIDL_SWAP(ids[l], ids[ir]);
|
|
|
|
}
|
|
|
|
if (ids[l + 1] < ids[ir]) {
|
|
|
|
MIDL_SWAP(ids[l + 1], ids[ir]);
|
|
|
|
}
|
|
|
|
if (ids[l] < ids[l + 1]) {
|
|
|
|
MIDL_SWAP(ids[l], ids[l + 1]);
|
|
|
|
}
|
|
|
|
i = l + 1;
|
|
|
|
j = ir;
|
|
|
|
a = ids[l + 1];
|
|
|
|
for (;;) {
|
|
|
|
do
|
|
|
|
i++;
|
|
|
|
while (ids[i] > a);
|
|
|
|
do
|
|
|
|
j--;
|
|
|
|
while (ids[j] < a);
|
|
|
|
if (j < i)
|
|
|
|
break;
|
|
|
|
MIDL_SWAP(ids[i], ids[j]);
|
|
|
|
}
|
|
|
|
ids[l + 1] = ids[j];
|
|
|
|
ids[j] = a;
|
|
|
|
jstack += 2;
|
|
|
|
if (ir - i + 1 >= j - l) {
|
|
|
|
istack[jstack] = ir;
|
|
|
|
istack[jstack - 1] = i;
|
|
|
|
ir = j - 1;
|
|
|
|
} else {
|
|
|
|
istack[jstack] = j - 1;
|
|
|
|
istack[jstack - 1] = l;
|
|
|
|
l = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
|
|
typedef MDB_ID pgno_t;
|
|
|
|
|
|
|
|
/*
 * Format a byte count as a human readable string with two decimals and a
 * binary unit suffix ("b", "KiB", ... "YiB"), e.g. 1536 -> "1.50 KiB".
 * The text is written with snprintf() into s (at most size bytes) and s
 * is returned.
 */
char *
bytes_as(double bytes, char *s, size_t size)
{
  static const char *const suffix[] = { "b", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
  const size_t last = sizeof(suffix) / sizeof(suffix[0]) - 1;
  size_t unit;

  /* divide down until the value fits the unit or the units run out */
  for (unit = 0; bytes >= 1024 && unit < last; unit++) {
    bytes /= 1024;
  }

  snprintf(s, size, "%.2f %s", bytes, suffix[unit]);
  return s;
}
|
|
|
|
|
|
|
|
/**
 * A "coin toss" function that is critical to the proper operation of the
 * Skiplist.  For example, when `max = 6` this function returns 0 with
 * probability 0.5, 1 with 0.25, 2 with 0.125, etc.; the last level, 6,
 * absorbs the remaining probability of 0.5^6.
 *
 * A single random draw is compared against successively halved
 * thresholds, and the level is capped at `max`.
 */
static int
toss(size_t max)
{
  size_t level = 0;
  double probability = 0.5;

  /*
   * Scale the draw into [0, 1).  Dividing by RAND_MAX is only right for
   * rand(); here the value comes from xorshift32().
   * NOTE(review): assumes xorshift32() yields a uniformly distributed
   * 32-bit value -- confirm against its definition in common.h.
   */
  double random_value = (double)(xorshift32() & 0xFFFFFFFFu) / 4294967296.0;

  while (random_value < probability && level < max) {
    level++;
    probability *= 0.5;
  }
  return (int)level;
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
verify_midl_contains(MDB_IDL list, pgno_t pg)
|
|
|
|
{
|
|
|
|
unsigned idx = mdb_midl_search(list, pg);
|
|
|
|
return idx <= list[0] && list[idx] == pg;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
verify_midl_nodups(MDB_IDL list)
|
|
|
|
{
|
|
|
|
pgno_t id = 1;
|
|
|
|
while (id < list[0]) {
|
|
|
|
if (list[id] == list[id + 1])
|
|
|
|
return false;
|
|
|
|
id++;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len)
|
|
|
|
{
|
|
|
|
pgno_t idx = mdb_midl_search(list, pg);
|
|
|
|
bool found = idx <= list[0] && list[idx] == pg;
|
|
|
|
if (!found)
|
|
|
|
return false;
|
|
|
|
if (len == 1)
|
|
|
|
return true;
|
|
|
|
if (list[len] + 1 != list[len - 1])
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len)
|
|
|
|
{
|
|
|
|
for (pgno_t i = pg; i < pg + len; i++) {
|
|
|
|
pgno_t idx = mdb_midl_search(list, pg);
|
|
|
|
bool found = idx <= list[0] && list[idx] == pg;
|
|
|
|
if (found)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-05-03 19:15:39 +00:00
|
|
|
bool
|
|
|
|
verify_span_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
|
|
|
|
{
|
|
|
|
for (pgno_t i = pg; i < pg + len; i++) {
|
|
|
|
if (roaring_bitmap_contains(rbm, i) != true) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-04-26 20:25:17 +00:00
|
|
|
bool
|
|
|
|
verify_span_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
|
|
|
|
{
|
|
|
|
for (pgno_t i = pg; i < pg + len; i++) {
|
|
|
|
if (sparsemap_is_set(map, i) != true) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len)
|
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
for (pgno_t i = 0; i < len; i++) {
|
|
|
|
if (sparsemap_is_set(map, pg + i) != false) {
|
2024-04-26 20:25:17 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-05-03 19:15:39 +00:00
|
|
|
bool
|
|
|
|
verify_empty_roaring(roaring_bitmap_t *rbm, pgno_t pg, unsigned len)
|
|
|
|
{
|
|
|
|
for (pgno_t i = 0; i < len; i++) {
|
|
|
|
if (roaring_bitmap_contains(rbm, pg + i) != false) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-05-02 18:55:04 +00:00
|
|
|
bool
|
|
|
|
verify_sm_is_first_available_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
|
|
|
|
{
|
|
|
|
for (sparsemap_idx_t i = 0; i < idx + len; i++) {
|
|
|
|
sparsemap_idx_t j = 0;
|
2024-05-03 01:13:17 +00:00
|
|
|
while (sparsemap_is_set(map, i + j) == value && j < len) {
|
2024-05-02 18:55:04 +00:00
|
|
|
j++;
|
|
|
|
}
|
2024-05-03 01:35:37 +00:00
|
|
|
if (j == len) {
|
|
|
|
return i == idx;
|
|
|
|
}
|
2024-05-02 18:55:04 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2024-05-03 19:15:39 +00:00
|
|
|
bool
|
|
|
|
verify_sm_eq_rm(sparsemap_t *map, roaring_bitmap_t *rbm)
|
|
|
|
{
|
|
|
|
uint64_t max = roaring_bitmap_maximum(rbm);
|
|
|
|
roaring_uint32_iterator_t iter;
|
|
|
|
roaring_iterator_init(rbm, &iter);
|
|
|
|
for (uint64_t i = 0; i <= max; i++) {
|
|
|
|
if (i == iter.current_value) {
|
|
|
|
assert(sparsemap_is_set(map, i) == true);
|
|
|
|
roaring_uint32_iterator_advance(&iter);
|
|
|
|
} else {
|
|
|
|
assert(sparsemap_is_set(map, i) == false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-04-26 20:25:17 +00:00
|
|
|
bool
|
|
|
|
verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list)
|
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
for (MDB_ID i = 1; i <= list[0]; i++) {
|
2024-04-26 20:25:17 +00:00
|
|
|
pgno_t pg = list[i];
|
2024-04-28 16:26:31 +00:00
|
|
|
unsigned skipped = i == 1 ? 0 : list[i - 1] - list[i] - 1;
|
|
|
|
if (skipped) {
|
|
|
|
for (MDB_ID j = list[i - 1]; j > list[i]; j--) {
|
|
|
|
if (sparsemap_is_set(map, pg - j) != false) {
|
|
|
|
__diag("%zu\n", pg - j);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
if (sparsemap_is_set(map, pg) != true) {
|
|
|
|
__diag("%zu\n", pg);
|
2024-04-26 20:25:17 +00:00
|
|
|
return false;
|
2024-04-28 16:26:31 +00:00
|
|
|
}
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
sparsemap_idx_t
|
|
|
|
_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
|
|
|
|
{
|
2024-04-30 17:58:35 +00:00
|
|
|
do {
|
|
|
|
sparsemap_idx_t l = sparsemap_set(*map, idx, value);
|
|
|
|
if (l != idx) {
|
|
|
|
if (errno == ENOSPC) {
|
2024-05-06 19:43:47 +00:00
|
|
|
*map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64);
|
2024-04-30 17:58:35 +00:00
|
|
|
assert(*map != NULL);
|
|
|
|
errno = 0;
|
|
|
|
} else {
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
} while (true);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
|
2024-05-03 20:07:46 +00:00
|
|
|
sparsemap_idx_t
|
|
|
|
_sparsemap_merge(sparsemap_t **map, sparsemap_t *other)
|
|
|
|
{
|
|
|
|
do {
|
2024-05-04 13:45:43 +00:00
|
|
|
int retval = sparsemap_merge(*map, other);
|
2024-05-03 20:07:46 +00:00
|
|
|
if (retval != 0) {
|
|
|
|
if (errno == ENOSPC) {
|
2024-05-09 19:50:56 +00:00
|
|
|
size_t new_size = retval + (64 - (retval % 64)) + 64;
|
2024-05-06 19:43:47 +00:00
|
|
|
*map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
|
2024-05-03 20:07:46 +00:00
|
|
|
assert(*map != NULL);
|
|
|
|
errno = 0;
|
|
|
|
} else {
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
} while (true);
|
|
|
|
}
|
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
td_histogram_t *l_span_loc;
|
|
|
|
td_histogram_t *b_span_loc;
|
|
|
|
td_histogram_t *l_span_take;
|
|
|
|
td_histogram_t *b_span_take;
|
2024-04-30 17:58:35 +00:00
|
|
|
td_histogram_t *l_span_merge;
|
|
|
|
td_histogram_t *b_span_merge;
|
2024-04-28 16:26:31 +00:00
|
|
|
|
|
|
|
/*
 * Print the CSV header line to stdout: seven general columns followed by
 * five percentile columns (p50, p75, p90, p99, p999) for each of the six
 * idl/sm loc, take and merge series.
 */
void
stats_header(void)
{
  fputs("timestamp,iterations,idl_cap,idl_used,idl_bytes,sm_cap,sm_used,"
        "idl_loc_p50,idl_loc_p75,idl_loc_p90,idl_loc_p99,idl_loc_p999,"
        "sm_loc_p50,sm_loc_p75,sm_loc_p90,sm_loc_p99,sm_loc_p999,"
        "idl_take_p50,idl_take_p75,idl_take_p90,idl_take_p99,idl_take_p999,"
        "sm_take_p50,sm_take_p75,sm_take_p90,sm_take_p99,sm_take_p999,"
        "idl_merge_p50,idl_merge_p75,idl_merge_p90,idl_merge_p99,idl_merge_p999,"
        "sm_merge_p50,sm_merge_p75,sm_merge_p90,sm_merge_p99,sm_merge_p999\n",
    stdout);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
stats(size_t iterations, sparsemap_t *map, MDB_IDL list)
|
|
|
|
{
|
|
|
|
if (iterations < 10)
|
|
|
|
return;
|
|
|
|
|
|
|
|
td_compress(l_span_loc);
|
|
|
|
td_compress(b_span_loc);
|
|
|
|
td_compress(l_span_take);
|
|
|
|
td_compress(b_span_take);
|
2024-04-30 17:58:35 +00:00
|
|
|
td_compress(l_span_merge);
|
|
|
|
td_compress(b_span_merge);
|
2024-04-28 16:26:31 +00:00
|
|
|
|
2024-04-30 17:58:35 +00:00
|
|
|
printf(
|
|
|
|
"%f,%zu,%zu,%zu,%zu,%zu,%zu,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f,%.10f\n",
|
2024-04-28 16:26:31 +00:00
|
|
|
nsts(), iterations, list[-1], list[0], MDB_IDL_SIZEOF(list), sparsemap_get_capacity(map), sparsemap_get_size(map), td_quantile(l_span_loc, .5),
|
|
|
|
td_quantile(l_span_loc, .75), td_quantile(l_span_loc, .90), td_quantile(l_span_loc, .99), td_quantile(l_span_loc, .999), td_quantile(b_span_loc, .5),
|
|
|
|
td_quantile(b_span_loc, .75), td_quantile(b_span_loc, .90), td_quantile(b_span_loc, .99), td_quantile(b_span_loc, .999), td_quantile(l_span_take, .5),
|
|
|
|
td_quantile(l_span_take, .75), td_quantile(l_span_take, .90), td_quantile(l_span_take, .99), td_quantile(l_span_take, .999), td_quantile(b_span_take, .5),
|
2024-04-30 17:58:35 +00:00
|
|
|
td_quantile(b_span_take, .75), td_quantile(b_span_take, .90), td_quantile(b_span_take, .99), td_quantile(b_span_take, .999), td_quantile(l_span_merge, .5),
|
|
|
|
td_quantile(l_span_merge, .75), td_quantile(l_span_merge, .90), td_quantile(l_span_merge, .99), td_quantile(l_span_merge, .999),
|
|
|
|
td_quantile(b_span_merge, .5), td_quantile(b_span_merge, .75), td_quantile(b_span_merge, .90), td_quantile(b_span_merge, .99),
|
|
|
|
td_quantile(b_span_merge, .999));
|
2024-04-28 16:26:31 +00:00
|
|
|
}
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
/* Initial number of pages to track; parenthesized so the expansion stays a
 * single value inside larger expressions. */
#define INITIAL_AMOUNT (1024 * 2)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A "soak test" that tries to replicate behavior in LMDB for page allocation.
|
|
|
|
*/
|
|
|
|
int
|
2024-05-02 12:55:38 +00:00
|
|
|
main(void)
|
2024-04-26 20:25:17 +00:00
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
size_t replenish = 0, iterations = 0;
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
// disable buffering
|
2024-04-28 16:28:58 +00:00
|
|
|
#ifdef DEBUG
|
2024-04-26 20:25:17 +00:00
|
|
|
setvbuf(stdout, NULL, _IONBF, 0);
|
|
|
|
setvbuf(stderr, NULL, _IONBF, 0);
|
2024-04-28 16:28:58 +00:00
|
|
|
#endif
|
2024-04-26 20:25:17 +00:00
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
l_span_loc = td_new(100);
|
|
|
|
b_span_loc = td_new(100);
|
|
|
|
l_span_take = td_new(100);
|
|
|
|
b_span_take = td_new(100);
|
2024-04-30 17:58:35 +00:00
|
|
|
l_span_merge = td_new(100);
|
|
|
|
b_span_merge = td_new(100);
|
2024-04-28 16:26:31 +00:00
|
|
|
|
|
|
|
stats_header();
|
2024-04-26 20:25:17 +00:00
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
sparsemap_idx_t amt = INITIAL_AMOUNT;
|
2024-04-26 20:25:17 +00:00
|
|
|
MDB_IDL list = mdb_midl_alloc(amt);
|
2024-04-28 16:26:31 +00:00
|
|
|
sparsemap_t *map = sparsemap(INITIAL_AMOUNT);
|
2024-05-03 19:15:39 +00:00
|
|
|
roaring_bitmap_t *rbm = roaring_bitmap_create();
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
// start with 2GiB of 4KiB free pages to track:
|
|
|
|
// - MDB_IDL requires one int for each free page
|
|
|
|
// - Sparsemap will compress the set bits using less memory
|
|
|
|
mdb_midl_need(&list, amt);
|
2024-04-28 16:26:31 +00:00
|
|
|
for (sparsemap_idx_t pg = 0; pg < amt; pg++) {
|
2024-04-26 20:25:17 +00:00
|
|
|
// We list every free (unallocated) page in the IDL, while...
|
|
|
|
mdb_midl_xappend(list, pg);
|
2024-05-03 19:15:39 +00:00
|
|
|
// ... true (unset in the bitmap) indicates free in the bitmap, ...
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(_sparsemap_set(&map, pg, true) == pg);
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(roaring_bitmap_add_checked(rbm, pg));
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
mdb_midl_sort(list);
|
2024-05-03 19:15:39 +00:00
|
|
|
roaring_bitmap_run_optimize(rbm);
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(verify_sm_eq_ml(map, list));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_sm_eq_rm(map, rbm));
|
2024-04-26 20:25:17 +00:00
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
double b, e;
|
2024-04-26 20:25:17 +00:00
|
|
|
while (1) {
|
|
|
|
unsigned mi;
|
2024-05-03 19:15:39 +00:00
|
|
|
pgno_t ml, sl, rl;
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
// get an amount [1, 16] of pages to find preferring smaller sizes
|
|
|
|
unsigned n = toss(15) + 1;
|
|
|
|
|
|
|
|
// find a set of pages using the MDB_IDL
|
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
/* Seek a big enough contiguous page range. Prefer
|
|
|
|
* pages at the tail, just truncating the list.
|
|
|
|
*/
|
|
|
|
int retry = 1;
|
2024-04-28 16:26:31 +00:00
|
|
|
unsigned i = 0;
|
2024-04-26 20:25:17 +00:00
|
|
|
pgno_t pgno = 0, *mop = list;
|
|
|
|
unsigned n2 = n, mop_len = mop[0];
|
|
|
|
if (mop_len > n2) {
|
|
|
|
i = mop_len;
|
|
|
|
do {
|
|
|
|
pgno = mop[i];
|
|
|
|
if (mop[i - n2] == pgno + n2)
|
|
|
|
goto search_done;
|
|
|
|
} while (--i > n2);
|
|
|
|
if (--retry < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
search_done:;
|
|
|
|
ml = pgno;
|
|
|
|
mi = i;
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
|
|
|
td_add(l_span_loc, e - b, 1);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
assert(verify_span_midl(list, ml, n));
|
|
|
|
assert(verify_span_sparsemap(map, ml, n));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_span_roaring(rbm, ml, n));
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
// find a set of pages using the Sparsemap
|
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
pgno_t pgno = sparsemap_span(map, 0, n, true);
|
|
|
|
assert(SPARSEMAP_NOT_FOUND(pgno) == false);
|
|
|
|
sl = pgno;
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
|
|
|
td_add(b_span_loc, e - b, 1);
|
2024-05-02 18:55:04 +00:00
|
|
|
assert(verify_sm_is_first_available_span(map, pgno, n, true));
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
assert(verify_span_midl(list, sl, n));
|
|
|
|
assert(verify_span_sparsemap(map, sl, n));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_span_roaring(rbm, sl, n));
|
|
|
|
|
|
|
|
// find a set of pages using the Roaring Bitmap
|
|
|
|
{
|
|
|
|
b = nsts();
|
|
|
|
uint64_t max = roaring_bitmap_maximum(rbm);
|
|
|
|
uint64_t offset = roaring_bitmap_minimum(rbm);
|
|
|
|
do {
|
|
|
|
if (n == 1 || roaring_bitmap_range_cardinality(rbm, offset, offset + n) == n) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
offset++;
|
|
|
|
} while (offset <= max);
|
|
|
|
rl = offset;
|
|
|
|
e = nsts();
|
|
|
|
}
|
2024-05-03 19:20:35 +00:00
|
|
|
/*
|
|
|
|
if (rl != sl) {
|
|
|
|
assert(verify_span_midl(list, rl, n));
|
|
|
|
assert(verify_span_sparsemap(map, rl, n));
|
|
|
|
assert(verify_span_roaring(rbm, rl, n));
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
assert(rl == sl);
|
2024-05-03 19:15:39 +00:00
|
|
|
|
|
|
|
bool prefer_mdb_idl_loc = (bool)xorshift32() % 2;
|
2024-04-26 20:25:17 +00:00
|
|
|
|
|
|
|
// acquire the set of pages within the list
|
2024-05-03 19:15:39 +00:00
|
|
|
if (prefer_mdb_idl_loc) {
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
unsigned j, num = n;
|
|
|
|
int i = mi;
|
|
|
|
pgno_t *mop = list;
|
|
|
|
unsigned mop_len = mop[0];
|
|
|
|
|
|
|
|
mop[0] = mop_len -= num;
|
|
|
|
/* Move any stragglers down */
|
|
|
|
for (j = i - num; j < mop_len;)
|
|
|
|
mop[++j] = mop[++i];
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
for (j = mop_len + 1; j <= mop[-1]; j++)
|
|
|
|
mop[j] = 0;
|
2024-04-28 16:26:31 +00:00
|
|
|
td_add(l_span_take, e - b, 1);
|
2024-04-26 20:25:17 +00:00
|
|
|
} else {
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
unsigned j, num = n;
|
|
|
|
int i = mdb_midl_search(list, sl) + num;
|
|
|
|
pgno_t *mop = list;
|
|
|
|
unsigned mop_len = mop[0];
|
|
|
|
|
|
|
|
mop[0] = mop_len -= num;
|
|
|
|
/* Move any stragglers down */
|
|
|
|
for (j = i - num; j < mop_len;)
|
|
|
|
mop[++j] = mop[++i];
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
|
|
|
for (j = mop_len + 1; j <= mop[-1]; j++)
|
|
|
|
mop[j] = 0;
|
|
|
|
td_add(l_span_take, e - b, 1);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// acquire the set of pages within the sparsemap
|
2024-05-03 19:15:39 +00:00
|
|
|
if (prefer_mdb_idl_loc) {
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
for (pgno_t i = ml; i < ml + n; i++) {
|
|
|
|
assert(_sparsemap_set(&map, i, false) == i);
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
|
|
|
td_add(b_span_take, e - b, 1);
|
2024-04-26 20:25:17 +00:00
|
|
|
} else {
|
2024-04-28 16:26:31 +00:00
|
|
|
b = nsts();
|
2024-04-26 20:25:17 +00:00
|
|
|
for (pgno_t i = sl; i <= sl + n; i++) {
|
|
|
|
assert(_sparsemap_set(&map, i, false) == i);
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
e = nsts();
|
|
|
|
td_add(b_span_take, e - b, 1);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
|
2024-05-03 19:15:39 +00:00
|
|
|
// acquire the set of pages within the roaring bitmap
|
|
|
|
if (prefer_mdb_idl_loc) {
|
|
|
|
b = nsts();
|
|
|
|
roaring_bitmap_remove_range(rbm, ml, ml + n);
|
|
|
|
e = nsts();
|
|
|
|
} else {
|
|
|
|
b = nsts();
|
|
|
|
roaring_bitmap_remove_range(rbm, sl, sl + n);
|
|
|
|
e = nsts();
|
|
|
|
}
|
|
|
|
roaring_bitmap_run_optimize(rbm);
|
|
|
|
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(verify_sm_eq_ml(map, list));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_sm_eq_rm(map, rbm));
|
2024-04-26 20:25:17 +00:00
|
|
|
|
2024-05-03 01:13:17 +00:00
|
|
|
// Once we've used a tenth of the free list, let's replenish it a bit.
|
|
|
|
if (list[0] < amt / 10) {
|
2024-04-26 20:25:17 +00:00
|
|
|
do {
|
2024-05-02 18:55:04 +00:00
|
|
|
pgno_t pgno;
|
2024-04-26 20:25:17 +00:00
|
|
|
size_t len, retries = amt;
|
|
|
|
do {
|
|
|
|
len = toss(15) + 1;
|
2024-05-02 18:55:04 +00:00
|
|
|
pgno = sparsemap_span(map, 0, len, false);
|
|
|
|
assert(verify_sm_is_first_available_span(map, pgno, n, false));
|
2024-04-28 16:26:31 +00:00
|
|
|
//__diag("%zu\t%zu,%zu\n", iterations, replenish, retries);
|
2024-05-02 18:55:04 +00:00
|
|
|
} while (SPARSEMAP_NOT_FOUND(pgno) && --retries);
|
2024-04-28 16:26:31 +00:00
|
|
|
if (retries == 0) {
|
|
|
|
goto larger_please;
|
|
|
|
}
|
2024-05-02 18:55:04 +00:00
|
|
|
if (SPARSEMAP_FOUND(pgno)) {
|
|
|
|
assert(verify_empty_midl(list, pgno, len));
|
|
|
|
assert(verify_empty_sparsemap(map, pgno, len));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_empty_roaring(rbm, pgno, len));
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(verify_sm_eq_ml(map, list));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_sm_eq_rm(map, rbm));
|
2024-04-28 16:26:31 +00:00
|
|
|
if (list[-1] - list[0] < len) {
|
2024-04-26 20:25:17 +00:00
|
|
|
mdb_midl_need(&list, list[-1] + len);
|
2024-04-28 16:26:31 +00:00
|
|
|
}
|
2024-05-02 18:55:04 +00:00
|
|
|
for (size_t i = pgno; i < pgno + len; i++) {
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(verify_midl_contains(list, i) == false);
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(sparsemap_is_set(map, i) == false);
|
2024-04-26 20:25:17 +00:00
|
|
|
mdb_midl_insert(list, i);
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(verify_midl_contains(list, i) == true);
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(_sparsemap_set(&map, i, true) == i);
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(sparsemap_is_set(map, i) == true);
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(roaring_bitmap_add_checked(rbm, i) == true);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
mdb_midl_sort(list);
|
|
|
|
assert(verify_midl_nodups(list));
|
2024-05-02 18:55:04 +00:00
|
|
|
assert(verify_span_midl(list, pgno, len));
|
|
|
|
assert(verify_span_sparsemap(map, pgno, len));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_span_roaring(rbm, pgno, len));
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(verify_sm_eq_ml(map, list));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_sm_eq_rm(map, rbm));
|
2024-04-28 16:26:31 +00:00
|
|
|
replenish++;
|
2024-04-26 20:25:17 +00:00
|
|
|
} while (list[0] < amt - 32);
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
replenish = 0;
|
2024-04-26 20:25:17 +00:00
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
// every so often, either ...
|
|
|
|
if (iterations % 1000 == 0) {
|
|
|
|
larger_please:;
|
2024-05-03 01:13:17 +00:00
|
|
|
size_t COUNT = xorshift32() % 3586 + 513;
|
2024-04-30 18:40:23 +00:00
|
|
|
// ... add some amount of 4KiB pages, or
|
2024-04-30 17:58:35 +00:00
|
|
|
size_t len = COUNT;
|
|
|
|
// The largest page is at list[1] because this is a reverse sorted list.
|
|
|
|
pgno_t pg = list[0] ? list[1] + 1 : 0;
|
2024-05-03 19:15:39 +00:00
|
|
|
if (true) { // disable shrinking for now... (toss(6) + 1 < 7)
|
2024-04-30 17:58:35 +00:00
|
|
|
MDB_IDL new_list = mdb_midl_alloc(len);
|
|
|
|
sparsemap_t *new_map = sparsemap(INITIAL_AMOUNT);
|
2024-05-03 19:15:39 +00:00
|
|
|
roaring_bitmap_t *new_rbm = roaring_bitmap_create();
|
2024-04-30 17:58:35 +00:00
|
|
|
for (size_t i = 0; i < len; i++) {
|
|
|
|
pgno_t gp = (pg + len) - i;
|
|
|
|
new_list[i + 1] = gp;
|
|
|
|
new_list[0]++;
|
|
|
|
assert(verify_midl_contains(new_list, gp) == true);
|
|
|
|
assert(_sparsemap_set(&new_map, gp, true) == gp);
|
|
|
|
assert(sparsemap_is_set(new_map, gp));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(roaring_bitmap_add_checked(new_rbm, gp));
|
|
|
|
assert(roaring_bitmap_contains(new_rbm, gp));
|
2024-04-30 17:58:35 +00:00
|
|
|
}
|
|
|
|
assert(verify_sm_eq_ml(new_map, new_list));
|
2024-05-03 19:15:39 +00:00
|
|
|
assert(verify_sm_eq_rm(new_map, new_rbm));
|
2024-04-30 17:58:35 +00:00
|
|
|
{
|
|
|
|
b = nsts();
|
|
|
|
mdb_midl_append_list(&list, new_list);
|
|
|
|
mdb_midl_sort(list);
|
|
|
|
e = nsts();
|
|
|
|
td_add(l_span_merge, e - b, 1);
|
|
|
|
}
|
|
|
|
for (size_t i = 0; i < len; i++) {
|
|
|
|
pgno_t gp = (pg + len) - i;
|
|
|
|
assert(verify_midl_contains(list, gp) == true);
|
|
|
|
}
|
|
|
|
{
|
|
|
|
b = nsts();
|
2024-05-03 20:07:46 +00:00
|
|
|
_sparsemap_merge(&map, new_map);
|
2024-04-30 17:58:35 +00:00
|
|
|
e = nsts();
|
|
|
|
td_add(b_span_merge, e - b, 1);
|
2024-04-28 16:26:31 +00:00
|
|
|
}
|
2024-04-30 17:58:35 +00:00
|
|
|
for (size_t i = 0; i < len; i++) {
|
|
|
|
pgno_t gp = (pg + len) - i;
|
|
|
|
assert(sparsemap_is_set(map, gp));
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
2024-04-30 17:58:35 +00:00
|
|
|
free(new_map);
|
2024-05-03 19:15:39 +00:00
|
|
|
{
|
|
|
|
b = nsts();
|
|
|
|
roaring_bitmap_or_inplace(rbm, new_rbm);
|
|
|
|
e = nsts();
|
|
|
|
}
|
|
|
|
for (size_t i = 0; i < len; i++) {
|
|
|
|
pgno_t gp = (pg + len) - i;
|
|
|
|
assert(roaring_bitmap_contains(rbm, gp));
|
|
|
|
}
|
|
|
|
roaring_free(new_rbm);
|
2024-04-26 20:25:17 +00:00
|
|
|
} else {
|
|
|
|
if (list[-1] > INITIAL_AMOUNT) {
|
2024-04-28 16:26:31 +00:00
|
|
|
// ... a fraction of the time, remove the last COUNT 4KiB pages from the list.
|
2024-05-03 19:15:39 +00:00
|
|
|
{
|
|
|
|
pgno_t pg;
|
|
|
|
for (size_t i = 0; i < COUNT; i++) {
|
|
|
|
pg = list[list[0] - i];
|
|
|
|
assert(sparsemap_is_set(map, pg) == true);
|
|
|
|
assert(_sparsemap_set(&map, pg, false) == pg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
{
|
|
|
|
roaring_bitmap_remove_range_closed(rbm, list[list[0] - COUNT], list[list[0]]);
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mdb_midl_shrink_to(&list, list[0] - COUNT);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
assert(list[list[0]] != pg);
|
2024-04-26 20:25:17 +00:00
|
|
|
assert(verify_midl_nodups(list));
|
|
|
|
verify_sm_eq_ml(map, list);
|
2024-05-03 19:15:39 +00:00
|
|
|
verify_sm_eq_rm(map, rbm);
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
stats(iterations, map, list);
|
2024-05-03 01:13:17 +00:00
|
|
|
// printf("\033[K%zu\r", iterations);
|
|
|
|
iterations++;
|
2024-04-26 20:25:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|