This commit is contained in:
Gregory Burd 2024-05-17 15:52:42 -04:00
parent bb460a505e
commit e9e6404115
2 changed files with 201 additions and 41 deletions

View file

@ -730,15 +730,15 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[],
continue;
}
size_t n = 0;
for (size_t b = skip; b < SM_BITS_PER_VECTOR; b++) {
buffer[n++] = start + b;
for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
buffer[n++] = start + ret + b;
}
scanner(&buffer[0], n, aux);
ret += n;
skip = 0;
} else {
for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
buffer[b] = start + b;
buffer[b] = start + ret + b;
}
scanner(&buffer[0], SM_BITS_PER_VECTOR, aux);
ret += SM_BITS_PER_VECTOR;
@ -758,14 +758,14 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[],
continue;
}
if (w & ((sm_bitvec_t)1 << b)) {
buffer[n++] = start + b;
buffer[n++] = start + ret + b;
ret++;
}
}
} else {
for (int b = 0; b < SM_BITS_PER_VECTOR; b++) {
if (w & ((sm_bitvec_t)1 << b)) {
buffer[n++] = start + b;
buffer[n++] = start + ret + b;
}
}
ret += n;

View file

@ -52,7 +52,7 @@ toss(size_t max)
bool recording = true;
void
static void
record_set_mutation(FILE *out, pgno_t pg)
{
if (recording) {
@ -60,7 +60,7 @@ record_set_mutation(FILE *out, pgno_t pg)
}
}
void
static void
record_clear_mutation(FILE *out, pgno_t pg)
{
if (recording) {
@ -68,7 +68,7 @@ record_clear_mutation(FILE *out, pgno_t pg)
}
}
void
static void
record_take_span_mutation(FILE *out, pgno_t pg, unsigned len)
{
if (recording) {
@ -76,7 +76,7 @@ record_take_span_mutation(FILE *out, pgno_t pg, unsigned len)
}
}
void
static void
record_release_span_mutation(FILE *out, pgno_t pg, unsigned len)
{
if (recording) {
@ -84,28 +84,28 @@ record_release_span_mutation(FILE *out, pgno_t pg, unsigned len)
}
}
/* sparsemap ------------------------------------------------------------- */
sparsemap_idx_t
_sparsemap_merge(sparsemap_t **map, sparsemap_t *other)
static void
__scan_record_offsets(sm_idx_t v[], size_t n, void *aux)
{
do {
int retval = sparsemap_merge(*map, other);
if (retval != 0) {
if (errno == ENOSPC) {
size_t new_size = retval + (64 - (retval % 64)) + 64;
*map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
assert(*map != NULL);
errno = 0;
} else {
assert(false);
}
} else {
return retval;
}
} while (true);
FILE *out = (FILE *)aux;
for (size_t i = 0; i < n; i++) {
fprintf(out, "%u ", v[i]);
}
}
/*
 * Write a "merge" record describing `handle` (a sparsemap_t *) to `out`.
 *
 * The record is a single line: the word "merge", the value returned by
 * sparsemap_get_ending_offset(), then every offset that sparsemap_scan()
 * hands to __scan_record_offsets(), terminated by a newline.  Nothing is
 * written unless the global `recording` flag is set.
 */
static void
record_merge_mutation(FILE *out, void *handle)
{
  if (!recording) {
    return;
  }

  sparsemap_t *sm = (sparsemap_t *)handle;

  fprintf(out, "merge %zu ", sparsemap_get_ending_offset(sm));
  /* The output stream travels through the scan as the opaque `aux` pointer. */
  sparsemap_scan(sm, __scan_record_offsets, 0, (void *)out);
  fprintf(out, "\n");
}
/* sparsemap ------------------------------------------------------------- */
static sparsemap_idx_t
_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
{
@ -228,6 +228,43 @@ __sm_is_first(void *handle, pgno_t pg, unsigned len)
return false;
}
static bool
__sm_merge(void **handle, void *other_handle)
{
sparsemap_t **map = (sparsemap_t **)handle;
sparsemap_t *other = (sparsemap_t *)other_handle;
do {
int retval = sparsemap_merge(*map, other);
if (retval != 0) {
if (errno == ENOSPC) {
size_t new_size = retval + (64 - (retval % 64)) + 64;
*map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
assert(*map != NULL);
errno = 0;
} else {
assert(false);
}
} else {
break;
}
} while (true);
return true;
}
/*
 * Container `size` hook for sparsemap: reports whatever
 * sparsemap_get_size() returns for the map behind `handle`.
 */
static size_t
__sm_size(void *handle)
{
  return sparsemap_get_size((sparsemap_t *)handle);
}
/*
 * Container `count` hook for sparsemap.
 *
 * Returns sparsemap_rank() evaluated from offset 0 through
 * SPARSEMAP_IDX_MAX with `true` as the value argument -- presumably the
 * number of set bits in the whole map (confirm against sparsemap_rank()).
 */
static size_t
__sm_count(void *handle)
{
  sparsemap_t *sm = (sparsemap_t *)handle;
  size_t total = sparsemap_rank(sm, 0, SPARSEMAP_IDX_MAX, true);

  return total;
}
/* midl ------------------------------------------------------------------ */
static void *
@ -249,7 +286,7 @@ __midl_set(void **handle, pgno_t pg)
{
MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
if (list[0] + 1 == list[-1]) {
mdb_midl_need(&list, list[-1] + 1);
mdb_midl_need(_list, list[-1] + 1);
}
mdb_midl_insert(list, pg);
return pg;
@ -308,7 +345,6 @@ static bool
__midl_take_span(void **handle, pgno_t pg, unsigned len)
{
MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len;
unsigned j, num = len;
pgno_t *mop = list;
@ -320,17 +356,16 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len)
mop[++j] = mop[++i];
/* Set all unused values in the array to 0
for (j = mop_len + 1; j <= mop[-1]; j++)
mop[j] = 0;
*/
mop[j] = 0; */
return true;
}
static bool
__midl_release_span(void **handle, pgno_t pg, unsigned len)
{
MDB_IDL list = (MDB_IDL)handle;
MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
if (list[0] + len >= list[-1]) {
mdb_midl_need(&list, list[-1] + len);
mdb_midl_need(_list, list[-1] + len);
}
for (size_t i = pg; i < pg + len; i++) {
mdb_midl_insert(list, i);
@ -367,6 +402,30 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len)
return true;
}
/*
 * Container `merge` hook for MDB_IDL.
 *
 * Appends every id in `other_handle` (an MDB_IDL) to the list referenced by
 * `*handle` and then re-sorts the combined list.
 *
 * mdb_midl_append_list() is given the address of the list so that it can
 * replace the pointer when it has to grow the allocation.  The list is
 * therefore re-read through `*handle` after the append; sorting a copy of the
 * pointer taken beforehand could touch memory that is no longer valid.
 *
 * Returns true on success, false if the append reports an error (a non-zero
 * return value).
 */
static bool
__midl_merge(void **handle, void *other_handle)
{
  MDB_IDL *_list = (MDB_IDL *)handle;
  MDB_IDL other = (MDB_IDL)other_handle;

  if (mdb_midl_append_list(_list, other) != 0) {
    return false;
  }

  /* Use the (possibly relocated) list, not a pointer cached before the append. */
  mdb_midl_sort(*_list);
  return true;
}
/*
 * Container `size` hook for MDB_IDL: the value stored in slot 0 of the list
 * multiplied by sizeof(pgno_t).
 */
static size_t
__midl_size(void *handle)
{
  MDB_IDL ids = (MDB_IDL)handle;

  return ids[0] * sizeof(pgno_t);
}
/*
 * Container `count` hook for MDB_IDL: returns the value stored in slot 0 of
 * the list.
 */
static size_t
__midl_count(void *handle)
{
  MDB_IDL ids = (MDB_IDL)handle;

  return ids[0];
}
static bool
__midl_validate(void *handle)
{
@ -476,6 +535,29 @@ __roar_is_empty(void *handle, pgno_t pg, unsigned len)
return true;
}
static bool
__roar_merge(void **handle, void *other_handle)
{
roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm;
roaring_bitmap_t *other = (roaring_bitmap_t *)other_handle;
roaring_bitmap_or_inplace(rbm, other);
return true;
}
/*
 * Container `size` hook for roaring bitmaps.
 *
 * Placeholder: `handle` is ignored and 0 is always returned.
 */
static size_t
__roar_size(void *handle)
{
  (void)handle; /* unused until the TODO below is addressed */
  // TODO
  return 0;
}
/*
 * Container `count` hook for roaring bitmaps.
 *
 * Placeholder: `handle` is ignored and 0 is always returned.
 */
static size_t
__roar_count(void *handle)
{
  (void)handle; /* unused until the TODO below is addressed */
  // TODO
  return 0;
}
static bool
__roar_validate(void *handle)
{
@ -501,6 +583,9 @@ typedef struct container {
bool (*is_span)(void *handle, pgno_t pg, unsigned len);
bool (*is_empty)(void *handle, pgno_t pg, unsigned len);
bool (*is_first)(void *handle, pgno_t pg, unsigned len);
bool (*merge)(void **handle, void *other_handle);
size_t (*size)(void *handle);
size_t (*count)(void *handle);
bool (*validate)(void *handle);
} container_t;
@ -518,6 +603,9 @@ container_t containers[] = {
.is_span = __sm_is_span,
.is_empty = __sm_is_empty,
.is_first = __sm_is_first,
.merge = __sm_merge,
.size = __sm_size,
.count = __sm_count,
.validate = NULL
},
{ "midl",
@ -532,6 +620,9 @@ container_t containers[] = {
.is_span = __midl_is_span,
.is_empty = __midl_is_empty,
.is_first = NULL,
.merge = __midl_merge,
.size = __midl_size,
.count = __midl_count,
.validate = __midl_validate
},
{ "roaring",
@ -546,12 +637,16 @@ container_t containers[] = {
.is_span = __roar_is_span,
.is_empty = __roar_is_empty,
.is_first = NULL,
.merge = __roar_merge,
.size = __roar_size,
.count = __roar_count,
.validate = __roar_validate,
},
};
// clang-format on
void *handles[3];
void *handles[(sizeof((containers)) / sizeof((containers)[0]))];
void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))];
FILE *fp;
#define alloc(type, size) containers[type].alloc(size);
@ -559,7 +654,7 @@ FILE *fp;
if (containers[type].fn) \
containers[type].fn(handles[type], ##__VA_ARGS__)
#define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__)
#define mutate(type, fn, ...) record_##fn##_mutation(fp, __VA_ARGS__), containers[type].fn(&handles[type], __VA_ARGS__)
/*
 * mutate(type, fn, ...): apply containers[type].fn to &handles[type] with the
 * given arguments and yield its result.  When `type` is 0 the matching
 * record_<fn>_mutation(fp, ...) hook runs first.
 *
 * The whole expansion is wrapped in parentheses so the `?:` and comma
 * operators cannot bind to whatever surrounds the macro at the call site, and
 * `type` is parenthesized in the comparison.  Note that the variadic
 * arguments are evaluated twice when `type` is 0, so they must be free of
 * side effects.
 */
#define mutate(type, fn, ...) (((type) == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, containers[type].fn(&handles[type], __VA_ARGS__))
#define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++)
#define compare(set) \
for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \
@ -685,7 +780,7 @@ main(int argc, char *argv[])
int opt;
const char *record_file = NULL;
int force_flag = 0;
size_t amt = INITIAL_AMOUNT;
size_t left, amt = INITIAL_AMOUNT;
bool buffer = true;
fp = stdout;
@ -721,10 +816,8 @@ main(int argc, char *argv[])
// Check if record file is specified
if (record_file == NULL) {
recording = false;
recording = true; // TODO false
} else {
recording = true;
// Check for existing file without force flag
if (access(record_file, F_OK) == 0 && !force_flag) {
fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file);
@ -742,7 +835,7 @@ main(int argc, char *argv[])
// disable buffering
if (!buffer) {
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
setvbuf(fp, NULL, _IONBF, 0);
}
unsigned types[] = { SM, ML, RB };
unsigned num_types = (sizeof((types)) / sizeof((types)[0]));
@ -759,6 +852,7 @@ main(int argc, char *argv[])
cast(type, validate);
}
compare(types);
left = amt;
while (true) {
// pick an amount [1, 16] of pages to find, preferring smaller sizes
@ -786,6 +880,72 @@ main(int argc, char *argv[])
cast(type, validate);
}
compare(types);
left -= len;
// Once we've used 1/10th of the free list, let's replenish it a bit.
if (amt - left > amt / 10) {
do {
pgno_t pgno;
size_t len, retries = amt;
// Find a hole in the map to replenish.
do {
len = toss(15) + 1;
pgno = sparsemap_span(handles[SM], 0, len, false);
} while (SPARSEMAP_NOT_FOUND(pgno) && --retries);
if (retries == 0) {
goto larger_please;
}
if (SPARSEMAP_FOUND(pgno)) {
foreach(types)
{
assert(invoke(type, is_empty, pgno, len));
}
compare(types);
foreach(types)
{
assert(invoke(type, is_span, pgno, len) == false);
assert(mutate(type, release_span, pgno, len));
assert(invoke(type, is_span, pgno, len) == true);
cast(type, validate);
}
compare(types);
left += len;
}
} while (amt - left > amt / 100);
}
if (toss(1000) == 0) {
size_t new_amt;
pgno_t max;
larger_please:
new_amt = 1024 + (xorshift32() % 2048) + toss(1024);
max = sparsemap_get_ending_offset(handles[SM]);
// Build a new container to merge with the existing one.
foreach(types)
{
new_handles[type] = alloc(type, new_amt);
for (size_t i = 0; i < new_amt; i++) {
// We don't want to record and we're using new_handles not
// handles, so call fn directly.
assert(containers[type].is_set(new_handles[type], i) == false);
containers[type].set(&new_handles[type], i);
assert(containers[type].is_set(new_handles[type], i) == true);
}
}
foreach(types)
{
assert(mutate(type, merge, new_handles[type]));
cast(type, validate);
}
compare(types);
left += new_amt;
amt += new_amt;
foreach(types)
{
containers[type].free(new_handles[type]);
}
}
}
return 0;