Soak test: add merge/size/count container operations and a replenish/merge
phase to the main loop; sparsemap: change the offsets reported by
__sm_chunk_scan().

src/sparsemap.c
  * __sm_chunk_scan() stores start + ret + b (was start + b) into the scan
    buffer, and the first copy loop now iterates from b = 0 instead of
    b = skip.

tests/soak.c
  * record_*_mutation() helpers become static; record_merge_mutation() is
    added and logs "merge <ending offset> <offsets...>" via sparsemap_scan().
  * _sparsemap_merge() is replaced by __sm_merge(), which retries
    sparsemap_merge() after growing the map when errno is ENOSPC.
  * Each container gains merge/size/count callbacks (the roaring size/count
    callbacks are still TODO stubs returning 0).
  * main() tracks `left`, releases spans once more than a tenth of `amt` has
    been taken, and when toss(1000) == 0 (or when no hole is found) merges in
    a freshly built container.

Review fixes folded into this patch (tests/soak.c only):
  * __midl_set(), __midl_release_span(): reload `list` from *_list after
    mdb_midl_need(), which can realloc() the IDL and update *_list; the
    cached pointer would otherwise be used after the block has moved.
  * __midl_merge(): sort *_list rather than a pointer captured before
    mdb_midl_append_list(), for the same reason.
  * mutate(): parenthesize the whole expansion and the `type` argument.
  The "+" start/count fields of the affected hunk headers were adjusted for
  the two added lines; the tests/soak.c blob ids on its "index" line are
  unchanged from the original patch and no longer describe the post-image.

diff --git a/src/sparsemap.c b/src/sparsemap.c
index 69ec576..da9ad78 100644
--- a/src/sparsemap.c
+++ b/src/sparsemap.c
@@ -730,15 +730,15 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[],
             continue;
           }
           size_t n = 0;
-          for (size_t b = skip; b < SM_BITS_PER_VECTOR; b++) {
-            buffer[n++] = start + b;
+          for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
+            buffer[n++] = start + ret + b;
           }
           scanner(&buffer[0], n, aux);
           ret += n;
           skip = 0;
         } else {
           for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
-            buffer[b] = start + b;
+            buffer[b] = start + ret + b;
           }
           scanner(&buffer[0], SM_BITS_PER_VECTOR, aux);
           ret += SM_BITS_PER_VECTOR;
@@ -758,14 +758,14 @@ __sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[],
               continue;
             }
             if (w & ((sm_bitvec_t)1 << b)) {
-              buffer[n++] = start + b;
+              buffer[n++] = start + ret + b;
               ret++;
             }
           }
         } else {
           for (int b = 0; b < SM_BITS_PER_VECTOR; b++) {
             if (w & ((sm_bitvec_t)1 << b)) {
-              buffer[n++] = start + b;
+              buffer[n++] = start + ret + b;
             }
           }
           ret += n;
diff --git a/tests/soak.c b/tests/soak.c
index 044997c..c04072b 100644
--- a/tests/soak.c
+++ b/tests/soak.c
@@ -52,7 +52,7 @@ toss(size_t max)
 
 bool recording = true;
 
-void
+static void
 record_set_mutation(FILE *out, pgno_t pg)
 {
   if (recording) {
@@ -60,7 +60,7 @@ record_set_mutation(FILE *out, pgno_t pg)
   }
 }
 
-void
+static void
 record_clear_mutation(FILE *out, pgno_t pg)
 {
   if (recording) {
@@ -68,7 +68,7 @@ record_clear_mutation(FILE *out, pgno_t pg)
   }
 }
 
-void
+static void
 record_take_span_mutation(FILE *out, pgno_t pg, unsigned len)
 {
   if (recording) {
@@ -76,7 +76,7 @@ record_take_span_mutation(FILE *out, pgno_t pg, unsigned len)
   }
 }
 
-void
+static void
 record_release_span_mutation(FILE *out, pgno_t pg, unsigned len)
 {
   if (recording) {
@@ -84,28 +84,28 @@ record_release_span_mutation(FILE *out, pgno_t pg, unsigned len)
   }
 }
 
-/* sparsemap ------------------------------------------------------------- */
-
-sparsemap_idx_t
-_sparsemap_merge(sparsemap_t **map, sparsemap_t *other)
+static void
+__scan_record_offsets(sm_idx_t v[], size_t n, void *aux)
 {
-  do {
-    int retval = sparsemap_merge(*map, other);
-    if (retval != 0) {
-      if (errno == ENOSPC) {
-        size_t new_size = retval + (64 - (retval % 64)) + 64;
-        *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
-        assert(*map != NULL);
-        errno = 0;
-      } else {
-        assert(false);
-      }
-    } else {
-      return retval;
-    }
-  } while (true);
+  FILE *out = (FILE *)aux;
+  for (size_t i = 0; i < n; i++) {
+    fprintf(out, "%u ", v[i]);
+  }
 }
 
+static void
+record_merge_mutation(FILE *out, void *handle)
+{
+  if (recording) {
+    sparsemap_t *map = (sparsemap_t *)handle;
+    fprintf(out, "merge %zu ", sparsemap_get_ending_offset(map));
+    sparsemap_scan(map, __scan_record_offsets, 0, (void *)out);
+    fprintf(out, "\n");
+  }
+}
+
+/* sparsemap ------------------------------------------------------------- */
+
 static sparsemap_idx_t
 _sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value)
 {
@@ -228,6 +228,43 @@ __sm_is_first(void *handle, pgno_t pg, unsigned len)
   return false;
 }
 
+static bool
+__sm_merge(void **handle, void *other_handle)
+{
+  sparsemap_t **map = (sparsemap_t **)handle;
+  sparsemap_t *other = (sparsemap_t *)other_handle;
+  do {
+    int retval = sparsemap_merge(*map, other);
+    if (retval != 0) {
+      if (errno == ENOSPC) {
+        size_t new_size = retval + (64 - (retval % 64)) + 64;
+        *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + new_size);
+        assert(*map != NULL);
+        errno = 0;
+      } else {
+        assert(false);
+      }
+    } else {
+      break;
+    }
+  } while (true);
+  return true;
+}
+
+static size_t
+__sm_size(void *handle)
+{
+  sparsemap_t *map = (sparsemap_t *)handle;
+  return sparsemap_get_size(map);
+}
+
+static size_t
+__sm_count(void *handle)
+{
+  sparsemap_t *map = (sparsemap_t *)handle;
+  return sparsemap_rank(map, 0, SPARSEMAP_IDX_MAX, true);
+}
+
 /* midl ------------------------------------------------------------------ */
 
 static void *
@@ -249,7 +286,8 @@ __midl_set(void **handle, pgno_t pg)
 {
   MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
   if (list[0] + 1 == list[-1]) {
-    mdb_midl_need(&list, list[-1] + 1);
+    mdb_midl_need(_list, list[-1] + 1);
+    list = *_list; /* mdb_midl_need() may realloc and store the new pointer in *_list */
   }
   mdb_midl_insert(list, pg);
   return pg;
@@ -308,7 +346,6 @@ static bool
 __midl_take_span(void **handle, pgno_t pg, unsigned len)
 {
   MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
-  int i = list[list[0]] == pg ? list[0] : mdb_midl_search(list, pg) + len;
   unsigned j, num = len;
   pgno_t *mop = list;
 
@@ -320,17 +357,17 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len)
     mop[++j] = mop[++i];
   /* Set all unused values in the array to 0
   for (j = mop_len + 1; j <= mop[-1]; j++)
-    mop[j] = 0;
-  */
+    mop[j] = 0; */
   return true;
 }
 
 static bool
 __midl_release_span(void **handle, pgno_t pg, unsigned len)
 {
-  MDB_IDL list = (MDB_IDL)handle;
+  MDB_IDL *_list = (MDB_IDL *)handle, list = *_list;
   if (list[0] + len >= list[-1]) {
-    mdb_midl_need(&list, list[-1] + len);
+    mdb_midl_need(_list, list[-1] + len);
+    list = *_list; /* mdb_midl_need() may realloc and store the new pointer in *_list */
   }
   for (size_t i = pg; i < pg + len; i++) {
     mdb_midl_insert(list, i);
@@ -367,6 +404,30 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len)
   return true;
 }
 
+static bool
+__midl_merge(void **handle, void *other_handle)
+{
+  MDB_IDL *_list = (MDB_IDL *)handle;
+  MDB_IDL other = (MDB_IDL)other_handle;
+  mdb_midl_append_list(_list, other);
+  mdb_midl_sort(*_list); /* re-read *_list: the append may have reallocated the list */
+  return true;
+}
+
+static size_t
+__midl_size(void *handle)
+{
+  MDB_IDL list = (MDB_IDL)handle;
+  return list[0] * sizeof(pgno_t);
+}
+
+static size_t
+__midl_count(void *handle)
+{
+  MDB_IDL list = (MDB_IDL)handle;
+  return list[0];
+}
+
 static bool
 __midl_validate(void *handle)
 {
@@ -476,6 +537,29 @@ __roar_is_empty(void *handle, pgno_t pg, unsigned len)
   return true;
 }
 
+static bool
+__roar_merge(void **handle, void *other_handle)
+{
+  roaring_bitmap_t **_rbm = (roaring_bitmap_t **)handle, *rbm = *_rbm;
+  roaring_bitmap_t *other = (roaring_bitmap_t *)other_handle;
+  roaring_bitmap_or_inplace(rbm, other);
+  return true;
+}
+
+static size_t
+__roar_size(void *handle)
+{
+  // TODO
+  return 0;
+}
+
+static size_t
+__roar_count(void *handle)
+{
+  // TODO
+  return 0;
+}
+
 static bool
 __roar_validate(void *handle)
 {
@@ -501,6 +585,9 @@ typedef struct container {
   bool (*is_span)(void *handle, pgno_t pg, unsigned len);
   bool (*is_empty)(void *handle, pgno_t pg, unsigned len);
   bool (*is_first)(void *handle, pgno_t pg, unsigned len);
+  bool (*merge)(void **handle, void *other_handle);
+  size_t (*size)(void *handle);
+  size_t (*count)(void *handle);
   bool (*validate)(void *handle);
 } container_t;
 
@@ -518,6 +605,9 @@ container_t containers[] = {
     .is_span = __sm_is_span,
     .is_empty = __sm_is_empty,
     .is_first = __sm_is_first,
+    .merge = __sm_merge,
+    .size = __sm_size,
+    .count = __sm_count,
     .validate = NULL
   },
   { "midl",
@@ -532,6 +622,9 @@ container_t containers[] = {
     .is_span = __midl_is_span,
     .is_empty = __midl_is_empty,
     .is_first = NULL,
+    .merge = __midl_merge,
+    .size = __midl_size,
+    .count = __midl_count,
     .validate = __midl_validate
   },
   { "roaring",
@@ -546,12 +639,16 @@ container_t containers[] = {
     .is_span = __roar_is_span,
     .is_empty = __roar_is_empty,
     .is_first = NULL,
+    .merge = __roar_merge,
+    .size = __roar_size,
+    .count = __roar_count,
     .validate = __roar_validate,
   },
 };
 // clang-format on
 
-void *handles[3];
+void *handles[(sizeof((containers)) / sizeof((containers)[0]))];
+void *new_handles[(sizeof((containers)) / sizeof((containers)[0]))];
 FILE *fp;
 
 #define alloc(type, size) containers[type].alloc(size);
@@ -559,7 +656,7 @@ FILE *fp;
   if (containers[type].fn) \
     containers[type].fn(handles[type], ##__VA_ARGS__)
 #define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__)
-#define mutate(type, fn, ...) record_##fn##_mutation(fp, __VA_ARGS__), containers[type].fn(&handles[type], __VA_ARGS__)
+#define mutate(type, fn, ...) ((((type) == 0) ? record_##fn##_mutation(fp, __VA_ARGS__) : (void)0), containers[type].fn(&handles[type], __VA_ARGS__))
 #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++)
 #define compare(set) \
   for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \
@@ -685,7 +782,7 @@ main(int argc, char *argv[])
   int opt;
   const char *record_file = NULL;
   int force_flag = 0;
-  size_t amt = INITIAL_AMOUNT;
+  size_t left, amt = INITIAL_AMOUNT;
   bool buffer = true;
 
   fp = stdout;
@@ -721,10 +818,8 @@ main(int argc, char *argv[])
 
   // Check if record file is specified
   if (record_file == NULL) {
-    recording = false;
+    recording = true; // TODO false
   } else {
-    recording = true;
-
     // Check for existing file without force flag
     if (access(record_file, F_OK) == 0 && !force_flag) {
       fprintf(stderr, "Warning: File '%s' already exists. Use -f or --force to overwrite.\n", record_file);
@@ -742,7 +837,7 @@ main(int argc, char *argv[])
   // disable buffering
   if (!buffer) {
     setvbuf(stdout, NULL, _IONBF, 0);
-    setvbuf(stderr, NULL, _IONBF, 0);
+    setvbuf(fp, NULL, _IONBF, 0);
   }
   unsigned types[] = { SM, ML, RB };
   unsigned num_types = (sizeof((types)) / sizeof((types)[0]));
@@ -759,6 +854,7 @@ main(int argc, char *argv[])
     cast(type, validate);
   }
   compare(types);
+  left = amt;
 
   while (true) {
     // the an amount [1, 16] of pages to find preferring smaller sizes
@@ -786,6 +882,72 @@ main(int argc, char *argv[])
       cast(type, validate);
     }
     compare(types);
+    left -= len;
+
+    // Once we've used 1/10th of the free list, let's replenish it a bit.
+    if (amt - left > amt / 10) {
+      do {
+        pgno_t pgno;
+        size_t len, retries = amt;
+        // Find a hole in the map to replenish.
+        do {
+          len = toss(15) + 1;
+          pgno = sparsemap_span(handles[SM], 0, len, false);
+        } while (SPARSEMAP_NOT_FOUND(pgno) && --retries);
+        if (retries == 0) {
+          goto larger_please;
+        }
+        if (SPARSEMAP_FOUND(pgno)) {
+          foreach(types)
+          {
+            assert(invoke(type, is_empty, pgno, len));
+          }
+          compare(types);
+          foreach(types)
+          {
+            assert(invoke(type, is_span, pgno, len) == false);
+            assert(mutate(type, release_span, pgno, len));
+            assert(invoke(type, is_span, pgno, len) == true);
+            cast(type, validate);
+          }
+          compare(types);
+          left += len;
+        }
+      } while (amt - left > amt / 100);
+    }
+
+    if (toss(1000) == 0) {
+      size_t new_amt;
+      pgno_t max;
+    larger_please:
+      new_amt = 1024 + (xorshift32() % 2048) + toss(1024);
+      max = sparsemap_get_ending_offset(handles[SM]);
+
+      // Build a new container to merge with the existing one.
+      foreach(types)
+      {
+        new_handles[type] = alloc(type, new_amt);
+        for (size_t i = 0; i < new_amt; i++) {
+          // We don't want to record and we're using new_handles not
+          // handles, so call fn directly.
+          assert(containers[type].is_set(new_handles[type], i) == false);
+          containers[type].set(&new_handles[type], i);
+          assert(containers[type].is_set(new_handles[type], i) == true);
+        }
+      }
+      foreach(types)
+      {
+        assert(mutate(type, merge, new_handles[type]));
+        cast(type, validate);
+      }
+      compare(types);
+      left += new_amt;
+      amt += new_amt;
+      foreach(types)
+      {
+        containers[type].free(new_handles[type]);
+      }
+    }
   }
 
   return 0;