diff --git a/tests/soak.c b/tests/soak.c index c0fab52..058a5af 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -16,6 +16,9 @@ typedef size_t pgno_t; +#define INITIAL_AMOUNT (1024 * 2) +bool recording = true; + char * bytes_as(double bytes, char *s, size_t size) { @@ -50,7 +53,152 @@ toss(size_t max) return level; } -bool recording = true; +static size_t +b64_encoded_size(size_t inlen) +{ /* length of the base64 text for inlen bytes, not counting the NUL */ + size_t ret; + + ret = inlen; + if (inlen % 3 != 0) /* round up to a whole 3-byte group */ + ret += 3 - (inlen % 3); + ret /= 3; + ret *= 4; /* each 3-byte group becomes 4 characters */ + + return ret; +} + +static const char b64chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static char * +b64_encode(const unsigned char *in, size_t len) +{ /* returns a malloc'd NUL-terminated string the caller frees; NULL on NULL/empty input or allocation failure */ + char *out; + size_t elen; + size_t i; + size_t j; + size_t v; + + if (in == NULL || len == 0) + return NULL; + + elen = b64_encoded_size(len); + out = malloc(elen + 1); + if (out == NULL) return NULL; /* do not write through a failed allocation */ + out[elen] = '\0'; + for (i = 0, j = 0; i < len; i += 3, j += 4) { + v = in[i]; + v = i + 1 < len ? v << 8 | in[i + 1] : v << 8; + v = i + 2 < len ? 
v << 8 | in[i + 2] : v << 8; + + out[j] = b64chars[(v >> 18) & 0x3F]; + out[j + 1] = b64chars[(v >> 12) & 0x3F]; + if (i + 1 < len) { + out[j + 2] = b64chars[(v >> 6) & 0x3F]; + } else { + out[j + 2] = '='; /* pad: no second input byte in this group */ + } + if (i + 2 < len) { + out[j + 3] = b64chars[v & 0x3F]; + } else { + out[j + 3] = '='; /* pad: no third input byte in this group */ + } + } + + return out; +} + +static size_t +b64_decoded_size(const char *in) +{ /* 3 bytes per 4 characters, minus one per trailing '='; 0 for NULL */ + size_t len; + size_t ret; + size_t i; + + if (in == NULL) + return 0; + + len = strlen(in); + ret = len / 4 * 3; + + for (i = len; i-- > 0;) { + if (in[i] == '=') { + ret--; + } else { + break; + } + } + + return ret; +} + +#if 0 +static void +b64_generate_decode_table() +{ + int inv[80]; + size_t i; + + memset(inv, -1, sizeof(inv)); + for (i = 0; i < sizeof(b64chars) - 1; i++) { + inv[b64chars[i] - 43] = i; + } +} +#endif + +static const int b64invs[] = { 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51 }; /* indexed by (character - 43), i.e. starting at '+'; -1 marks characters outside the alphabet */ + +static int +b64_isvalidchar(char c) +{ + if (c >= '0' && c <= '9') + return 1; + if (c >= 'A' && c <= 'Z') + return 1; + if (c >= 'a' && c <= 'z') + return 1; + if (c == '+' || c == '/' || c == '=') + return 1; + return 0; +} + +static int +b64_decode(const char *in, unsigned char *out, size_t outlen) +{ /* returns 1 on success, 0 on NULL arguments, malformed input or a too-small out buffer */ + size_t len; + size_t i; + size_t j; + int v; + + if (in == NULL || out == NULL) + return 0; + + len = strlen(in); + if (outlen < b64_decoded_size(in) || len % 4 != 0) + return 0; + + for (i = 0; i < len; i++) { + if (!b64_isvalidchar(in[i]) || (in[i] == '=' && i + 2 < len)) { /* '=' is only legal in the final two positions; elsewhere it maps to -1 and would be shifted */ + return 0; + } + } + + for (i = 0, j = 0; i < len; i += 4, j += 3) { + v = b64invs[in[i] - 43]; + v = (v << 6) | b64invs[in[i + 1] - 43]; + v = in[i + 2] == '=' ? v << 6 : (v << 6) | b64invs[in[i + 2] - 43]; + v = in[i + 3] == '=' ? 
v << 6 : (v << 6) | b64invs[in[i + 3] - 43]; + + out[j] = (v >> 16) & 0xFF; + if (in[i + 2] != '=') + out[j + 1] = (v >> 8) & 0xFF; + if (in[i + 3] != '=') + out[j + 2] = v & 0xFF; + } + + return 1; +} static void record_set_mutation(FILE *out, pgno_t pg) @@ -104,20 +252,38 @@ record_merge_mutation(FILE *out, void *handle) } } +static void +record_checkpoint(FILE *out, void *handle) +{ /* when recording, writes one line to out: "checkpoint", capacity, size, encoded size, then the base64 of the map's data */ + if (recording) { + sparsemap_t *map = (sparsemap_t *)handle; + size_t capacity = sparsemap_get_capacity(map); + size_t buffer_size = sparsemap_get_size(map); + size_t encoded_size = b64_encoded_size(buffer_size); + char *encoded = b64_encode(sparsemap_get_data(map), buffer_size); + fprintf(out, "checkpoint %zu %zu %zu ", capacity, buffer_size, encoded_size); + fprintf(out, "%s\n", encoded != NULL ? encoded : ""); /* b64_encode returns NULL for empty input */ + free(encoded); /* b64_encode hands back a malloc'd buffer */ + } +} + /* sparsemap ------------------------------------------------------------- */ static sparsemap_idx_t -_sparsemap_set(sparsemap_t **map, sparsemap_idx_t idx, bool value) +_sparsemap_set(sparsemap_t **_map, sparsemap_idx_t idx, bool value) { + sparsemap_t *map = *_map, *new_map = NULL; do { - sparsemap_idx_t l = sparsemap_set(*map, idx, value); + sparsemap_idx_t l = sparsemap_set(map, idx, value); if (l != idx) { if (errno == ENOSPC) { - *map = sparsemap_set_data_size(*map, NULL, sparsemap_get_capacity(*map) + 64); - assert(*map != NULL); + size_t capacity = sparsemap_get_capacity(map) + 64; + new_map = sparsemap_set_data_size(map, NULL, capacity); + assert(new_map != NULL); errno = 0; + *_map = map = new_map; /* retry against the resized map, not the pointer that was just replaced */ } else { - assert(false); + perror("Unable to grow sparsemap"); /* NOTE(review): this branch neither returns nor breaks; the loop condition is outside this hunk */ } } else { return l; @@ -166,8 +332,7 @@ __sm_find_span(void *handle, unsigned len) { sparsemap_t *map = (sparsemap_t *)handle; pgno_t pgno = (pgno_t)sparsemap_span(map, 0, len, true); - assert(SPARSEMAP_NOT_FOUND(pgno) == false); - return pgno; + return SPARSEMAP_NOT_FOUND(pgno) ? 
-1 : pgno; } static bool @@ -269,6 +434,8 @@ __sm_count(void *handle) /* midl ------------------------------------------------------------------ */ +static bool __midl_validate(void *handle); /* declared early: the asserts below call it ahead of its definition */ + static void * __midl_alloc(size_t capacity) { @@ -287,12 +454,16 @@ __midl_free(void *handle) static pgno_t __midl_set(void **handle, pgno_t pg) { + assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + 1 == list[-1]) { - mdb_midl_need(_list, list[-1] + 1); + assert(mdb_midl_need(_list, list[-1] + 1) == 0); /* NOTE(review): the grow call sits inside assert(), so it is compiled out when NDEBUG is defined */ list = *_list; } - mdb_midl_insert(list, pg); + mdb_midl_xappend(list, pg); /* append first, then sort on the next line */ + mdb_midl_sort(list); + //assert(mdb_midl_insert(list, pg) == 0); + assert(__midl_validate(*handle)); return pg; } @@ -307,13 +478,17 @@ __midl_is_set(void *handle, pgno_t pg) static pgno_t __midl_clear(void **handle, pgno_t pg) { + assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; unsigned len = list[0]; list[0] = len -= 1; for (unsigned j = pg - 1; j < len;) list[++j] = list[++pg]; +#ifdef MDB_DEBUG for (unsigned j = len + 1; j <= list[-1]; j++) list[j] = 0; +#endif /* MDB_DEBUG */ + assert(__midl_validate(*handle)); return pg; } @@ -323,8 +498,7 @@ __midl_find_span(void *handle, unsigned len) MDB_IDL list = (MDB_IDL)handle; /* Seek a big enough contiguous page range. Prefer - * pages at the tail, just truncating the list. - */ + pages at the tail, just truncating the list. */ int retry = 1; unsigned i = 0; pgno_t pgno = 0, *mop = list; @@ -339,15 +513,18 @@ __midl_find_span(void *handle, unsigned len) } while (--i > n2); if (--retry < 0) break; + } else { + return -1; } } while (1); search_done:; - return pgno; + return retry < 0 ? -1 : pgno; /* -1 when the retry budget ran out */ } static bool __midl_take_span(void **handle, pgno_t pg, unsigned len) { + assert(__midl_validate(*handle)); MDB_IDL list = *(MDB_IDL *)handle; int i = list[list[0]] == pg ? 
list[0] : mdb_midl_search(list, pg); unsigned j, num = len; @@ -358,24 +535,29 @@ __midl_take_span(void **handle, pgno_t pg, unsigned len) /* Move any stragglers down */ for (j = i - num; j < mop_len;) mop[++j] = mop[++i]; - /* Set all unused values in the array to 0 +#ifdef MDB_DEBUG for (j = mop_len + 1; j <= mop[-1]; j++) - mop[j] = 0; */ + mop[j] = 0; +#endif /* MDB_DEBUG */ + assert(__midl_validate(*handle)); return true; } static bool __midl_release_span(void **handle, pgno_t pg, unsigned len) { + assert(__midl_validate(*handle)); MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; if (list[0] + len >= list[-1]) { - mdb_midl_need(_list, list[-1] + len); + assert(mdb_midl_need(_list, list[-1] + len) == 0); /* NOTE(review): the grow call sits inside assert(), so it is compiled out when NDEBUG is defined */ list = *_list; } for (size_t i = pg; i < pg + len; i++) { - mdb_midl_insert(list, i); + mdb_midl_xappend(list, i); + // assert(mdb_midl_insert(list, i) == 0); } mdb_midl_sort(list); + assert(__midl_validate(*handle)); return true; } @@ -410,15 +592,15 @@ __midl_is_empty(void *handle, pgno_t pg, unsigned len) static bool __midl_merge(void **handle, void *other_handle) { - MDB_IDL *_list = (MDB_IDL *)handle, list = *_list; - MDB_IDL other = (MDB_IDL)other_handle; + assert(__midl_validate(*handle)); + MDB_IDL *_list = (MDB_IDL *)handle, list = *_list, other = (MDB_IDL)other_handle; if (list[0] + other[0] >= list[-1]) { - mdb_midl_need(_list, list[-1] + other[0]); + assert(mdb_midl_need(_list, list[-1] + other[0]) == 0); list = *_list; } - mdb_midl_append_list(_list, other); - list = *_list; - mdb_midl_sort(list); + mdb_midl_xmerge(list, other_handle); + mdb_midl_sort(*_list); + assert(__midl_validate(*handle)); return true; } @@ -440,11 +622,19 @@ static bool __midl_validate(void *handle) { MDB_IDL list = (MDB_IDL)handle; - pgno_t id = 1; - while (id < list[0]) { - if (list[id] >= list[id + 1]) - return false; - id++; + if (list[0] > list[-1]) { /* the count in list[0] exceeds the limit stored in list[-1] */ + return false; + } + if (list[0] > 1) { + /* check for duplicates; the bound is inclusive because entries occupy list[1]..list[list[0]] */ + for (pgno_t i = 2; i <= list[0]; i++) { + if (list[i] == list[i - 
1]) { + return false; + } + /* ensure ordering: a later entry may not be larger than the one before it */ + if (list[i] > list[i - 1]) + return false; + } } return true; } @@ -501,7 +691,7 @@ __roar_find_span(void *handle, unsigned len) } offset++; } while (offset <= max); - return offset; + return offset > max ? -1 : offset; /* -1 once the scan has run past max */ } static bool @@ -526,24 +716,14 @@ static bool __roar_is_span(void *handle, pgno_t pg, unsigned len) { roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; - for (pgno_t i = pg; i < pg + len; i++) { - if (roaring_bitmap_contains(rbm, i) != true) { - return false; - } - } - return true; + return roaring_bitmap_contains_range(rbm, pg, pg + len); } static bool __roar_is_empty(void *handle, pgno_t pg, unsigned len) { roaring_bitmap_t *rbm = (roaring_bitmap_t *)handle; - for (pgno_t i = 0; i < len; i++) { - if (roaring_bitmap_contains(rbm, pg + i) != false) { - return false; - } - } - return true; + return roaring_bitmap_range_cardinality(rbm, pg, pg + len) == 0; /* empty means no page of [pg, pg + len) is set, as the removed loop required; !contains_range is already true when one page is missing */ } static bool @@ -558,15 +738,13 @@ __roar_merge(void **handle, void *other_handle) static size_t __roar_size(void *handle) { - // TODO - return 0; + return roaring_bitmap_frozen_size_in_bytes((roaring_bitmap_t *)handle); } static size_t __roar_count(void *handle) { - // TODO - return 0; + return roaring_bitmap_get_cardinality((roaring_bitmap_t *)handle); } static bool @@ -583,20 +761,35 @@ typedef enum { SM, ML, RB } container_impl_t; typedef struct container { const char *name; + /* allocate a new container */ void *(*alloc)(size_t capacity); + /* free the container */ void (*free)(void *handle); + /* add pg to the container */ pgno_t (*set)(void **handle, pgno_t pg); + /* is pg in the container */ bool (*is_set)(void *handle, pgno_t pg); + /* remove pg from the container */ pgno_t (*clear)(void **handle, pgno_t pg); + /* find a run of len contiguous pages and return its smallest pgno */ pgno_t (*find_span)(void *handle, unsigned len); + /* remove the span [pg, pg + len) from the container */ bool (*take_span)(void **handle, pgno_t pg, unsigned len); 
+ /* add the span [pg, pg + len) into the container */ bool (*release_span)(void **handle, pgno_t pg, unsigned len); + /* are all the pgno in the span [pg, pg + len) in the container? */ bool (*is_span)(void *handle, pgno_t pg, unsigned len); + /* are the pgno in the span [pg, pg + len) not in the container? */ bool (*is_empty)(void *handle, pgno_t pg, unsigned len); + /* is the span the first one (brute force check) */ bool (*is_first)(void *handle, pgno_t pg, unsigned len); + /* ensure that all pgno contained in other_handle are also in handle */ bool (*merge)(void **handle, void *other_handle); + /* the size of the container in bytes */ size_t (*size)(void *handle); + /* the number of items in the container */ size_t (*count)(void *handle); + /* perform internal validation on the container (optional) */ bool (*validate)(void *handle); } container_t; @@ -667,48 +860,57 @@ FILE *fp; #define invoke(type, fn, ...) containers[type].fn(handles[type], __VA_ARGS__) #define mutate(type, fn, ...) (type == 0) ? 
record_##fn##_mutation(fp, __VA_ARGS__) : (void)0, containers[type].fn(&handles[type], __VA_ARGS__) #define foreach(set) for (unsigned type = 0; type < (sizeof((set)) / sizeof((set)[0])); type++) -#define compare(set) \ +#define checkpoint(set) do { \ for (unsigned type = 1; type < (sizeof((set)) / sizeof((set)[0])); type++) { \ verify_eq(0, handles[0], type, handles[type]); \ - } + } \ + record_checkpoint(fp, handles[0]); } while (0) bool verify_sm_eq_rb(sparsemap_t *map, roaring_bitmap_t *rbm) { + bool ret = true; /* cleared on a mismatch; the scan continues so every mismatch is printed */ uint64_t max = roaring_bitmap_maximum(rbm); roaring_uint32_iterator_t iter; roaring_iterator_init(rbm, &iter); for (uint64_t i = 0; i <= max; i++) { if (i == iter.current_value) { - assert(sparsemap_is_set(map, i) == true); + if (sparsemap_is_set(map, i) == false) { + fprintf(stdout, "- %zu ", (size_t)i); /* in rbm but missing from map; cast because i is uint64_t and %zu takes size_t */ + ret = false; + } roaring_uint32_iterator_advance(&iter); } else { - assert(sparsemap_is_set(map, i) == false); + if (sparsemap_is_set(map, i) == true) { + fprintf(stdout, "+ %zu ", (size_t)i); /* in map but not in rbm */ + ret = false; + } } } - return true; + return ret; } bool verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) { + bool ret = true; for (MDB_ID i = 1; i <= list[0]; i++) { pgno_t pg = list[i]; unsigned skipped = i == 1 ? 
0 : list[i - 1] - list[i] - 1; if (skipped) { for (MDB_ID j = list[i - 1]; j > list[i]; j--) { if (sparsemap_is_set(map, pg - j) != false) { - __diag("%zu\n", pg - j); - return false; + fprintf(stdout, "+ %zu ", pg - j); + ret = false; } } } if (sparsemap_is_set(map, pg) != true) { - __diag("%zu\n", pg); - return false; + fprintf(stdout, "- %zu ", pg); + ret = false; } } - return true; + return ret; /* false if any mismatch was printed above */ } bool @@ -769,8 +971,6 @@ stats(size_t iterations, sparsemap_t *map, MDB_IDL list) td_quantile(b_span_merge, .999)); } -#define INITIAL_AMOUNT 1024 * 2 - #define SHORT_OPT "r:fa:bh" #define LONG_OPT "record:,force,amount:,buffer,help" @@ -791,7 +991,7 @@ main(int argc, char *argv[]) int opt; const char *record_file = NULL; int force_flag = 0; - size_t left, amt = INITIAL_AMOUNT; + size_t left, iteration = 0, amt = INITIAL_AMOUNT; /* iteration is incremented at the top of the main loop */ bool buffer = true; fp = stdout; @@ -862,16 +1062,20 @@ main(int argc, char *argv[]) } cast(type, validate); } - compare(types); + checkpoint(types); left = amt; while (true) { + iteration++; // the an amount [1, 16] of pages to find preferring smaller sizes unsigned len = toss(15) + 1; pgno_t loc[num_types]; foreach(types) { loc[type] = invoke(type, find_span, len); + if (loc[type] == -1) { /* pgno_t is size_t, so this compares against (size_t)-1, the value find_span returns when it finds nothing */ + goto larger_please; /* skip ahead to the code at the larger_please label */ + } } for (unsigned n = 0; n < num_types; n++) { foreach(types) @@ -890,14 +1094,13 @@ main(int argc, char *argv[]) assert(mutate(type, take_span, loc[which_loc], len)); cast(type, validate); } - compare(types); + checkpoint(types); left -= len; - // Once we've used 1/10th of the free list, let's replenish it a bit. - if (amt - left > amt / 10) { + if (toss(15) > 13) { /* replenish on a toss result instead of the removed usage threshold */ do { pgno_t pgno; - size_t len, retries = amt; + size_t len, retries = amt / 10; // Find a hole in the map to replenish. 
do { len = toss(15) + 1; @@ -911,7 +1114,7 @@ main(int argc, char *argv[]) { assert(invoke(type, is_empty, pgno, len)); } - compare(types); + checkpoint(types); foreach(types) { assert(invoke(type, is_span, pgno, len) == false); @@ -919,7 +1122,7 @@ main(int argc, char *argv[]) assert(invoke(type, is_span, pgno, len) == true); cast(type, validate); } - compare(types); + checkpoint(types); left += len; } } while (amt - left > amt / 100); @@ -930,7 +1133,7 @@ main(int argc, char *argv[]) pgno_t max; larger_please: new_amt = 1024 + (xorshift32() % 2048) + toss(1024); - new_offset = sparsemap_get_ending_offset(handles[SM]); + new_offset = sparsemap_get_ending_offset(handles[SM]) + 1; /* start one past the sparsemap's current ending offset */ // Build a new container to merge with the existing one. foreach(types) @@ -939,6 +1142,7 @@ main(int argc, char *argv[]) for (size_t i = 0; i < new_amt; i++) { // We don't want to record and we're using new_handles not // handles, so call fn directly. + assert(containers[type].is_set(handles[type], i + new_offset) == false); /* the page must not already be in the existing container */ assert(containers[type].is_set(new_handles[type], i + new_offset) == false); containers[type].set(&new_handles[type], i + new_offset); assert(containers[type].is_set(new_handles[type], i + new_offset) == true); @@ -949,7 +1153,7 @@ main(int argc, char *argv[]) assert(mutate(type, merge, new_handles[type])); cast(type, validate); } - compare(types); + checkpoint(types); left += new_amt; amt += new_amt; foreach(types)