From 919f595d29c543a130907f11ce2685db9183cd96 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 2 Aug 2024 00:29:35 -0400 Subject: [PATCH] WIP cleanup, add populate_rle tests --- CMakeLists.txt | 14 +++--- sparsemap.c | 130 +++++++++++++++++-------------------------------- test/test.c | 41 ++++++++++++++-- 3 files changed, 90 insertions(+), 95 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e3b21a..0d28802 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,9 +14,9 @@ set(CMAKE_C_OUTPUT_EXTENSION .o) set(SOURCE_DIR .) set(HEADER_DIR . test) -set(COMMON_CMAKE_C_FLAGS "-Wall -Wextra -Wpedantic") +set(COMMON_CMAKE_C_FLAGS "-std=c11 -Wall -Wextra -Wpedantic") set(CMAKE_C_FLAGS_DEBUG "-DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_TESTING -DDEBUG -g -O0") -set(CMAKE_C_FLAGS_PROFILE "-g -Og -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope") +set(CMAKE_C_FLAGS_PROFILE "-DSPARSEMAP_DIAGNOSTIC -DSPARSEMAP_TESTING -DDEBUG -g -Og -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope") set(CMAKE_C_FLAGS_RELEASE "-Ofast") # Include all header files from the header directory @@ -49,25 +49,25 @@ set_target_properties(sparsemap PROPERTIES target_include_directories(sparsemap PRIVATE ${HEADER_DIR}) # Add ex_1 program -add_executable(ex_1 test/ex_1.c test/munit.c test/common.c) +add_executable(ex_1 test/ex_1.c test/munit.c test/qc.c test/common.c) target_link_libraries(ex_1 PRIVATE sparsemap) target_include_directories(ex_1 PRIVATE ${HEADER_DIR}) add_custom_target(run_ex_1 COMMAND ex_1 WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) # Add ex_2 program -add_executable(ex_2 test/ex_2.c test/munit.c test/common.c) +add_executable(ex_2 test/ex_2.c test/munit.c test/qc.c test/common.c) target_link_libraries(ex_2 PRIVATE sparsemap) target_include_directories(ex_2 PRIVATE ${HEADER_DIR}) add_custom_target(run_ex_2 COMMAND ex_2 WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) # Add ex_3 program -add_executable(ex_3 test/ex_3.c test/munit.c test/common.c) +add_executable(ex_3 test/ex_3.c test/munit.c test/qc.c test/common.c) target_link_libraries(ex_3 PRIVATE sparsemap) target_include_directories(ex_3 PRIVATE ${HEADER_DIR}) add_custom_target(run_ex_3 COMMAND ex_3 WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) # Add ex_4 program -add_executable(ex_4 test/ex_4.c test/munit.c test/common.c) +add_executable(ex_4 test/ex_4.c test/munit.c test/qc.c test/common.c) target_link_libraries(ex_4 PRIVATE sparsemap) target_include_directories(ex_4 PRIVATE ${HEADER_DIR}) add_custom_target(run_ex_4 COMMAND ex_4 WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) @@ -80,7 +80,7 @@ set_source_files_properties(test/test.c PROPERTIES COMPILE_FLAGS "-DDEBUG -DSPAR add_custom_target(run_test COMMAND test WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) # Add soak program -add_executable(soak test/soak.c test/common.c test/tdigest.c test/roaring.c) +add_executable(soak test/soak.c test/common.c test/tdigest.c test/qc.c test/roaring.c) target_link_libraries(soak PRIVATE sparsemap) target_include_directories(soak PRIVATE ${HEADER_DIR} lib) target_link_libraries(soak PUBLIC m) diff --git a/sparsemap.c b/sparsemap.c index d14be86..7fa7493 100644 --- a/sparsemap.c +++ b/sparsemap.c @@ -22,7 +22,6 @@ #include -#include #include #include #include @@ -42,7 +41,7 @@ #pragma GCC diagnostic pop void __attribute__((format(printf, 4, 5))) __sm_diag_(const char *file, int line, const char *func, const char *format, ...) { - va_list args; + va_list args = { 0 }; fprintf(stderr, "%s:%d:%s(): ", file, line, func); va_start(args, format); vfprintf(stderr, format, args); @@ -73,7 +72,7 @@ typedef struct { __sm_bitvec_t *m_data; } __sm_chunk_t; -// TODO remove me, this is only used for debugging. +// NOTE: When using in production feel free to remove this section of test code. #ifdef SPARSEMAP_TESTING #include char *QCC_showSparsemap(void *value, int len); @@ -568,7 +567,7 @@ __sm_chunk_is_set(__sm_chunk_t *chunk, size_t idx) } } -/* +/** * TODO */ static int @@ -695,7 +694,7 @@ __sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, __sm_bi __sm_assert(bv < SM_FLAGS_PER_INDEX); size_t flags = SM_CHUNK_GET_FLAGS(*chunk->m_data, bv); - assert(flags != SM_PAYLOAD_NONE); + __sm_assert(flags != SM_PAYLOAD_NONE); if (flags == SM_PAYLOAD_ZEROS) { /* Easy - set bit to 0 in a __sm_bitvec_t of zeroes. */ if (value == false) { @@ -1019,36 +1018,6 @@ done:; return amt; } -/** - * @brief Ranks the set bits within the range [from, to]. - * - * @param[out] rank Additional results, remaining bits and last position. - * @param[in] chunk The chunk to examine. - * @param[in] from The start of the range, 0-indexed and inclusive. - * @param[in] to The end of the range, 0-indexed and inclusive. - * @return the sum of the set bits in the range [from, to], 0 if none. - */ -static size_t -__sm_chunk_rank_set(__sm_chunk_rank_t *rank, __sm_chunk_t *chunk, size_t from, size_t to) -{ - return __sm_chunk_rank_(rank, true, chunk, from, to); -} - -/** - * @brief Ranks the unset bits within the range [from, to]. - * - * @param[out] rank Additional results, remaining bits and last position. - * @param[in] chunk The chunk to examine. - * @param[in] from The start of the range, 0-indexed and inclusive. - * @param[in] to The end of the range, 0-indexed and inclusive. - * @return the sum of the unset bits in the range [from, to], 0 if none. - */ -static size_t -__sm_chunk_rank_unset(__sm_chunk_rank_t *rank, __sm_chunk_t *chunk, size_t from, size_t to) -{ - return __sm_chunk_rank_(rank, false, chunk, from, to); -} - /** @brief Counts the bits matching \b value in the range [0, \b idx] * inclusive after ignoring the first \b offset bits in the chunk. * @@ -1377,11 +1346,12 @@ __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t ds if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j + delta)) { size_t position; __sm_bitvec_t fill; + // TODO: switch (__sm_chunk_clr_bit(dst_chunk, j + delta, &position)) { switch (__sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, false)) { case SM_NEEDS_TO_GROW: offset += SM_SIZEOF_OVERHEAD + position * sizeof(__sm_bitvec_t); __sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(__sm_bitvec_t)); - __sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, true); + __sm_chunk_set_bit(dst_chunk, j + delta, &position); break; case SM_NEEDS_TO_SHRINK: if (__sm_chunk_is_empty(src_chunk)) { @@ -1633,20 +1603,13 @@ sparsemap_unset(sparsemap_t *map, sparsemap_idx_t idx) } /* Now that we've addressed (1) and (3) we have to work on (2) where the - * index is within the body of this RLE chunk. This will lead to: - * - a) TODO... - * - b) TODO... - * - c) ... - * - * Chunks must have an aligned starting offset, so let's first find what - * we'll call the "pivot" chunk wherein we'll find the index we need to - * clear. That chunk will be sparse. + * index is within the body of this RLE chunk. Chunks must have an aligned + * starting offset, so let's first find what we'll call the "pivot" chunk + * wherein we'll find the index we need to clear. That chunk will be sparse. */ - size_t pos = 0; uint8_t buf[(SM_SIZEOF_OVERHEAD * 3) + (sizeof(__sm_bitvec_t) * 6)] = { 0 }; uint8_t *pivot_p; __sm_chunk_t pivot_chunk; - size_t pivot_offset; /* Find the starting offset for our pivot chunk. */ size_t aligned_idx = __sm_get_chunk_aligned_offset(idx); @@ -2004,14 +1967,13 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx) if ((idx - start) == length) { __sm_chunk_rle_set_length(&chunk, length + 1); __sm_assert(__sm_chunk_rle_get_length(&chunk) == length + 1); + goto done; } - goto done; } /* We've been asked to set a bit that is within this RLE chunk's range but * not within its run. That means this chunk's capacity must shrink, and * we need a new sparse chunk to hold this value. */ __sm_chunk_t new_chunk; - size_t new_offset; uint8_t buf[SM_SIZEOF_OVERHEAD + sizeof(__sm_bitvec_t) * 2] = { 0 }; uint8_t *new_p = buf; /* Find out where the chunk should align to hold this idx ... */ @@ -2028,10 +1990,11 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx) SM_CHUNK_SET_FLAGS(new_chunk.m_data[0], idx / SM_BITS_PER_VECTOR, SM_PAYLOAD_MIXED); /* ... and set the bit at that index in this new chunk. */ new_chunk.m_data[1] = (__sm_bitvec_t)1 << (idx % SM_BITS_PER_VECTOR); + __sm_set_chunk_count(map, __sm_get_chunk_count(map) + 1); __sm_assert(__sm_chunk_get_capacity(&chunk) + start + 1 == aligned_idx); __sm_when_diag({ /* Sanity check the chunk */ - // fprintf(stdout, "\n%s\n", QCC_showChunk(pivot_p, 0); + // fprintf(stdout, "\n%s\n", QCC_showSparsemap(map, 0)); for (size_t j = 0; j < SM_CHUNK_MAX_CAPACITY; j++) { bool expected = (j + aligned_idx == idx) ? true : false; __sm_assert(__sm_chunk_is_set(&new_chunk, j) == expected); @@ -2215,31 +2178,35 @@ sparsemap_get_ending_offset(sparsemap_t *map) p += SM_SIZEOF_OVERHEAD; __sm_chunk_t chunk; __sm_chunk_init(&chunk, p); - sparsemap_idx_t relative_position = start; - for (size_t m = 0; m < sizeof(__sm_bitvec_t); m++, p++) { - for (int n = 0; n < SM_FLAGS_PER_INDEX_BYTE; n++) { - size_t flags = SM_CHUNK_GET_FLAGS(*p, n); - if (flags == SM_PAYLOAD_NONE) { - continue; - } else if (flags == SM_PAYLOAD_ZEROS) { - relative_position += SM_BITS_PER_VECTOR; - } else if (flags == SM_PAYLOAD_ONES) { - relative_position += SM_BITS_PER_VECTOR; - offset = relative_position; - } else if (flags == SM_PAYLOAD_MIXED) { - __sm_bitvec_t w = chunk.m_data[1 + __sm_chunk_get_position(&chunk, m * SM_FLAGS_PER_INDEX_BYTE + n)]; - int idx = 0; - for (int k = 0; k < SM_BITS_PER_VECTOR; k++) { - if (w & ((__sm_bitvec_t)1 << k)) { - idx = k; + if (SM_IS_CHUNK_RLE(&chunk)) { + return start + __sm_chunk_rle_get_length(&chunk) - 1; + } else { + sparsemap_idx_t relative_position = start; + for (size_t m = 0; m < sizeof(__sm_bitvec_t); m++, p++) { + for (int n = 0; n < SM_FLAGS_PER_INDEX_BYTE; n++) { + size_t flags = SM_CHUNK_GET_FLAGS(*p, n); + if (flags == SM_PAYLOAD_NONE) { + continue; + } else if (flags == SM_PAYLOAD_ZEROS) { + relative_position += SM_BITS_PER_VECTOR; + } else if (flags == SM_PAYLOAD_ONES) { + relative_position += SM_BITS_PER_VECTOR; + offset = relative_position; + } else if (flags == SM_PAYLOAD_MIXED) { + __sm_bitvec_t w = chunk.m_data[1 + __sm_chunk_get_position(&chunk, m * SM_FLAGS_PER_INDEX_BYTE + n)]; + int idx = 0; + for (int k = 0; k < SM_BITS_PER_VECTOR; k++) { + if (w & ((__sm_bitvec_t)1 << k)) { + idx = k; + } } + offset = relative_position + idx; + relative_position += SM_BITS_PER_VECTOR; } - offset = relative_position + idx; - relative_position += SM_BITS_PER_VECTOR; } } + return offset; } - return offset; } double @@ -2289,7 +2256,7 @@ sparsemap_scan(sparsemap_t *map, void (*scanner)(__sm_idx_t[], size_t, void *aux __sm_chunk_init(&chunk, p); size_t skipped = __sm_chunk_scan(&chunk, start, scanner, skip, aux); if (skip) { - assert(skip >= skipped); + __sm_assert(skip >= skipped); skip -= skipped; } p += __sm_chunk_get_size(&chunk); @@ -2509,9 +2476,8 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other) for (size_t j = start; j < capacity + start; j++) { if (j >= offset) { if (__sm_chunk_is_set(&s_chunk, j - start)) { - __sm_bitvec_t fill; size_t pos; - __sm_chunk_set(&d_chunk, j - start, true, &pos, &fill, true); + __sm_chunk_set_bit(&d_chunk, j - start, &pos); sparsemap_unset(map, j); } } @@ -2592,7 +2558,7 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) static size_t __sm_rank_vec(sparsemap_t *map, size_t begin, size_t end, bool value, __sm_bitvec_t *vec) { - assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); + __sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); size_t amt, gap, pos = 0, result = 0, prev = 0, count, len = end - begin + 1; uint8_t *p; @@ -2816,7 +2782,6 @@ QCC_showChunk(void *value, int len) { __sm_idx_t start = *(__sm_idx_t *)value; __sm_chunk_t chunk; - // TODO: __sm_chunk_t *chunk = (__sm_chunk_t *)((uintptr_t)value + SM_SIZEOF_OVERHEAD); __sm_chunk_init(&chunk, value + SM_SIZEOF_OVERHEAD); return _qcc_format_chunk(start, &chunk, false); @@ -2892,10 +2857,10 @@ QCC_genChunk() // ... and set the RLE chunk's length of 1s to len. __sm_chunk_rle_set_length(chunk, len); // Now, test what we've generated to ensure it's correct. - assert(*(__sm_idx_t *)p == len); - assert(__sm_chunk_is_rle(chunk)); - assert(__sm_chunk_rle_get_capacity(chunk) == SM_CHUNK_RLE_MAX_CAPACITY); - assert(__sm_chunk_rle_get_length(chunk) == len); + __sm_assert(*(__sm_idx_t *)p == len); + __sm_assert(__sm_chunk_is_rle(chunk)); + __sm_assert(__sm_chunk_rle_get_capacity(chunk) == SM_CHUNK_RLE_MAX_CAPACITY); + __sm_assert(__sm_chunk_rle_get_length(chunk) == len); return QCC_initGenValue(p, 1, QCC_showChunk, QCC_freeChunkValue); } else { // Generate a chunk with the offset equal to the number of additional @@ -2939,14 +2904,9 @@ QCC_genChunk() for (int i = 0; i < cut; i++) { SM_CHUNK_SET_FLAGS(*desc, i, SM_PAYLOAD_NONE); } -#if 0 - char *s = QCC_showChunk(p, 0); - fprintf(stdout, "\n%s\n", s); - fflush(stdout); - free(s); -#endif + // fprintf(stdout, "\n%s\n", QCC_showChunk(p, 0)); // ... and check that our franken-chunk appears to be correct. - assert(__sm_chunk_is_rle(chunk) == false); + __sm_assert(__sm_chunk_is_rle(chunk) == false); return QCC_initGenValue(p, 1, QCC_showChunk, QCC_freeChunkValue); } } diff --git a/test/test.c b/test/test.c index e366786..c31feca 100644 --- a/test/test.c +++ b/test/test.c @@ -43,21 +43,33 @@ struct user_data { /* -------------------------- Supporting Functions for Testing */ -void +size_t +populate_map_rle(sparsemap_t *map, size_t loc, size_t num, size_t amount) +{ + size_t i, len = munit_rand_int_range(1, num) * amount; + for (i = 0; i < len; i++) { + sparsemap_set(map, loc + i); + } + return i; +} + +size_t populate_map(sparsemap_t *map, int size, int max_value) { int array[size]; - size_t before; + size_t i, before; setup_test_array(array, size, max_value); shuffle(array, size); before = sparsemap_count(map); - for (int i = 0; i < size; i++) { + for (i = 0; i < size; i++) { sparsemap_set(map, array[i]); bool set = sparsemap_is_set(map, array[i]); assert_true(set); } assert_true(sparsemap_count(map) == before + size); + + return i; } static void * @@ -350,6 +362,7 @@ test_api_get_capacity_setup(const MunitParameter params[], void *user_data) sparsemap_init(map, buf, 1024); populate_map(map, 1024, 3 * 1024); + populate_map_rle(map, 3 * 1024, 5, 4096); return (void *)map; } @@ -407,6 +420,13 @@ test_api_is_set(const MunitParameter params[], void *data) sparsemap_set(map, 42); assert_true(sparsemap_is_set(map, 42)); + sparsemap_clear(map); + size_t n = populate_map_rle(map, 0, 10, 2718); + + for (size_t i = 0; i < n; i++) { + assert_true(sparsemap_is_set(map, i)); + } + return MUNIT_OK; } @@ -460,6 +480,7 @@ test_api_get_size_setup(const MunitParameter params[], void *user_data) sparsemap_init(map, buf, 1024); populate_map(map, 1024, 3 * 1024); + populate_map_rle(map, 3 * 1024, 5, 4096); return (void *)map; } @@ -528,6 +549,10 @@ test_api_count(const MunitParameter params[], void *data) } assert_true(sparsemap_count(map) == 512); + sparsemap_clear(map); + size_t n = populate_map_rle(map, 3 * 1024, 7, 4001); + assert_true(sparsemap_count(map) == n); + sparsemap_clear(map); assert_true(sparsemap_count(map) == 0); @@ -653,6 +678,16 @@ test_api_get_end_offset(const MunitParameter params[], void *data) sparsemap_set(map, 13012); assert_true(sparsemap_get_ending_offset(map) == 13012); + sparsemap_clear(map); + size_t n = populate_map_rle(map, 13012, 10, 2718); + size_t exp = n + 13012 - 1; + size_t eoff = sparsemap_get_ending_offset(map); + assert_true(sparsemap_get_ending_offset(map) == 13012 + n - 1); + fprintf(stdout, "\n%s\n", QCC_showSparsemap(map, 0)); + sparsemap_set(map, 13012 + n + 100); + fprintf(stdout, "\n%s\n", QCC_showSparsemap(map, 0)); + assert_true(sparsemap_get_ending_offset(map) == 13112 + n - 1); + return MUNIT_OK; }