diff --git a/.idea/customTargets.xml b/.idea/customTargets.xml new file mode 100644 index 0000000..852c612 --- /dev/null +++ b/.idea/customTargets.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/makefile.xml b/.idea/makefile.xml new file mode 100644 index 0000000..c64d08d --- /dev/null +++ b/.idea/makefile.xml @@ -0,0 +1,25 @@ + + + + + + + + + tests/soak + + + + + + + + + clean + + + + + + + \ No newline at end of file diff --git a/Makefile b/Makefile index b553734..8ed2f3f 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,8 @@ SHARED_LIB = libsparsemap.so LIBS = -lm #CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC -CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC -#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC +#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC +CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC @@ -14,8 +14,8 @@ CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC -#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC -TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC +#TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC +TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC TESTS = tests/test tests/soak @@ -50,7 +50,7 @@ soak: tests env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB) - $(CC) $^ -o $@ $(TEST_FLAGS) + $(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS) clean: rm -f $(OBJS) diff --git a/include/sparsemap.h b/include/sparsemap.h index f89982c..8378690 100644 --- a/include/sparsemap.h +++ b/include/sparsemap.h @@ -297,14 +297,16 @@ sparsemap_idx_t sparsemap_get_ending_offset(sparsemap_t *map); */ void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux); -/** @brief Merges the values from \b other into \b map, \b other is unchanged. +/** @brief Merges the values from \b source into \b destination, \b source is unchanged. * - * @param[in] map The sparsemap reference. - * @param[in] other The bitmap to merge into \b map. + * Efficiently adds all set bits from \b source into \b destination. + * + * @param[in] destination The sparsemap reference into which we will merge \b source. + * @param[in] source The bitmap to merge into \b destination. * @returns 0 on success, or sets errno to ENOSPC and returns the amount of * additional space required to successfully merge the maps. */ -int sparsemap_merge(sparsemap_t *map, sparsemap_t *other); +int sparsemap_merge(sparsemap_t *destination, sparsemap_t *source); /** @brief Splits the bitmap by assigning all bits starting at \b offset to the * \b other bitmap while removing them from \b map. diff --git a/src/sparsemap.c b/src/sparsemap.c index f76061e..8251a66 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -228,9 +228,9 @@ static void __sm_chunk_set_capacity(__sm_chunk_t *chunk, size_t capacity) { __sm_assert(capacity % SM_BITS_PER_VECTOR == 0); - __sm_assert(capacity < SM_CHUNK_MAX_CAPACITY); + __sm_assert(capacity <= SM_CHUNK_MAX_CAPACITY); - if (capacity >= SM_CHUNK_MAX_CAPACITY) { + if (capacity > SM_CHUNK_MAX_CAPACITY) { return; } @@ -426,15 +426,23 @@ __sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, sm_bitv /** @brief Merges into the chunk at \b offset all set bits from \b src. * * @param[in] chunk The chunk in question. - * @param[in] offset The fully aligned offset of the chunk to be merged. + * @param[in] offset The offset of the first bit in the chunk to be merged. + * @todo merge vectors rather than call sparsemap_set() in a loop */ void -__sm_merge_chunk(sparsemap_t *chunk, sparsemap_idx_t offset, __sm_chunk_t src) +__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t offset, __sm_chunk_t dst_chunk, __sm_chunk_t src_chunk) { - size_t capacity = __sm_chunk_get_capacity(&src); - for (sparsemap_idx_t j = 0; j < capacity; j++, offset++) { - if (__sm_chunk_is_set(&src, j)) { - sparsemap_set(chunk, offset, true); + size_t src_capacity = __sm_chunk_get_capacity(&src_chunk); + (void)dst_chunk; +#if 0 + size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk); + if (dst_capacity < src_capacity) { + __sm_chunk_set_capacity(&dst_chunk, src_capacity); + } +#endif + for (sparsemap_idx_t j = 0; j < src_capacity; j++) { + if (__sm_chunk_is_set(&src_chunk, j)) { + sparsemap_set(map, offset + j, true); } } } @@ -1125,7 +1133,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value) __sm_append_data(map, &buf[0], sizeof(buf)); uint8_t *p = __sm_get_chunk_data(map, 0); - *(sm_idx_t *)p = __sm_get_chunk_aligned_offset(idx); // TODO: this was get_vector_aligned_offset + *(sm_idx_t *)p = __sm_get_vector_aligned_offset(idx); __sm_set_chunk_count(map, 1); @@ -1396,22 +1404,22 @@ sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t, void *aux), } int -sparsemap_merge(sparsemap_t *map, sparsemap_t *other) +sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) { uint8_t *src, *dst; - size_t src_count = __sm_get_chunk_count(other); - size_t dst_count = __sm_get_chunk_count(map); + size_t src_count = __sm_get_chunk_count(source); + size_t dst_count = __sm_get_chunk_count(destination); size_t max_chunk_count = src_count + dst_count; - ssize_t difference = map->m_capacity - (other->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); + ssize_t remaining_capacity = destination->m_capacity - (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); /* Estimate worst-case overhead required for merge. */ - if (difference <= 0) { + if (remaining_capacity <= 0) { errno = ENOSPC; - return -difference; + return -remaining_capacity; } - dst = __sm_get_chunk_data(map, 0); - src = __sm_get_chunk_data(other, 0); + src = __sm_get_chunk_data(source, 0); + dst = __sm_get_chunk_data(destination, 0); for (size_t i = 0; i < max_chunk_count && src_count; i++) { sm_idx_t src_start = *(sm_idx_t *)src; sm_idx_t dst_start = *(sm_idx_t *)dst; @@ -1430,8 +1438,7 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other) __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); __sm_chunk_t dst_chunk; __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t)); - __sm_merge_chunk(map, src_start, src_chunk); - *(sm_idx_t *)dst = __sm_get_chunk_aligned_offset(src_start); + __sm_merge_chunk(destination, *(sm_idx_t *)src, dst_chunk, src_chunk); src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); dst_count--; @@ -1443,14 +1450,14 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other) __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); size_t src_size = __sm_chunk_get_size(&src_chunk); if (dst_count == 0) { - __sm_append_data(map, src, sizeof(sm_idx_t) + src_size); + __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size); } else { - size_t offset = __sm_get_chunk_offset(map, dst_start); - __sm_insert_data(map, offset, src, sizeof(sm_idx_t) + src_size); + size_t offset = __sm_get_chunk_offset(destination, dst_start); + __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size); } /* Update the chunk count and data_used. */ - __sm_set_chunk_count(map, __sm_get_chunk_count(map) + 1); + __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); /* Carry on to the next chunk. */ __sm_chunk_t dst_chunk; @@ -1528,7 +1535,7 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other) uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 }; memcpy(dst, &buf[0], sizeof(buf)); - *(sm_idx_t *)dst = __sm_get_chunk_aligned_offset(offset); //TODO: was simply "offset" + *(sm_idx_t *)dst = __sm_get_vector_aligned_offset(offset); // TODO: was simply "offset" dst += sizeof(sm_idx_t); /* the |other| sparsemap_t now has one additional chunk */ @@ -1551,7 +1558,7 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other) for (size_t j = offset % capacity; j < capacity; j++, d++) { if (__sm_chunk_is_set(&s_chunk, j)) { sparsemap_set(other, d, true); - sparsemap_set(map, d, false); //TODO remove, and fix set_capacity below + sparsemap_set(map, d, false); // TODO remove, and fix set_capacity below } } diff --git a/tests/soak.c b/tests/soak.c index 595cd17..651b796 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -6,9 +6,9 @@ #include #include +#include "../include/sparsemap.h" #include "../include/common.h" #include "../include/roaring.h" -#include "../include/sparsemap.h" #include "../include/tdigest.h" /* midl.h ------------------------------------------------------------------ */ diff --git a/tests/test.c b/tests/test.c index 61a9565..652b212 100644 --- a/tests/test.c +++ b/tests/test.c @@ -749,7 +749,7 @@ test_api_split(const MunitParameter params[], void *data) } sparsemap_idx_t offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion); - for (int i = 0; i < offset; i++) { + for (size_t i = 0; i < offset; i++) { assert_true(sparsemap_is_set(map, i)); assert_false(sparsemap_is_set(&portion, i)); } @@ -767,7 +767,7 @@ test_api_split(const MunitParameter params[], void *data) } offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion); - for (int i = 0; i < offset - 24; i++) { + for (size_t i = 0; i < offset - 24; i++) { assert_true(sparsemap_is_set(map, i + 24)); assert_false(sparsemap_is_set(&portion, i + 24)); } @@ -842,11 +842,11 @@ test_api_merge(const MunitParameter params[], void *data) sparsemap_clear(map); sparsemap_clear(other); - sparsemap_set(other, 1, true); sparsemap_set(other, 2049, true); sparsemap_set(map, 2050, true); sparsemap_set(other, 4097, true); + sparsemap_set(map, 6113, true); sparsemap_set(other, 8193, true); sparsemap_merge(map, other); @@ -855,10 +855,11 @@ test_api_merge(const MunitParameter params[], void *data) assert_true(sparsemap_is_set(map, 2049)); assert_true(sparsemap_is_set(map, 2050)); assert_true(sparsemap_is_set(map, 4097)); + assert_true(sparsemap_is_set(map, 6113)); assert_true(sparsemap_is_set(map, 8193)); for (int i = 0; i < 10000; i++) { - if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 8193) + if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 6113 || i == 8193) continue; else assert_false(sparsemap_is_set(map, i));