Merge pull request 'gburd/full-merge' (#8) from gburd/full-merge into main

Reviewed-on: #8
This commit is contained in:
Gregory Burd 2024-05-11 01:26:44 +00:00
commit 7a572453c9
6 changed files with 121 additions and 51 deletions

15
.idea/customTargets.xml Normal file
View file

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CLionExternalBuildManager">
<target id="db0ccaeb-4851-470b-83d0-afa663f6ceb9" name="tests/soak" defaultType="MAKE">
<configuration id="98973a90-a9d0-431b-9071-9ce6960b0b01" name="tests/soak">
<build type="MAKE">
<make targetName="tests/soak" />
</build>
<clean type="MAKE">
<make targetName="clean" />
</clean>
</configuration>
</target>
</component>
</project>

25
.idea/makefile.xml Normal file
View file

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MakefileBuildTargetsManager">
<user-build-targets>
<build-target name="tests/soak">
<build-configurations>
<build-configuration>
<make-targets>
<make-target>tests/soak</make-target>
</make-targets>
</build-configuration>
</build-configurations>
</build-target>
<build-target name="clean">
<build-configurations>
<build-configuration>
<make-targets>
<make-target>clean</make-target>
</make-targets>
</build-configuration>
</build-configurations>
</build-target>
</user-build-targets>
</component>
</project>

View file

@ -5,8 +5,8 @@ SHARED_LIB = libsparsemap.so
LIBS = -lm LIBS = -lm
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
@ -14,8 +14,8 @@ CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC #CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC #TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC TEST_FLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC #TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
TESTS = tests/test tests/soak TESTS = tests/test tests/soak
@ -50,7 +50,7 @@ soak: tests
env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak env ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=verbosity=1:log_threads=1 ./tests/soak
tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB) tests/test: $(TEST_OBJS) $(LIB_OBJS) $(STATIC_LIB)
$(CC) $^ -o $@ $(TEST_FLAGS) $(CC) $^ $(LIBS) -o $@ $(TEST_FLAGS)
clean: clean:
rm -f $(OBJS) rm -f $(OBJS)

View file

@ -297,14 +297,16 @@ sparsemap_idx_t sparsemap_get_ending_offset(sparsemap_t *map);
*/ */
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux); void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n, void *aux), size_t skip, void *aux);
/** @brief Merges the values from \b other into \b map, \b other is unchanged. /** @brief Merges the values from \b source into \b destination, \b source is unchanged.
* *
* @param[in] map The sparsemap reference. * Efficiently adds all set bits from \b source into \b destination.
* @param[in] other The bitmap to merge into \b map. *
* @param[in] destination The sparsemap reference into which we will merge \b source.
* @param[in] source The bitmap to merge into \b destination.
* @returns 0 on success, or sets errno to ENOSPC and returns the amount of * @returns 0 on success, or sets errno to ENOSPC and returns the amount of
* additional space required to successfully merge the maps. * additional space required to successfully merge the maps.
*/ */
int sparsemap_merge(sparsemap_t *map, sparsemap_t *other); int sparsemap_merge(sparsemap_t *destination, sparsemap_t *source);
/** @brief Splits the bitmap by assigning all bits starting at \b offset to the /** @brief Splits the bitmap by assigning all bits starting at \b offset to the
* \b other bitmap while removing them from \b map. * \b other bitmap while removing them from \b map.

View file

@ -228,9 +228,9 @@ static void
__sm_chunk_set_capacity(__sm_chunk_t *chunk, size_t capacity) __sm_chunk_set_capacity(__sm_chunk_t *chunk, size_t capacity)
{ {
__sm_assert(capacity % SM_BITS_PER_VECTOR == 0); __sm_assert(capacity % SM_BITS_PER_VECTOR == 0);
__sm_assert(capacity < SM_CHUNK_MAX_CAPACITY); __sm_assert(capacity <= SM_CHUNK_MAX_CAPACITY);
if (capacity >= SM_CHUNK_MAX_CAPACITY) { if (capacity > SM_CHUNK_MAX_CAPACITY) {
return; return;
} }
@ -423,22 +423,6 @@ __sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, sm_bitv
return SM_OK; return SM_OK;
} }
/** @brief Copies every set bit of \b src into the sparsemap \b chunk.
 *
 * Bit \b j of \b src (for j in [0, capacity of \b src)) maps to bit
 * \b offset + j of the destination; only bits that are set in \b src are
 * written, each one through sparsemap_set().
 *
 * @param[in] chunk The destination sparsemap (despite the name, this is a
 * sparsemap_t, not a chunk).
 * @param[in] offset The index in the destination that corresponds to the
 * first bit of \b src.
 * @param[in] src The source chunk, passed by value, whose set bits are
 * merged.
 */
void
__sm_merge_chunk(sparsemap_t *chunk, sparsemap_idx_t offset, __sm_chunk_t src)
{
  const size_t nbits = __sm_chunk_get_capacity(&src);

  for (sparsemap_idx_t bit = 0; bit < nbits; bit++) {
    /* Unset source bits leave the destination untouched. */
    if (!__sm_chunk_is_set(&src, bit)) {
      continue;
    }
    sparsemap_set(chunk, offset + bit, true);
  }
}
/** @brief Finds the index of the \b n'th bit after \b offset bits with \b /** @brief Finds the index of the \b n'th bit after \b offset bits with \b
* value. * value.
* *
@ -940,6 +924,41 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
map->m_data_used -= gap_size; map->m_data_used -= gap_size;
} }
/** @brief Merges all set bits from \b src_chunk into \b dst_chunk, inserting
 * or removing bytes in \b map when the destination chunk asks to grow or
 * shrink.
 *
 * For every position j in [0, \b capacity) that is set in \b src_chunk, the
 * same position is set in \b dst_chunk using __sm_chunk_set().
 *
 * @param[in] map The sparsemap whose data is resized; \b dst_chunk is
 * expected to refer to a chunk stored inside this map.
 * @param[in] idx The index of the first bit covered by the chunks being
 * merged; idx + j is used to look up the chunk offset within \b map.
 * @param[in] capacity The number of bit positions of \b src_chunk to examine.
 * @param[in] dst_chunk The chunk that receives the bits.
 * @param[in] src_chunk The chunk the bits are read from; it is only queried
 * here, never modified.
 * @todo merge vectors rather than call __sm_chunk_set() in a loop
 */
void
__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t idx, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, __sm_chunk_t *src_chunk)
{
for (sparsemap_idx_t j = 0; j < capacity; j++) {
/* Presumably the byte offset in map's data of the chunk holding bit idx + j;
 * it is looked up on every iteration, even when the source bit is clear. */
sparsemap_idx_t offset = __sm_get_chunk_offset(map, idx + j);
if (__sm_chunk_is_set(src_chunk, j)) {
size_t position;
sm_bitvec_t fill;
/* First attempt passes false as the last argument; the result says whether
 * the chunk must change size before the bit can be stored.
 * NOTE(review): there is no default case, so any other result (e.g. SM_OK)
 * needs no further work here. */
switch (__sm_chunk_set(dst_chunk, j, true, &position, &fill, false)) {
case SM_NEEDS_TO_GROW:
/* Make room for one more bit vector at the reported position (past the
 * sm_idx_t header), then repeat the set with the last argument true. */
offset += sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t);
__sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t));
__sm_chunk_set(dst_chunk, j, true, &position, &fill, true);
break;
case SM_NEEDS_TO_SHRINK:
/* NOTE(review): this tests src_chunk, not dst_chunk, and src_chunk has bit
 * j set at this point -- confirm which chunk was intended. */
if (__sm_chunk_is_empty(src_chunk)) {
__sm_assert(position == 1);
/* Drop the whole chunk (header plus two bit vectors) and its count. */
__sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2);
__sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
} else {
/* Drop only the single bit vector at the reported position. */
offset += sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t);
__sm_remove_data(map, offset, sizeof(sm_bitvec_t));
}
break;
}
}
}
}
/* /*
* The following is the "Sparsemap" implementation, it uses chunks (code above) * The following is the "Sparsemap" implementation, it uses chunks (code above)
* and is the public API for this compressed bitmap representation. * and is the public API for this compressed bitmap representation.
@ -1125,7 +1144,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
__sm_append_data(map, &buf[0], sizeof(buf)); __sm_append_data(map, &buf[0], sizeof(buf));
uint8_t *p = __sm_get_chunk_data(map, 0); uint8_t *p = __sm_get_chunk_data(map, 0);
*(sm_idx_t *)p = __sm_get_chunk_aligned_offset(idx); // TODO: this was get_vector_aligned_offset *(sm_idx_t *)p = __sm_get_vector_aligned_offset(idx);
__sm_set_chunk_count(map, 1); __sm_set_chunk_count(map, 1);
@ -1396,22 +1415,23 @@ sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t, void *aux),
} }
int int
sparsemap_merge(sparsemap_t *map, sparsemap_t *other) sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
{ {
uint8_t *src, *dst; uint8_t *src, *dst;
size_t src_count = __sm_get_chunk_count(other); size_t src_count = __sm_get_chunk_count(source);
size_t dst_count = __sm_get_chunk_count(map); size_t dst_count = __sm_get_chunk_count(destination);
size_t max_chunk_count = src_count + dst_count; size_t max_chunk_count = src_count + dst_count;
ssize_t difference = map->m_capacity - (other->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); ssize_t remaining_capacity = destination->m_capacity - destination->m_data_used -
(source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2));
/* Estimate worst-case overhead required for merge. */ /* Estimate worst-case overhead required for merge. */
if (difference <= 0) { if (remaining_capacity <= 0) {
errno = ENOSPC; errno = ENOSPC;
return -difference; return -remaining_capacity;
} }
dst = __sm_get_chunk_data(map, 0); src = __sm_get_chunk_data(source, 0);
src = __sm_get_chunk_data(other, 0); dst = __sm_get_chunk_data(destination, 0);
for (size_t i = 0; i < max_chunk_count && src_count; i++) { for (size_t i = 0; i < max_chunk_count && src_count; i++) {
sm_idx_t src_start = *(sm_idx_t *)src; sm_idx_t src_start = *(sm_idx_t *)src;
sm_idx_t dst_start = *(sm_idx_t *)dst; sm_idx_t dst_start = *(sm_idx_t *)dst;
@ -1430,8 +1450,15 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other)
__sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t));
__sm_chunk_t dst_chunk; __sm_chunk_t dst_chunk;
__sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t)); __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t));
__sm_merge_chunk(map, src_start, src_chunk); size_t src_capacity = __sm_chunk_get_capacity(&src_chunk);
*(sm_idx_t *)dst = __sm_get_chunk_aligned_offset(src_start); size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
if (dst_capacity < src_capacity) {
__sm_chunk_set_capacity(&dst_chunk, src_capacity);
}
if (*(sm_idx_t *)dst > *(sm_idx_t *)src) {
*(sm_idx_t *)dst = *(sm_idx_t *)src;
}
__sm_merge_chunk(destination, src_start, src_capacity, &dst_chunk, &src_chunk);
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk);
dst_count--; dst_count--;
@ -1443,14 +1470,14 @@ sparsemap_merge(sparsemap_t *map, sparsemap_t *other)
__sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t));
size_t src_size = __sm_chunk_get_size(&src_chunk); size_t src_size = __sm_chunk_get_size(&src_chunk);
if (dst_count == 0) { if (dst_count == 0) {
__sm_append_data(map, src, sizeof(sm_idx_t) + src_size); __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size);
} else { } else {
size_t offset = __sm_get_chunk_offset(map, dst_start); size_t offset = __sm_get_chunk_offset(destination, dst_start);
__sm_insert_data(map, offset, src, sizeof(sm_idx_t) + src_size); __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);
} }
/* Update the chunk count and data_used. */ /* Update the chunk count and data_used. */
__sm_set_chunk_count(map, __sm_get_chunk_count(map) + 1); __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);
/* Carry on to the next chunk. */ /* Carry on to the next chunk. */
__sm_chunk_t dst_chunk; __sm_chunk_t dst_chunk;
@ -1528,7 +1555,7 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 }; uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 };
memcpy(dst, &buf[0], sizeof(buf)); memcpy(dst, &buf[0], sizeof(buf));
*(sm_idx_t *)dst = __sm_get_chunk_aligned_offset(offset); //TODO: was simply "offset" *(sm_idx_t *)dst = __sm_get_vector_aligned_offset(offset); // TODO: was simply "offset"
dst += sizeof(sm_idx_t); dst += sizeof(sm_idx_t);
/* the |other| sparsemap_t now has one additional chunk */ /* the |other| sparsemap_t now has one additional chunk */
@ -1551,7 +1578,7 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
for (size_t j = offset % capacity; j < capacity; j++, d++) { for (size_t j = offset % capacity; j < capacity; j++, d++) {
if (__sm_chunk_is_set(&s_chunk, j)) { if (__sm_chunk_is_set(&s_chunk, j)) {
sparsemap_set(other, d, true); sparsemap_set(other, d, true);
sparsemap_set(map, d, false); //TODO remove, and fix set_capacity below sparsemap_set(map, d, false); // TODO remove, and fix set_capacity below
} }
} }

View file

@ -749,7 +749,7 @@ test_api_split(const MunitParameter params[], void *data)
} }
sparsemap_idx_t offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion); sparsemap_idx_t offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion);
for (int i = 0; i < offset; i++) { for (size_t i = 0; i < offset; i++) {
assert_true(sparsemap_is_set(map, i)); assert_true(sparsemap_is_set(map, i));
assert_false(sparsemap_is_set(&portion, i)); assert_false(sparsemap_is_set(&portion, i));
} }
@ -767,7 +767,7 @@ test_api_split(const MunitParameter params[], void *data)
} }
offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion); offset = sparsemap_split(map, SPARSEMAP_IDX_MAX, &portion);
for (int i = 0; i < offset - 24; i++) { for (size_t i = 0; i < offset - 24; i++) {
assert_true(sparsemap_is_set(map, i + 24)); assert_true(sparsemap_is_set(map, i + 24));
assert_false(sparsemap_is_set(&portion, i + 24)); assert_false(sparsemap_is_set(&portion, i + 24));
} }
@ -842,11 +842,11 @@ test_api_merge(const MunitParameter params[], void *data)
sparsemap_clear(map); sparsemap_clear(map);
sparsemap_clear(other); sparsemap_clear(other);
sparsemap_set(other, 1, true); sparsemap_set(other, 1, true);
sparsemap_set(other, 2049, true); sparsemap_set(other, 2049, true);
sparsemap_set(map, 2050, true); sparsemap_set(map, 2050, true);
sparsemap_set(other, 4097, true); sparsemap_set(other, 4097, true);
sparsemap_set(map, 6113, true);
sparsemap_set(other, 8193, true); sparsemap_set(other, 8193, true);
sparsemap_merge(map, other); sparsemap_merge(map, other);
@ -855,10 +855,11 @@ test_api_merge(const MunitParameter params[], void *data)
assert_true(sparsemap_is_set(map, 2049)); assert_true(sparsemap_is_set(map, 2049));
assert_true(sparsemap_is_set(map, 2050)); assert_true(sparsemap_is_set(map, 2050));
assert_true(sparsemap_is_set(map, 4097)); assert_true(sparsemap_is_set(map, 4097));
assert_true(sparsemap_is_set(map, 6113));
assert_true(sparsemap_is_set(map, 8193)); assert_true(sparsemap_is_set(map, 8193));
for (int i = 0; i < 10000; i++) { for (int i = 0; i < 10000; i++) {
if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 8193) if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 6113 || i == 8193)
continue; continue;
else else
assert_false(sparsemap_is_set(map, i)); assert_false(sparsemap_is_set(map, i));
@ -869,13 +870,13 @@ test_api_merge(const MunitParameter params[], void *data)
sparsemap_set(map, 0, true); sparsemap_set(map, 0, true);
sparsemap_set(map, 2048, true); sparsemap_set(map, 2048, true);
sparsemap_set(map, 2049, true);
sparsemap_set(map, 8193, true); sparsemap_set(map, 8193, true);
for (int i = 2049; i < 4096; i++) { for (int i = 2049; i < 4096; i++) {
sparsemap_set(other, i, true); sparsemap_set(other, i, true);
} }
sparsemap_merge(map, other); sparsemap_merge(map, other);
assert(sparsemap_is_set(map, 0)); assert(sparsemap_is_set(map, 0));
assert(sparsemap_is_set(map, 2048)); assert(sparsemap_is_set(map, 2048));
assert(sparsemap_is_set(map, 8193)); assert(sparsemap_is_set(map, 8193));
@ -1245,7 +1246,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data)
errno = 0; errno = 0;
} }
i += l; i += l;
/* ANSI esc code to clear line, carrage return, then print on the same line */ /* ANSI esc code to clear line, carriage return, then print on the same line */
// printf("\033[2K\r%d", i); // printf("\033[2K\r%d", i);
// printf("%d\t%d\n", l, i); // printf("%d\t%d\n", l, i);
} }
@ -1345,7 +1346,7 @@ test_scale_spans_come_spans_go_tear_down(void *fixture)
static MunitResult static MunitResult
test_scale_spans_come_spans_go(const MunitParameter params[], void *data) test_scale_spans_come_spans_go(const MunitParameter params[], void *data)
{ {
size_t amt = 8192; // 268435456; // ~5e7 interations due to 2e9 / avg(l) size_t amt = 8192; // 268435456; // ~5e7 iterations due to 2e9 / avg(l)
sparsemap_t *map = (sparsemap_t *)data; sparsemap_t *map = (sparsemap_t *)data;
(void)params; (void)params;