adding a merge map function (#6)

Reviewed-on: #6
Co-authored-by: Greg Burd <greg@burd.me>
Co-committed-by: Greg Burd <greg@burd.me>
This commit is contained in:
Gregory Burd 2024-04-30 15:20:48 +00:00 committed by Gregory Burd
parent 4ebe555fac
commit 2dac3ed385
4 changed files with 205 additions and 24 deletions

View file

@ -5,13 +5,13 @@ SHARED_LIB = libsparsemap.so
#CFLAGS = -Wall -Wextra -Wpedantic -Of -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -fPIC
#CFLAGS = -DSPARSEMAP_DIAGNOSTIC -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
#CFLAGS = -Wall -Wextra -Wpedantic -Og -g -fsanitize=all -fhardened -std=c11 -Iinclude/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -O0 -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -Wall -Wextra -Wpedantic -Ofast -g -std=c11 -Iinclude/ -Itests/ -fPIC
#TEST_FLAGS = -DDEBUG -Wall -Wextra -Wpedantic -Og -g -fsanitize=address,leak,object-size,pointer-compare,pointer-subtract,null,return,bounds,pointer-overflow,undefined -fsanitize-address-use-after-scope -std=c11 -Iinclude/ -fPIC
TESTS = tests/test

View file

@ -260,11 +260,22 @@ size_t sparsemap_get_size(sparsemap_t *map);
*/
void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n), size_t skip);
/** @brief Splits the bitmap by assigning all bits starting at \b offset to the
/** @brief Merges the values from \b other into \b map; \b other is left
* unchanged.
*
* @param[in] map The sparsemap reference.
* @param[in] skip Start the scan after "skip" bits.
* @param[in] other The bitmap to merge into \b map.
*/
void sparsemap_merge(sparsemap_t *map, sparsemap_t *other);
/** @brief Splits the bitmap by assigning all bits starting at \b offset to the
* \b other bitmap while removing them from \b map.
*
* The split must occur on a vector boundary.
*
* @param[in] map The sparsemap reference.
* @param[in] offset The 0-based offset into the bitmap at which to split.
* @param[in] other The bitmap into which we place the split.
*/
void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other);

View file

@ -52,7 +52,7 @@ void __attribute__((format(printf, 4, 5))) __sm_diag_(const char *file, int line
#define __sm_assert(expr) \
if (!(expr)) \
fprintf(stderr, "%s:%d:%s(): assertion failed! %s", __FILE__, __LINE__, __func__, #expr)
fprintf(stderr, "%s:%d:%s(): assertion failed! %s\n", __FILE__, __LINE__, __func__, #expr)
#define __sm_when_diag(expr) \
if (1) \
@ -438,7 +438,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos, sm_bi
* SM_BITS_PER_VECTOR
*/
static size_t
__sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *offset, bool value)
__sm_chunk_map_select(__sm_chunk_t *map, size_t n, sparsemap_idx_t *offset, bool value)
{
size_t ret = 0;
register uint8_t *p;
@ -509,12 +509,10 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *offset, bool value)
}
}
}
*offset = (ssize_t)n;
*offset = n;
return ret;
}
extern void print_bits(char *name, uint64_t value); // GSB
/** @brief Counts the bits matching \b value in the range [0, \b idx]
* inclusive after ignoring the first \b offset bits in the chunk.
*
@ -763,7 +761,6 @@ __sm_get_chunk_map_data(sparsemap_t *map, size_t offset)
*
* @param[in] map The sparsemap_t in question.
* @returns a pointer after the end of the used data
* @todo could this simply use m_data_used?
*/
static uint8_t *
__sm_get_chunk_map_end(sparsemap_t *map)
@ -828,7 +825,8 @@ __sm_get_fully_aligned_offset(size_t idx)
*
* @param[in] map The sparsemap_t in question.
* @param[in] idx The index of the chunk map to locate.
* @returns the byte offset of a __sm_chunk_t in m_data.
* @returns the byte offset of a __sm_chunk_t in m_data, or -1 if there
* are no chunks.
*/
static size_t
__sm_get_chunk_map_offset(sparsemap_t *map, sparsemap_idx_t idx)
@ -846,7 +844,7 @@ __sm_get_chunk_map_offset(sparsemap_t *map, sparsemap_idx_t idx)
__sm_assert(s == __sm_get_aligned_offset(s));
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p + sizeof(sm_idx_t));
if (s >= idx || (unsigned long)idx < s + __sm_chunk_map_get_capacity(&chunk)) {
if (s >= idx || idx < s + __sm_chunk_map_get_capacity(&chunk)) {
break;
}
p += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&chunk);
@ -1068,13 +1066,12 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
/* Get the __sm_chunk_t which manages this index */
ssize_t offset = __sm_get_chunk_map_offset(map, idx);
bool dont_grow = false;
if (map->m_data_used + sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2 > map->m_capacity) {
errno = ENOSPC;
return SPARSEMAP_IDX_MAX;
}
/* If there is no __sm_chunk_t and the bit is set to zero then return
/* If there are no __sm_chunk_t and the bit is set to zero then return
immediately; otherwise create an initial __sm_chunk_t. */
if (offset == -1) {
if (value == false) {
@ -1138,7 +1135,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
}
size_t size = __sm_chunk_map_get_size(&chunk);
offset += (ssize_t)(sizeof(sm_idx_t) + size);
offset += (sizeof(sm_idx_t) + size);
p += sizeof(sm_idx_t) + size;
uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 };
@ -1171,7 +1168,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
break;
case SM_NEEDS_TO_GROW:
if (!dont_grow) {
offset += (ssize_t)(sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t));
offset += (sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t));
__sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t));
}
__sm_chunk_map_set(&chunk, idx - start, value, &position, &fill, true);
@ -1183,7 +1180,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
__sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2);
__sm_set_chunk_map_count(map, __sm_get_chunk_map_count(map) - 1);
} else {
offset += (ssize_t)(sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t));
offset += (sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t));
__sm_remove_data(map, offset, sizeof(sm_bitvec_t));
}
break;
@ -1216,9 +1213,7 @@ size_t
sparsemap_get_size(sparsemap_t *map)
{
if (map->m_data_used) {
__sm_when_diag({
__sm_assert(map->m_data_used == __sm_get_size_impl(map));
});
__sm_when_diag({ __sm_assert(map->m_data_used == __sm_get_size_impl(map)); });
return map->m_data_used;
}
return map->m_data_used = __sm_get_size_impl(map);
@ -1247,6 +1242,72 @@ sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t), size_t ski
}
}
/** @brief Sets in \b map every bit that is set in the chunk \b src.
 *
 * Each position in [0, capacity of \b src) is tested; a set position `k` is
 * written to \b map at index \b offset + k using sparsemap_set(). Bits that
 * are clear in \b src are not touched in \b map.
 *
 * @param[in] map The sparsemap_t receiving the bits.
 * @param[in] offset The index in \b map corresponding to bit 0 of \b src.
 * @param[in] src The chunk whose set bits are copied (passed by value).
 */
void
__sm_chunk_map_merge(sparsemap_t *map, sparsemap_idx_t offset, __sm_chunk_t src)
{
  const size_t nbits = __sm_chunk_map_get_capacity(&src);
  sparsemap_idx_t bit = 0;

  while (bit < nbits) {
    if (__sm_chunk_map_is_set(&src, bit)) {
      sparsemap_set(map, offset + bit, true);
    }
    bit++;
  }
}
/** @brief Merges every set bit of \b other into \b map; \b other is not
 * modified.
 *
 * The chunk lists of both maps are walked side by side, comparing the start
 * index stored in front of each chunk (this assumes both lists are ordered
 * by ascending start index):
 *   - a destination chunk that starts before the current source chunk is
 *     skipped,
 *   - chunks with the same start are combined bit by bit,
 *   - a source chunk with no counterpart is copied into \b map, either
 *     inserted in front of the current destination chunk or appended once
 *     the destination chunks are exhausted.
 *
 * @param[in] map The sparsemap_t that receives the merged bits.
 * @param[in] other The sparsemap_t whose bits are merged into \b map.
 */
void
sparsemap_merge(sparsemap_t *map, sparsemap_t *other)
{
  uint8_t *src, *dst;
  size_t src_count = __sm_get_chunk_map_count(other);
  size_t dst_count = __sm_get_chunk_map_count(map);
  size_t max_chunk_count = src_count + dst_count;

  dst = __sm_get_chunk_map_data(map, 0);
  src = __sm_get_chunk_map_data(other, 0);
  for (size_t i = 0; i < max_chunk_count && src_count; i++) {
    sm_idx_t src_start = *(sm_idx_t *)src;
    /* Only look at the destination while unvisited chunks remain; once they
       are used up `dst` points just past the data and must not be read. */
    sm_idx_t dst_start = dst_count > 0 ? *(sm_idx_t *)dst : 0;

    if (dst_count > 0 && src_start > dst_start) {
      /* The destination chunk precedes the source chunk: step over it.  It
         has now been visited, so it no longer counts as remaining;
         otherwise the walk would run off the end of the destination and the
         remaining source chunks would never be merged. */
      __sm_chunk_t dst_chunk;
      __sm_chunk_map_init(&dst_chunk, dst + sizeof(sm_idx_t));
      dst += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&dst_chunk);
      dst_count--;
      continue;
    }

    if (dst_count > 0 && src_start == dst_start) {
      /* Chunks overlap, merge them. */
      __sm_chunk_t src_chunk;
      __sm_chunk_map_init(&src_chunk, src + sizeof(sm_idx_t));
      __sm_chunk_t dst_chunk;
      __sm_chunk_map_init(&dst_chunk, dst + sizeof(sm_idx_t));
      __sm_chunk_map_merge(map, src_start, src_chunk);
      *(sm_idx_t *)dst = __sm_get_aligned_offset(src_start);
      /* The destination size is queried after the merge because setting
         bits may have changed the chunk's size. */
      src += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&src_chunk);
      dst += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&dst_chunk);
      dst_count--;
      src_count--;
      continue;
    }

    /* Either the source chunk starts before the current destination chunk
       or there are no destination chunks left: copy the chunk verbatim. */
    __sm_chunk_t src_chunk;
    __sm_chunk_map_init(&src_chunk, src + sizeof(sm_idx_t));
    size_t src_size = __sm_chunk_map_get_size(&src_chunk);
    if (dst_count == 0) {
      __sm_append_data(map, src, sizeof(sm_idx_t) + src_size);
    } else {
      size_t offset = __sm_get_chunk_map_offset(map, dst_start);
      __sm_insert_data(map, offset, src, sizeof(sm_idx_t) + src_size);
    }

    /* Update the chunk count. */
    __sm_set_chunk_map_count(map, __sm_get_chunk_map_count(map) + 1);

    /* Carry on to the next chunk.  `dst` now addresses the copy that was
       just placed in the map, so stepping over it lands on the chunk that
       was current before the insertion (or on the new end of the data). */
    __sm_chunk_t dst_chunk;
    __sm_chunk_map_init(&dst_chunk, dst + sizeof(sm_idx_t));
    src += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&src_chunk);
    dst += sizeof(sm_idx_t) + __sm_chunk_map_get_size(&dst_chunk);
    src_count--;
  }
}
void
sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
{
@ -1334,7 +1395,7 @@ sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
__sm_chunk_map_set_capacity(&s_chunk, offset % capacity);
}
/* Now continue with all remaining minimaps. */
/* Now continue with all remaining chunk maps. */
for (; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)src;
src += sizeof(sm_idx_t);
@ -1383,7 +1444,7 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value)
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);
ssize_t new_n = (ssize_t)n;
ssize_t new_n = n;
size_t index = __sm_chunk_map_select(&chunk, n, &new_n, value);
if (new_n == -1) {
return start + index;

View file

@ -605,6 +605,114 @@ test_api_split(const MunitParameter params[], void *data)
return MUNIT_OK;
}
/* Fixture setup for the merge test: creates the destination map (10 KiB
   requested from sparsemap()) that is handed to the test as its fixture. */
static void *
test_api_merge_setup(const MunitParameter params[], void *user_data)
{
  (void)user_data;
  (void)params;

  sparsemap_t *fixture = sparsemap(10 * 1024);
  assert_ptr_not_null(fixture);
  return fixture;
}
/* Fixture teardown for the merge test: releases the map created by the
   matching setup function. */
static void
test_api_merge_tear_down(void *fixture)
{
  sparsemap_t *owned = fixture;

  assert_ptr_not_null(owned);
  munit_free(owned);
}
/* Exercises sparsemap_merge(): `map` (the fixture) is the destination and a
   locally created `other` is the source.  Each scenario sets a few bits,
   merges, checks the destination and then clears both maps. */
static MunitResult
test_api_merge(const MunitParameter params[], void *data)
{
  sparsemap_t *map = (sparsemap_t *)data;
  sparsemap_t *other = sparsemap(1024);
  (void)params;

  assert_ptr_not_null(map);
  assert_ptr_not_null(other);

  // Merge two empty maps to get an empty map.
  sparsemap_merge(map, other);

  // Merge a single set bit in the first chunk into the empty map.
  sparsemap_set(other, 0, true);
  sparsemap_merge(map, other);

  assert_true(sparsemap_is_set(other, 0));
  assert_true(sparsemap_is_set(map, 0));

  sparsemap_clear(map);
  sparsemap_clear(other);

  // Merge two maps with the same single bit set.
  sparsemap_set(map, 0, true);
  sparsemap_set(other, 0, true);
  sparsemap_merge(map, other);

  // The shared bit must survive in both maps.
  assert_true(sparsemap_is_set(other, 0));
  assert_true(sparsemap_is_set(map, 0));

  sparsemap_clear(map);
  sparsemap_clear(other);

  // Merge an empty map with one that has the first bit set.
  sparsemap_set(map, 0, true);
  sparsemap_merge(map, other);

  assert_true(sparsemap_is_set(map, 0));

  sparsemap_clear(map);
  sparsemap_clear(other);

  // Merge a bit that lives beyond the first chunk into an empty map.
  sparsemap_set(other, 2049, true);
  sparsemap_merge(map, other);

  assert_true(sparsemap_is_set(map, 2049));

  sparsemap_clear(map);
  sparsemap_clear(other);

  // Merge several source chunks around a single destination chunk.
  sparsemap_set(other, 1, true);
  sparsemap_set(other, 2049, true);
  sparsemap_set(map, 2050, true);
  sparsemap_set(other, 4097, true);
  sparsemap_set(other, 8193, true);
  sparsemap_merge(map, other);

  assert_true(sparsemap_is_set(map, 1));
  assert_true(sparsemap_is_set(map, 2049));
  assert_true(sparsemap_is_set(map, 2050));
  assert_true(sparsemap_is_set(map, 4097));
  assert_true(sparsemap_is_set(map, 8193));
  // No other bit in the probed range may have been set by the merge.
  for (int i = 0; i < 10000; i++) {
    if (i == 2049 || i == 1 || i == 2050 || i == 4097 || i == 8193) {
      continue;
    }
    assert_false(sparsemap_is_set(map, i));
  }

  sparsemap_clear(map);
  sparsemap_clear(other);

  // Merge a densely populated source range into a sparse destination.
  sparsemap_set(map, 0, true);
  sparsemap_set(map, 2048, true);
  sparsemap_set(map, 2049, true);
  sparsemap_set(map, 8193, true);
  for (int i = 2049; i < 4096; i++) {
    sparsemap_set(other, i, true);
  }
  sparsemap_merge(map, other);

  // Use the munit assertions (not <assert.h>) so these checks are never
  // compiled out and failures are reported through the test framework.
  assert_true(sparsemap_is_set(map, 0));
  assert_true(sparsemap_is_set(map, 2048));
  assert_true(sparsemap_is_set(map, 8193));
  for (int i = 2049; i < 4096; i++) {
    assert_true(sparsemap_is_set(map, i));
  }

  free(other);
  return MUNIT_OK;
}
static void *
test_api_select_setup(const MunitParameter params[], void *user_data)
{
@ -919,6 +1027,7 @@ static MunitTest api_test_suite[] = {
{ (char *)"/get_size", test_api_get_size, test_api_get_size_setup, test_api_get_size_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/scan", test_api_scan, test_api_scan_setup, test_api_scan_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/split", test_api_split, test_api_split_setup, test_api_split_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/merge", test_api_merge, test_api_merge_setup, test_api_merge_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
{ (char *)"/select/true", test_api_select, test_api_select_setup, test_api_select_tear_down, MUNIT_TEST_OPTION_NONE, NULL },
#ifdef SELECT_FALSE
{ (char *)"/select/false", test_api_select_false, test_api_select_false_setup, test_api_select_false_tear_down, MUNIT_TEST_OPTION_NONE, NULL },