From e641e6cc631504d0a21713a8f6cc1f28e488d775 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 10 May 2024 20:25:08 +0000 Subject: [PATCH] WIP --- .idx/dev.nix | 2 +- src/sparsemap.c | 68 ++++++++++++++++++++++++++++++------------------- tests/soak.c | 2 +- tests/test.c | 6 ++--- 4 files changed, 47 insertions(+), 31 deletions(-) diff --git a/.idx/dev.nix b/.idx/dev.nix index b909aa2..0c7da71 100644 --- a/.idx/dev.nix +++ b/.idx/dev.nix @@ -38,6 +38,7 @@ idx = { # Search for the extensions you want on https://open-vsx.org/ and use "publisher.id" extensions = [ + "asvetliakov.vscode-neovim" "coolbear.systemd-unit-file" "dotjoshjohnson.xml" "editorconfig.editorconfig" @@ -66,7 +67,6 @@ "vscodevim.vim" "yzhang.markdown-all-in-one" "znck.grammarly" - #"asvetliakov.vscode-neovim" #"jnoortheen.nix-ide" ]; # Enable previews diff --git a/src/sparsemap.c b/src/sparsemap.c index 8251a66..4831d02 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -423,30 +423,6 @@ __sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, sm_bitv return SM_OK; } -/** @brief Merges into the chunk at \b offset all set bits from \b src. - * - * @param[in] chunk The chunk in question. - * @param[in] offset The offset of the first bit in the chunk to be merged. - * @todo merge vectors rather than call sparsemap_set() in a loop - */ -void -__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t offset, __sm_chunk_t dst_chunk, __sm_chunk_t src_chunk) -{ - size_t src_capacity = __sm_chunk_get_capacity(&src_chunk); - (void)dst_chunk; -#if 0 - size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk); - if (dst_capacity < src_capacity) { - __sm_chunk_set_capacity(&dst_chunk, src_capacity); - } -#endif - for (sparsemap_idx_t j = 0; j < src_capacity; j++) { - if (__sm_chunk_is_set(&src_chunk, j)) { - sparsemap_set(map, offset + j, true); - } - } -} - /** @brief Finds the index of the \b n'th bit after \b offset bits with \b * value. * @@ -948,6 +924,37 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size) map->m_data_used -= gap_size; } +/** @brief Merges into the chunk at \b offset all set bits from \b src. + * + * @param[in] chunk The chunk in question. + * @param[in] offset The offset of the first bit in the chunk to be merged. + * @todo merge vectors rather than call sparsemap_set() in a loop + */ +void +__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t idx, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, __sm_chunk_t *src_chunk) +{ + int rc; + for (sparsemap_idx_t j = 0; j < capacity; j++) { + bool retried = false; + size_t position; + sm_bitvec_t fill; + if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j)) { + retry:; + rc = __sm_chunk_set(dst_chunk, j, true, &position, &fill, retried); + if (rc == SM_NEEDS_TO_GROW) { + sparsemap_idx_t offset = __sm_get_chunk_offset(map, j + idx); + offset += sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t); + __sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t)); + if (!retried) { + retried = true; + goto retry; + } + } + __sm_assert(rc == SM_OK); + } + } +} + /* * The following is the "Sparsemap" implementation, it uses chunks (code above) * and is the public API for this compressed bitmap representation. @@ -1410,7 +1417,8 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) size_t src_count = __sm_get_chunk_count(source); size_t dst_count = __sm_get_chunk_count(destination); size_t max_chunk_count = src_count + dst_count; - ssize_t remaining_capacity = destination->m_capacity - (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); + ssize_t remaining_capacity = destination->m_capacity - destination->m_data_used - + (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); /* Estimate worst-case overhead required for merge. */ if (remaining_capacity <= 0) { @@ -1438,7 +1446,15 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); __sm_chunk_t dst_chunk; __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t)); - __sm_merge_chunk(destination, *(sm_idx_t *)src, dst_chunk, src_chunk); + size_t src_capacity = __sm_chunk_get_capacity(&src_chunk); + size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk); + if (dst_capacity < src_capacity) { + __sm_chunk_set_capacity(&dst_chunk, src_capacity); + } + if (*(sm_idx_t *)dst > *(sm_idx_t *)src) { + *(sm_idx_t *)dst = *(sm_idx_t *)src; + } + __sm_merge_chunk(destination, src_start, src_capacity, &dst_chunk, &src_chunk); src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); dst_count--; diff --git a/tests/soak.c b/tests/soak.c index 651b796..595cd17 100644 --- a/tests/soak.c +++ b/tests/soak.c @@ -6,9 +6,9 @@ #include #include -#include "../include/sparsemap.h" #include "../include/common.h" #include "../include/roaring.h" +#include "../include/sparsemap.h" #include "../include/tdigest.h" /* midl.h ------------------------------------------------------------------ */ diff --git a/tests/test.c b/tests/test.c index 652b212..0513539 100644 --- a/tests/test.c +++ b/tests/test.c @@ -870,13 +870,13 @@ test_api_merge(const MunitParameter params[], void *data) sparsemap_set(map, 0, true); sparsemap_set(map, 2048, true); - sparsemap_set(map, 2049, true); sparsemap_set(map, 8193, true); for (int i = 2049; i < 4096; i++) { sparsemap_set(other, i, true); } sparsemap_merge(map, other); + assert(sparsemap_is_set(map, 0)); assert(sparsemap_is_set(map, 2048)); assert(sparsemap_is_set(map, 8193)); @@ -1246,7 +1246,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data) errno = 0; } i += l; - /* ANSI esc code to clear line, carrage return, then print on the same line */ + /* ANSI esc code to clear line, carriage return, then print on the same line */ // printf("\033[2K\r%d", i); // printf("%d\t%d\n", l, i); } @@ -1346,7 +1346,7 @@ test_scale_spans_come_spans_go_tear_down(void *fixture) static MunitResult test_scale_spans_come_spans_go(const MunitParameter params[], void *data) { - size_t amt = 8192; // 268435456; // ~5e7 interations due to 2e9 / avg(l) + size_t amt = 8192; // 268435456; // ~5e7 iterations due to 2e9 / avg(l) sparsemap_t *map = (sparsemap_t *)data; (void)params;