gburd/full-merge #8

Merged
greg merged 5 commits from gburd/full-merge into main 2024-05-11 01:26:44 +00:00
4 changed files with 47 additions and 31 deletions
Showing only changes of commit e641e6cc63 - Show all commits

View file

@ -38,6 +38,7 @@
idx = {
# Search for the extensions you want on https://open-vsx.org/ and use "publisher.id"
extensions = [
"asvetliakov.vscode-neovim"
"coolbear.systemd-unit-file"
"dotjoshjohnson.xml"
"editorconfig.editorconfig"
@ -66,7 +67,6 @@
"vscodevim.vim"
"yzhang.markdown-all-in-one"
"znck.grammarly"
#"asvetliakov.vscode-neovim"
#"jnoortheen.nix-ide"
];
# Enable previews

View file

@ -423,30 +423,6 @@ __sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, sm_bitv
return SM_OK;
}
/** @brief Copies every set bit of \b src_chunk into \b map.
 *
 * Each bit position of the source chunk is examined in ascending order; for
 * every position that is set, sparsemap_set() is called on \b map at
 * \b offset plus that position.
 *
 * @param[in] map The map that receives the bits via sparsemap_set().
 * @param[in] offset The map index that corresponds to bit 0 of \b src_chunk.
 * @param[in] dst_chunk Unused by this implementation.
 * @param[in] src_chunk The chunk whose set bits are copied.
 * @todo merge vectors rather than call sparsemap_set() in a loop
 */
void
__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t offset, __sm_chunk_t dst_chunk, __sm_chunk_t src_chunk)
{
  /* The destination chunk is never touched directly; all writes go through
   * the public setter on the map. */
  (void)dst_chunk;

  const size_t nbits = __sm_chunk_get_capacity(&src_chunk);
  sparsemap_idx_t bit = 0;

  while (bit < nbits) {
    if (__sm_chunk_is_set(&src_chunk, bit)) {
      sparsemap_set(map, offset + bit, true);
    }
    bit++;
  }
}
/** @brief Finds the index of the \b n'th bit after \b offset bits with \b
* value.
*
@ -948,6 +924,37 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
map->m_data_used -= gap_size;
}
/** @brief Merges into \b dst_chunk every bit that is set in \b src_chunk.
 *
 * For each of the first \b capacity bit positions, a bit that is set in
 * \b src_chunk but clear in \b dst_chunk is set in \b dst_chunk. When
 * __sm_chunk_set() reports SM_NEEDS_TO_GROW, one bit vector is inserted into
 * the data of \b map at the position it reported and the set is retried
 * exactly once.
 *
 * @param[in] map The map whose data is grown when the destination chunk needs
 * room for another bit vector.
 * @param[in] idx Added to the chunk-relative bit position when looking up the
 * chunk's byte offset within \b map.
 * @param[in] capacity The number of bit positions to examine.
 * @param[in] dst_chunk The chunk that receives the bits.
 * @param[in] src_chunk The chunk whose set bits are merged.
 * @todo merge vectors rather than set bits one at a time in a loop
 */
void
__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t idx, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, __sm_chunk_t *src_chunk)
{
  for (sparsemap_idx_t j = 0; j < capacity; j++) {
    /* Only bits present in the source and missing from the destination need
     * any work. */
    if (!__sm_chunk_is_set(src_chunk, j) || __sm_chunk_is_set(dst_chunk, j)) {
      continue;
    }

    /* Out-parameters filled in by __sm_chunk_set(). */
    size_t position;
    sm_bitvec_t fill;

    int rc = __sm_chunk_set(dst_chunk, j, true, &position, &fill, false);
    if (rc == SM_NEEDS_TO_GROW) {
      /* Make room for one more bit vector at the reported position, then
       * retry once. The insertion happens a single time per bit, so a
       * second SM_NEEDS_TO_GROW can never add a stray vector to the map; it
       * falls through to the assertion below instead. */
      sparsemap_idx_t offset = __sm_get_chunk_offset(map, j + idx);
      offset += sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t);
      __sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t));
      rc = __sm_chunk_set(dst_chunk, j, true, &position, &fill, true);
    }
    __sm_assert(rc == SM_OK);
  }
}
/*
* The following is the "Sparsemap" implementation, it uses chunks (code above)
* and is the public API for this compressed bitmap representation.
@ -1410,7 +1417,8 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
size_t src_count = __sm_get_chunk_count(source);
size_t dst_count = __sm_get_chunk_count(destination);
size_t max_chunk_count = src_count + dst_count;
ssize_t remaining_capacity = destination->m_capacity - (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2));
ssize_t remaining_capacity = destination->m_capacity - destination->m_data_used -
(source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2));
/* Estimate worst-case overhead required for merge. */
if (remaining_capacity <= 0) {
@ -1438,7 +1446,15 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
__sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t));
__sm_chunk_t dst_chunk;
__sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t));
__sm_merge_chunk(destination, *(sm_idx_t *)src, dst_chunk, src_chunk);
size_t src_capacity = __sm_chunk_get_capacity(&src_chunk);
size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
if (dst_capacity < src_capacity) {
__sm_chunk_set_capacity(&dst_chunk, src_capacity);
}
if (*(sm_idx_t *)dst > *(sm_idx_t *)src) {
*(sm_idx_t *)dst = *(sm_idx_t *)src;
}
__sm_merge_chunk(destination, src_start, src_capacity, &dst_chunk, &src_chunk);
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk);
dst_count--;

View file

@ -6,9 +6,9 @@
#include <stdlib.h>
#include <string.h>
#include "../include/sparsemap.h"
#include "../include/common.h"
#include "../include/roaring.h"
#include "../include/sparsemap.h"
#include "../include/tdigest.h"
/* midl.h ------------------------------------------------------------------ */

View file

@ -870,13 +870,13 @@ test_api_merge(const MunitParameter params[], void *data)
sparsemap_set(map, 0, true);
sparsemap_set(map, 2048, true);
sparsemap_set(map, 2049, true);
sparsemap_set(map, 8193, true);
for (int i = 2049; i < 4096; i++) {
sparsemap_set(other, i, true);
}
sparsemap_merge(map, other);
assert(sparsemap_is_set(map, 0));
assert(sparsemap_is_set(map, 2048));
assert(sparsemap_is_set(map, 8193));
@ -1246,7 +1246,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data)
errno = 0;
}
i += l;
/* ANSI esc code to clear line, carrage return, then print on the same line */
/* ANSI esc code to clear line, carriage return, then print on the same line */
// printf("\033[2K\r%d", i);
// printf("%d\t%d\n", l, i);
}
@ -1346,7 +1346,7 @@ test_scale_spans_come_spans_go_tear_down(void *fixture)
static MunitResult
test_scale_spans_come_spans_go(const MunitParameter params[], void *data)
{
size_t amt = 8192; // 268435456; // ~5e7 interations due to 2e9 / avg(l)
size_t amt = 8192; // 268435456; // ~5e7 iterations due to 2e9 / avg(l)
sparsemap_t *map = (sparsemap_t *)data;
(void)params;