From 79a91f0864e0f27a592aae8cde73a25220ca2e19 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Tue, 14 May 2024 22:30:02 -0400 Subject: [PATCH] WIP --- src/sparsemap.c | 129 ++++++------------------------------------------ 1 file changed, 15 insertions(+), 114 deletions(-) diff --git a/src/sparsemap.c b/src/sparsemap.c index bc2ae1a..71b366a 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -960,34 +960,24 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size) /** @brief Merges into the chunk at \b offset all set bits from \b src. * - * @param[in] chunk The chunk in question. + * @param[in] map The map the chunk belongs to. * @param[in] offset The offset of the first bit in the chunk to be merged. */ -#if 0 void -__sm_merge_chunk(sparsemap_t *chunk, sparsemap_idx_t offset, __sm_chunk_t src) -{ - size_t capacity = __sm_chunk_get_capacity(&src); - for (sparsemap_idx_t j = 0; j < capacity; j++, offset++) { - if (__sm_chunk_is_set(&src, j)) { - sparsemap_set(chunk, offset, true); - } - } -} -#endif -void -__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t idx, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, __sm_chunk_t *src_chunk) +__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t dst_start, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, + __sm_chunk_t *src_chunk) { + ssize_t delta = src_start - dst_start; for (sparsemap_idx_t j = 0; j < capacity; j++) { - sparsemap_idx_t offset = __sm_get_chunk_offset(map, idx + j); - if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j)) { + sparsemap_idx_t offset = __sm_get_chunk_offset(map, src_start + j); + if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j + delta)) { size_t position; sm_bitvec_t fill; - switch (__sm_chunk_set(dst_chunk, j, true, &position, &fill, false)) { + switch (__sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, false)) { case SM_NEEDS_TO_GROW: offset += sizeof(sm_idx_t) + position * sizeof(sm_bitvec_t); 
__sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(sm_bitvec_t)); - __sm_chunk_set(dst_chunk, j, true, &position, &fill, true); + __sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, true); break; case SM_NEEDS_TO_SHRINK: if (__sm_chunk_is_empty(src_chunk)) { @@ -1497,7 +1487,7 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) __sm_chunk_increase_capacity(&dst_chunk, src_capacity); dst_capacity = __sm_chunk_get_capacity(&dst_chunk); } - __sm_merge_chunk(destination, src_start, dst_capacity, &dst_chunk, &src_chunk); + __sm_merge_chunk(destination, src_start, dst_start, dst_capacity, &dst_chunk, &src_chunk); i += dst_capacity; continue; } @@ -1507,14 +1497,16 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) __sm_chunk_increase_capacity(&dst_chunk, src_capacity); } size_t src_end = src_start + src_capacity; - sparsemap_idx_t n = src_start; - while (n < src_end) { + size_t dst_end = dst_start + dst_capacity; + size_t overlap = src_end > dst_end ? src_capacity - (src_end - dst_end) : src_capacity; + __sm_merge_chunk(destination, src_start, dst_start, overlap, &dst_chunk, &src_chunk); + i = src_start + overlap; + for (size_t n = i; n < src_end; n++) { if (sparsemap_is_set(source, n)) { sparsemap_set(destination, n, true); } - n++; + i++; } - i = src_end; continue; } assert(!"shouldn't be here"); // TODO... @@ -1546,97 +1538,6 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) return 0; } -int -__sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) -{ - uint8_t *src, *dst; - size_t src_count = __sm_get_chunk_count(source); - size_t dst_count = __sm_get_chunk_count(destination); - size_t max_chunk_count = src_count + dst_count; - ssize_t remaining_capacity = destination->m_capacity - destination->m_data_used - - (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2)); - - /* Estimate worst-case overhead required for merge. 
*/ - if (remaining_capacity <= 0) { - errno = ENOSPC; - return -remaining_capacity; - } - - src = __sm_get_chunk_data(source, 0); - dst = __sm_get_chunk_data(destination, 0); - for (size_t i = 0; i < max_chunk_count && src_count; i++) { - sm_idx_t src_start = *(sm_idx_t *)src; - sm_idx_t dst_start = *(sm_idx_t *)dst; - __sm_chunk_t src_chunk; - __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); - __sm_chunk_t dst_chunk; - __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t)); - size_t src_capacity = __sm_chunk_get_capacity(&src_chunk); - size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk); - /* Chunks do not overlap. */ - if (dst_count == 0) { - /* No destination chunks, so append data. */ - size_t src_size = __sm_chunk_get_size(&src_chunk); - __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size); - - /* Update the chunk count and data_used. */ - __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); - src_count--; - src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); - } else if (src_start >= (dst_start + dst_capacity) || dst_start >= (src_start + src_capacity)) { - if (src_start < dst_start) { - /* Source chunk precedes next destination chunk. */ - size_t src_size = __sm_chunk_get_size(&src_chunk); - size_t offset = __sm_get_chunk_offset(destination, dst_start); - __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size); - - /* Update the chunk count and data_used. */ - __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); - src_count--; - src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); - } else { - /* Source chunk follows destination chunk. */ - dst_count--; - dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); - continue; - } - } else { - /* Chunks overlap to some degree, determine the overlapping range. 
*/ - if (dst_capacity < src_capacity) { - __sm_chunk_increase_capacity(&dst_chunk, src_capacity); - dst_capacity = __sm_chunk_get_capacity(&dst_chunk); - } - sparsemap_idx_t src_end = src_start + src_capacity; - sparsemap_idx_t dst_end = dst_start + dst_capacity; - sparsemap_idx_t overlap_start = (src_start > dst_start) ? src_start : dst_start; - sparsemap_idx_t overlap_end = (src_end < dst_end) ? src_end : dst_end; - size_t overlap_length = overlap_end - overlap_start; - if (overlap_length == src_capacity) { - /* 100% overlap */ - __sm_merge_chunk(destination, src_start, overlap_length, &dst_chunk, &src_chunk); - src_count--; - dst_count--; - src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); - dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); - } else { - /* Potentially three sections to merge. */ - if (src_start < overlap_start) { - __sm_merge_chunk(destination, src_start, overlap_length - src_start, &dst_chunk, &src_chunk); - } - __sm_merge_chunk(destination, overlap_start, overlap_end - overlap_start, &dst_chunk, &src_chunk); - if (overlap_end < dst_end) { - __sm_merge_chunk(destination, overlap_end, dst_end - overlap_end, &dst_chunk, &src_chunk); - } - src_count--; - dst_count--; - src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); - dst += sizeof(sm_idx_t) + __sm_chunk_get_size(&dst_chunk); - } - } - } - return 0; -} - sparsemap_idx_t sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other) {