From b0c74459ab35c0aa6572c49cf394efe6c08a3e40 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 15 May 2024 11:39:31 -0400 Subject: [PATCH] WIP --- src/sparsemap.c | 74 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/src/sparsemap.c b/src/sparsemap.c index 71b366a..0df933d 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -871,10 +871,10 @@ __sm_get_chunk_aligned_offset(size_t idx) * * @param[in] map The sparsemap_t in question. * @param[in] idx The index of the chunk to locate. - * @returns the byte offset of a __sm_chunk_t in m_data, or -1 there + * @returns the byte offset of a __sm_chunk_t in m_data, or -1 if there * are no chunks. */ -static size_t +static ssize_t __sm_get_chunk_offset(sparsemap_t *map, sparsemap_idx_t idx) { size_t count = __sm_get_chunk_count(map); @@ -962,6 +962,7 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size) * * @param[in] map The map the chunk belongs too. * @param[in] offset The offset of the first bit in the chunk to be merged. 
+ * @todo merge at the vector level not offset */ void __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t dst_start, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk, @@ -969,7 +970,7 @@ __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t ds { ssize_t delta = src_start - dst_start; for (sparsemap_idx_t j = 0; j < capacity; j++) { - sparsemap_idx_t offset = __sm_get_chunk_offset(map, src_start + j); + ssize_t offset = __sm_get_chunk_offset(map, src_start + j); if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j + delta)) { size_t position; sm_bitvec_t fill; @@ -989,6 +990,9 @@ __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t ds __sm_remove_data(map, offset, sizeof(sm_bitvec_t)); } break; + case SM_OK: + default: + break; } } } @@ -1205,7 +1209,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value) uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 }; __sm_insert_data(map, offset, &buf[0], sizeof(buf)); - size_t aligned_idx = __sm_get_chunk_aligned_offset(idx); + size_t aligned_idx = __sm_get_chunk_aligned_offset(idx); // TODO: vector or chunk alignment? 
if (start - aligned_idx < SM_CHUNK_MAX_CAPACITY) { __sm_chunk_t chunk; __sm_chunk_init(&chunk, p + sizeof(sm_idx_t)); @@ -1446,9 +1450,8 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) size_t i = src_starting_offset; size_t merge_end_offset = __sm_get_chunk_aligned_offset(src_ending_offset) + SM_CHUNK_MAX_CAPACITY; - while (i <= merge_end_offset) { - ssize_t src_offset = __sm_get_chunk_offset(source, i); - src = __sm_get_chunk_data(source, src_offset); + src = __sm_get_chunk_data(source, 0); + while (src_count) { sm_idx_t src_start = *(sm_idx_t *)src; __sm_chunk_t src_chunk; __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t)); @@ -1461,57 +1464,74 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t)); size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk); + /* Try to expand the capacity if there's room before the start of the next chunk. */ + if (src_start == dst_start && dst_capacity < src_capacity) { + ssize_t nxt_offset = __sm_get_chunk_offset(destination, dst_start + dst_capacity + 1); + uint8_t *nxt_dst = __sm_get_chunk_data(destination, nxt_offset); + sm_idx_t nxt_dst_start = *(sm_idx_t *)nxt_dst; + if (nxt_dst_start > dst_start + src_capacity) { + __sm_chunk_increase_capacity(&dst_chunk, src_capacity); + dst_capacity = __sm_chunk_get_capacity(&dst_chunk); + } + } + /* Source chunk precedes next destination chunk. */ if ((src_start + src_capacity) <= dst_start) { size_t src_size = __sm_chunk_get_size(&src_chunk); - size_t offset = __sm_get_chunk_offset(destination, dst_start); + ssize_t offset = __sm_get_chunk_offset(destination, dst_start); __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size); /* Update the chunk count and data_used. 
*/ __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } /* Source chunk follows next destination chunk. */ if (src_start >= (dst_start + dst_capacity)) { size_t src_size = __sm_chunk_get_size(&src_chunk); - __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size); + if (dst_offset == __sm_get_chunk_offset(destination, SPARSEMAP_IDX_MAX)) { + __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size); + } else { + ssize_t offset = __sm_get_chunk_offset(destination, src_start); + __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size); + } /* Update the chunk count and data_used. */ __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } - if (src_start == dst_start) { - if (dst_capacity < src_capacity) { - __sm_chunk_increase_capacity(&dst_chunk, src_capacity); - dst_capacity = __sm_chunk_get_capacity(&dst_chunk); - } + /* Source and destination are a perfect overlapping pair. */ + if (src_start == dst_start && src_capacity == dst_capacity) { __sm_merge_chunk(destination, src_start, dst_start, dst_capacity, &dst_chunk, &src_chunk); - i += dst_capacity; + i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } - if (dst_start < src_start) { - if (dst_capacity < src_capacity) { - __sm_chunk_increase_capacity(&dst_chunk, src_capacity); - } + /* Non-uniform overlapping chunks. */ + if (dst_start < src_start || (dst_start == src_start && dst_capacity != src_capacity)) { size_t src_end = src_start + src_capacity; size_t dst_end = dst_start + dst_capacity; size_t overlap = src_end > dst_end ? 
src_capacity - (src_end - dst_end) : src_capacity; __sm_merge_chunk(destination, src_start, dst_start, overlap, &dst_chunk, &src_chunk); - i = src_start + overlap; - for (size_t n = i; n < src_end; n++) { + for (size_t n = src_start + overlap; n <= src_end; n++) { if (sparsemap_is_set(source, n)) { sparsemap_set(destination, n, true); } - i++; } + i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } - assert(!"shouldn't be here"); // TODO... } else { - if (src_start > dst_ending_offset || dst_offset == -1) { + if (src_start >= dst_ending_offset) { /* Starting offset is after destination chunks, so append data. */ size_t src_size = __sm_chunk_get_size(&src_chunk); __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size); @@ -1520,17 +1540,21 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source) __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } else { /* Source chunk precedes next destination chunk. */ size_t src_size = __sm_chunk_get_size(&src_chunk); - size_t offset = __sm_get_chunk_offset(destination, src_start); + ssize_t offset = __sm_get_chunk_offset(destination, src_start); __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size); /* Update the chunk count and data_used. */ __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1); i += src_capacity; + src_count--; + src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk); continue; } }