split/merge #9

Merged
greg merged 4 commits from gburd/split-merge into main 2024-05-15 17:57:40 +00:00
Showing only changes of commit b0c74459ab - Show all commits

View file

@ -871,10 +871,10 @@ __sm_get_chunk_aligned_offset(size_t idx)
*
* @param[in] map The sparsemap_t in question.
* @param[in] idx The index of the chunk to locate.
* @returns the byte offset of a __sm_chunk_t in m_data, or -1 there
* @returns the byte offset of a __sm_chunk_t in m_data, or -1 if there
* are no chunks.
*/
static size_t
static ssize_t
__sm_get_chunk_offset(sparsemap_t *map, sparsemap_idx_t idx)
{
size_t count = __sm_get_chunk_count(map);
@ -962,6 +962,7 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
*
* @param[in] map The map the chunk belongs to.
* @param[in] offset The offset of the first bit in the chunk to be merged.
* @todo merge at the vector level not offset
*/
void
__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t dst_start, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk,
@ -969,7 +970,7 @@ __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t ds
{
ssize_t delta = src_start - dst_start;
for (sparsemap_idx_t j = 0; j < capacity; j++) {
sparsemap_idx_t offset = __sm_get_chunk_offset(map, src_start + j);
ssize_t offset = __sm_get_chunk_offset(map, src_start + j);
if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j + delta)) {
size_t position;
sm_bitvec_t fill;
@ -989,6 +990,9 @@ __sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t ds
__sm_remove_data(map, offset, sizeof(sm_bitvec_t));
}
break;
case SM_OK:
default:
break;
}
}
}
@ -1205,7 +1209,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
uint8_t buf[sizeof(sm_idx_t) + sizeof(sm_bitvec_t) * 2] = { 0 };
__sm_insert_data(map, offset, &buf[0], sizeof(buf));
size_t aligned_idx = __sm_get_chunk_aligned_offset(idx);
size_t aligned_idx = __sm_get_chunk_aligned_offset(idx); // TODO: vector or chunk alignment?
if (start - aligned_idx < SM_CHUNK_MAX_CAPACITY) {
__sm_chunk_t chunk;
__sm_chunk_init(&chunk, p + sizeof(sm_idx_t));
@ -1446,9 +1450,8 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
size_t i = src_starting_offset;
size_t merge_end_offset = __sm_get_chunk_aligned_offset(src_ending_offset) + SM_CHUNK_MAX_CAPACITY;
while (i <= merge_end_offset) {
ssize_t src_offset = __sm_get_chunk_offset(source, i);
src = __sm_get_chunk_data(source, src_offset);
src = __sm_get_chunk_data(source, 0);
while (src_count) {
sm_idx_t src_start = *(sm_idx_t *)src;
__sm_chunk_t src_chunk;
__sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t));
@ -1461,57 +1464,74 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
__sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t));
size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
/* Try to expand the capacity if there's room before the start of the next chunk. */
if (src_start == dst_start && dst_capacity < src_capacity) {
ssize_t nxt_offset = __sm_get_chunk_offset(destination, dst_start + dst_capacity + 1);
uint8_t *nxt_dst = __sm_get_chunk_data(destination, nxt_offset);
sm_idx_t nxt_dst_start = *(sm_idx_t *)nxt_dst;
if (nxt_dst_start > dst_start + src_capacity) {
__sm_chunk_increase_capacity(&dst_chunk, src_capacity);
dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
}
}
/* Source chunk precedes next destination chunk. */
if ((src_start + src_capacity) <= dst_start) {
size_t src_size = __sm_chunk_get_size(&src_chunk);
size_t offset = __sm_get_chunk_offset(destination, dst_start);
ssize_t offset = __sm_get_chunk_offset(destination, dst_start);
__sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);
/* Update the chunk count and data_used. */
__sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
}
/* Source chunk follows next destination chunk. */
if (src_start >= (dst_start + dst_capacity)) {
size_t src_size = __sm_chunk_get_size(&src_chunk);
if (dst_offset == __sm_get_chunk_offset(destination, SPARSEMAP_IDX_MAX)) {
__sm_append_data(destination, src, sizeof(sm_idx_t) + src_size);
} else {
ssize_t offset = __sm_get_chunk_offset(destination, src_start);
__sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);
}
/* Update the chunk count and data_used. */
__sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
}
if (src_start == dst_start) {
if (dst_capacity < src_capacity) {
__sm_chunk_increase_capacity(&dst_chunk, src_capacity);
dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
}
/* Source and destination are a perfectly overlapping pair. */
if (src_start == dst_start && src_capacity == dst_capacity) {
__sm_merge_chunk(destination, src_start, dst_start, dst_capacity, &dst_chunk, &src_chunk);
i += dst_capacity;
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
}
if (dst_start < src_start) {
if (dst_capacity < src_capacity) {
__sm_chunk_increase_capacity(&dst_chunk, src_capacity);
}
/* Non-uniform overlapping chunks. */
if (dst_start < src_start || (dst_start == src_start && dst_capacity != src_capacity)) {
size_t src_end = src_start + src_capacity;
size_t dst_end = dst_start + dst_capacity;
size_t overlap = src_end > dst_end ? src_capacity - (src_end - dst_end) : src_capacity;
__sm_merge_chunk(destination, src_start, dst_start, overlap, &dst_chunk, &src_chunk);
i = src_start + overlap;
for (size_t n = i; n < src_end; n++) {
for (size_t n = src_start + overlap; n <= src_end; n++) {
if (sparsemap_is_set(source, n)) {
sparsemap_set(destination, n, true);
}
i++;
}
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
}
assert(!"shouldn't be here"); // TODO...
} else {
if (src_start > dst_ending_offset || dst_offset == -1) {
if (src_start >= dst_ending_offset) {
/* Starting offset is after destination chunks, so append data. */
size_t src_size = __sm_chunk_get_size(&src_chunk);
__sm_append_data(destination, src, sizeof(sm_idx_t) + src_size);
@ -1520,17 +1540,21 @@ sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
__sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
} else {
/* Source chunk precedes next destination chunk. */
size_t src_size = __sm_chunk_get_size(&src_chunk);
size_t offset = __sm_get_chunk_offset(destination, src_start);
ssize_t offset = __sm_get_chunk_offset(destination, src_start);
__sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);
/* Update the chunk count and data_used. */
__sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);
i += src_capacity;
src_count--;
src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
continue;
}
}