WIP; span@ 1, 76

This commit is contained in:
Gregory Burd 2024-04-09 22:43:56 -04:00
parent 8079276343
commit b6118d7294

View file

@ -426,10 +426,13 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n)
} }
/** /**
* Counts the set bits in the range [first, idx] inclusive. * Counts the set bits in the range [0, 'idx'] inclusive ignoring the first
* '*offset' bits. Modifies '*offset', decreasing it by the number of bits
* ignored during the search. The ranking (counting) starts once '*offset'
* has reached 0.
*/ */
static size_t static size_t
__sm_chunk_map_rank(__sm_chunk_t *map, size_t idx, size_t *after) __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx)
{ {
size_t ret = 0; size_t ret = 0;
@ -442,41 +445,42 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t idx, size_t *after)
} }
if (flags == SM_PAYLOAD_ZEROS) { if (flags == SM_PAYLOAD_ZEROS) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
if (*after > SM_BITS_PER_VECTOR) { if (*offset > SM_BITS_PER_VECTOR) {
*after = *after - SM_BITS_PER_VECTOR; *offset = *offset - SM_BITS_PER_VECTOR;
} else { } else {
idx -= SM_BITS_PER_VECTOR - *after; idx -= SM_BITS_PER_VECTOR - *offset;
*after = 0; *offset = 0;
} }
} else { } else {
return (ret); return (ret);
} }
} else if (flags == SM_PAYLOAD_ONES) { } else if (flags == SM_PAYLOAD_ONES) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
if (*after > SM_BITS_PER_VECTOR) { if (*offset > SM_BITS_PER_VECTOR) {
*after = *after - SM_BITS_PER_VECTOR; *offset = *offset - SM_BITS_PER_VECTOR;
} else { } else {
idx -= SM_BITS_PER_VECTOR - *after; idx -= SM_BITS_PER_VECTOR - *offset;
if (*after == 0) { if (*offset == 0) {
ret += SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR;
} }
*after = 0; *offset = 0;
} }
} else { } else {
return (ret + idx); return (ret + idx);
} }
} else if (flags == SM_PAYLOAD_MIXED) { } else if (flags == SM_PAYLOAD_MIXED) {
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
uint64_t after_mask = *after > 63 ? 0 : (((uint64_t)1 << *after) - 1);
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
uint64_t mask_offset = *offset > 64 ? 0 : ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset));
idx -= SM_BITS_PER_VECTOR; idx -= SM_BITS_PER_VECTOR;
ret += popcountll(w & after_mask); ret += popcountll(w & mask_offset);
*after = (*after > SM_BITS_PER_VECTOR) ? *after - SM_BITS_PER_VECTOR : 0; *offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0;
} else { } else {
/* Create a mask for the range between after and idx inclusive [*after, idx]. */ /* Create a mask for the range between offset and idx inclusive [*offset, idx]. */
uint64_t offset_mask = *offset > 64 ? 0 : (((uint64_t)1 << *offset) - 1);
uint64_t idx_mask = ((uint64_t)1 << (idx + 1)) - 1; uint64_t idx_mask = ((uint64_t)1 << (idx + 1)) - 1;
ret += popcountll(w & (idx_mask - after_mask)); ret += popcountll(w & (idx_mask - offset_mask));
*after = *after > idx ? *after - idx : 0; *offset = *offset > idx ? *offset - idx : 0;
return (ret); return (ret);
} }
} }
@ -1164,27 +1168,28 @@ sparsemap_select(sparsemap_t *map, size_t n)
} }
/** /**
* Counts the set bits in the range [first, last] inclusive. * Counts the set bits starting at 'offset' until and including 'idx', meaning
* [offset, idx] inclusive.
*/ */
size_t size_t
sparsemap_rank(sparsemap_t *map, size_t first, size_t last) sparsemap_rank(sparsemap_t *map, size_t offset, size_t idx)
{ {
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0, after = first, prev = 0, count = __sm_get_chunk_map_count(map); size_t result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, 0); uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
sm_idx_t start = *(sm_idx_t *)p; sm_idx_t start = *(sm_idx_t *)p;
if (start > last) { if (start > idx) {
return (result); return (result);
} }
after -= start - prev; offset -= start - prev;
prev = start; prev = start;
p += sizeof(sm_idx_t); p += sizeof(sm_idx_t);
__sm_chunk_t chunk; __sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p); __sm_chunk_map_init(&chunk, p);
result += __sm_chunk_map_rank(&chunk, last - start, &after); result += __sm_chunk_map_rank(&chunk, &offset, idx - start);
p += __sm_chunk_map_get_size(&chunk); p += __sm_chunk_map_get_size(&chunk);
} }
return (result); return (result);