WIP
This commit is contained in:
parent
599284d5f5
commit
8ed31ad904
2 changed files with 90 additions and 23 deletions
100
src/sparsemap.c
100
src/sparsemap.c
|
@ -475,11 +475,24 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Debugging aid: writes the 64 bits of 'value' to stdout, most significant
 * bit first, with a space after every group of 8 bits, followed by a newline.
 * When 'name' is not NULL it is printed first, followed by a tab.
 */
void printBits(char *name, uint64_t value) {
  if (name) {
    printf("%s\t", name);
  }
  for (int i = 63; i >= 0; i--) {
    /* Emit the digit directly: handing a uint64_t to a "%ld" conversion
       mismatches the (signed long) argument type that conversion expects. */
    putchar(((value >> i) & 1) ? '1' : '0');
    if (i % 8 == 0) {
      putchar(' '); /* Add space for better readability */
    }
  }
  putchar('\n');
}
|
||||
|
||||
/**
|
||||
* Counts the set bits in the range [0, 'idx'] inclusive ignoring the first
|
||||
* '*offset' bits. Modifies '*offset' decreasing it by the number of bits
|
||||
* ignored during the search. The ranking (counting) will start after the
|
||||
* '*offset' has been reached 0.
|
||||
* '*offset' bits in this chunk. Modifies '*offset' decreasing it by the number
|
||||
* of bits ignored during the search. The ranking (counting) will start after
|
||||
* the '*offset' has reached 0.
|
||||
*/
|
||||
static size_t
|
||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *vec, bool value)
|
||||
|
@ -509,7 +522,11 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
} else {
|
||||
*vec = 0;
|
||||
if (value == false) {
|
||||
return ret + idx;
|
||||
if (*offset > idx) {
|
||||
*offset = *offset - idx;
|
||||
} else {
|
||||
return ret + idx - *offset;
|
||||
}
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
|
@ -537,27 +554,45 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
}
|
||||
} else if (flags == SM_PAYLOAD_MIXED) {
|
||||
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
|
||||
if (idx > SM_BITS_PER_VECTOR) {
|
||||
uint64_t mask = ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset));
|
||||
if (idx >= SM_BITS_PER_VECTOR) {
|
||||
uint64_t mask = *offset > 0 ? ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset)) : UINT64_MAX;
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
size_t pc = popcountll(w & mask);
|
||||
if (value == true) {
|
||||
ret += pc;
|
||||
} else {
|
||||
ret += popcountll(mask) - pc;
|
||||
ret += SM_BITS_PER_VECTOR - pc;
|
||||
}
|
||||
*offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0;
|
||||
} else {
|
||||
/* Create a mask for the range between offset and idx inclusive [*offset, idx]. */
|
||||
uint64_t offset_mask = (((uint64_t)1 << *offset) - 1);
|
||||
uint64_t idx_mask = idx >= 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
||||
uint64_t mask = (idx_mask - offset_mask);
|
||||
sm_bitvec_t mw = w & mask;
|
||||
size_t pc = popcountll(mw);
|
||||
sm_bitvec_t mw;
|
||||
uint64_t mask;
|
||||
uint64_t idx_mask = idx == 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
||||
uint64_t offset_mask = *offset == 0 ? 0 : UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset);
|
||||
if (value == true) {
|
||||
ret += pc;
|
||||
/* To count the set bits we need to mask off the portion of the vector that we need
|
||||
to count then call popcount(). So, let's create a mask for the range between
|
||||
offset and idx inclusive [*offset, idx]. */
|
||||
mask = idx_mask - offset_mask;
|
||||
mw = w & mask;
|
||||
ret += popcountll(mw);
|
||||
} else {
|
||||
ret += popcountll(mask) - pc;
|
||||
/* To count the unset bits in this partial vector we need to use the idx_mask but ensure
|
||||
that the offset bits are also set. Then popcount(). Then we subtract the count of set
|
||||
bits found after masking from the possible number of bits that we examined. This should
|
||||
have inverted the popcount() and counted the unset bits in the range [*offset, idx]. */
|
||||
mask = idx_mask | offset_mask;
|
||||
mw = w & mask;
|
||||
size_t pc = popcountll(mw);
|
||||
#if 0
|
||||
printf("---------------------\n");
|
||||
printBits("om", offset_mask);
|
||||
printBits("im", idx_mask);
|
||||
printBits("m", mask);
|
||||
printBits("mw", mw);
|
||||
printf("pc: %lu\tidx:%lu\t*o:%lu\n", pc, idx, *offset);
|
||||
#endif
|
||||
ret += idx + 1 - pc; /* We accounted for offset in our masking above. */
|
||||
}
|
||||
*offset = *offset > idx ? *offset - idx : 0;
|
||||
*vec = mw;
|
||||
|
@ -817,6 +852,9 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
|
|||
void
|
||||
sparsemap_clear(sparsemap_t *map)
|
||||
{
|
||||
if (map == NULL) {
|
||||
return;
|
||||
}
|
||||
memset(map->m_data, 0, map->m_capacity);
|
||||
map->m_data_used = SM_SIZEOF_OVERHEAD;
|
||||
__sm_set_chunk_map_count(map, 0);
|
||||
|
@ -1299,34 +1337,56 @@ size_t
|
|||
sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec)
|
||||
{
|
||||
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
||||
size_t result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
||||
size_t gap, amt = 0, result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
||||
uint8_t *p = __sm_get_chunk_map_data(map, 0);
|
||||
|
||||
/* The count/rank of zero bits in an empty map is inf, so what you requested is the answer. */
|
||||
/* The count/rank of unset bits in an empty map is inf, so what you requested is the answer. */
|
||||
if (count == 0 && value == false) {
|
||||
return y - x + 1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
sm_idx_t start = *(sm_idx_t *)p;
|
||||
gap = start - (prev == 0 ? start : prev);
|
||||
/* Start of this chunk is greater than the end of the desired range. */
|
||||
if (start > y) {
|
||||
if (value == true) {
|
||||
return result;
|
||||
} else {
|
||||
/* This chunk starts after our range [x, y]. */
|
||||
return y - x + 1;
|
||||
return result + gap + (y - x) + 1;
|
||||
}
|
||||
} else {
|
||||
/* The range and this chunk overlap. */
|
||||
if (value == false) {
|
||||
result += start - x;
|
||||
}
|
||||
}
|
||||
x -= start - prev;
|
||||
x -= gap;
|
||||
if (value == false) {
|
||||
result += gap;
|
||||
}
|
||||
prev = start;
|
||||
p += sizeof(sm_idx_t);
|
||||
__sm_chunk_t chunk;
|
||||
__sm_chunk_map_init(&chunk, p);
|
||||
|
||||
result += __sm_chunk_map_rank(&chunk, &x, y - start, vec, value);
|
||||
/* Ensure that x, the offset, isn't beyond the start of this chunk. */
|
||||
//if (x > y - start) {
|
||||
// amt = value ? 0 : y - start + 1;
|
||||
//} else {
|
||||
/* Count all the set/unset inside this chunk. */
|
||||
amt = __sm_chunk_map_rank(&chunk, &x, y - start, vec, value);
|
||||
//}
|
||||
result += amt;
|
||||
p += __sm_chunk_map_get_size(&chunk);
|
||||
}
|
||||
/* Count/rank the unset bits that fall outside the last chunk but within the range. */
|
||||
if (value == false) {
|
||||
if (y > prev + amt) {
|
||||
result += y - (prev + amt);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
13
tests/test.c
13
tests/test.c
|
@ -820,11 +820,18 @@ test_api_rank_false(const MunitParameter params[], void *data)
|
|||
}
|
||||
|
||||
// one chunk means not so empty now!
|
||||
sparsemap_set(map, 4999, true);
|
||||
sparsemap_idx_t hole = 4999;
|
||||
sparsemap_set(map, hole, true);
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
for (int j = i; j < 10000; j++) {
|
||||
int amt = j - i + 1 - ((4999 > i && 4999 < j) ? 1 : 0);
|
||||
r = sparsemap_rank(map, i, j, false); // GSB
|
||||
int amt = j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
|
||||
r = sparsemap_rank(map, i, j, false);
|
||||
#if 1
|
||||
if (r != amt) {
|
||||
printf("\033[2K\r");
|
||||
printf("%d\t%d\t--\t%d\t%d", i, j, amt, r);
|
||||
}
|
||||
#endif
|
||||
assert_true(r == amt);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue