WIP
This commit is contained in:
parent
599284d5f5
commit
8ed31ad904
2 changed files with 90 additions and 23 deletions
100
src/sparsemap.c
100
src/sparsemap.c
|
@ -475,11 +475,24 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Debug helper: writes the 64 bits of 'value' to stdout as '0'/'1'
 * characters, most significant bit first.
 *
 * A single space follows every group of eight bits (including the last
 * group) and the line is terminated with a newline. When 'name' is not NULL
 * it is printed first, followed by a tab, as a label for the line.
 *
 * @param name   optional label printed before the bits; may be NULL
 * @param value  the 64-bit word to display
 */
void
printBits(const char *name, uint64_t value)
{
  if (name != NULL) {
    printf("%s\t", name);
  }
  for (int i = 63; i >= 0; i--) {
    /* Emit the digit as a character rather than through a printf integer
       conversion, so no format specifier has to match the underlying type
       of uint64_t (which is not 'long' on every platform). */
    putchar(((value >> i) & 1) ? '1' : '0');
    if (i % 8 == 0) {
      putchar(' '); /* Byte separator for readability. */
    }
  }
  putchar('\n');
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Counts the set bits in the range [0, 'idx'] inclusive ignoring the first
|
* Counts the set bits in the range [0, 'idx'] inclusive ignoring the first
|
||||||
* '*offset' bits. Modifies '*offset' decreasing it by the number of bits
|
* '*offset' bits in this chunk. Modifies '*offset' decreasing it by the number
|
||||||
* ignored during the search. The ranking (counting) will start after the
|
* of bits ignored during the search. The ranking (counting) will start after
|
||||||
* '*offset' has been reached 0.
|
* '*offset' has reached 0.
|
||||||
*/
|
*/
|
||||||
static size_t
|
static size_t
|
||||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *vec, bool value)
|
__sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *vec, bool value)
|
||||||
|
@ -509,7 +522,11 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
||||||
} else {
|
} else {
|
||||||
*vec = 0;
|
*vec = 0;
|
||||||
if (value == false) {
|
if (value == false) {
|
||||||
return ret + idx;
|
if (*offset > idx) {
|
||||||
|
*offset = *offset - idx;
|
||||||
|
} else {
|
||||||
|
return ret + idx - *offset;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -537,27 +554,45 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
||||||
}
|
}
|
||||||
} else if (flags == SM_PAYLOAD_MIXED) {
|
} else if (flags == SM_PAYLOAD_MIXED) {
|
||||||
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
|
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
|
||||||
if (idx > SM_BITS_PER_VECTOR) {
|
if (idx >= SM_BITS_PER_VECTOR) {
|
||||||
uint64_t mask = ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset));
|
uint64_t mask = *offset > 0 ? ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset)) : UINT64_MAX;
|
||||||
idx -= SM_BITS_PER_VECTOR;
|
idx -= SM_BITS_PER_VECTOR;
|
||||||
size_t pc = popcountll(w & mask);
|
size_t pc = popcountll(w & mask);
|
||||||
if (value == true) {
|
if (value == true) {
|
||||||
ret += pc;
|
ret += pc;
|
||||||
} else {
|
} else {
|
||||||
ret += popcountll(mask) - pc;
|
ret += SM_BITS_PER_VECTOR - pc;
|
||||||
}
|
}
|
||||||
*offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0;
|
*offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0;
|
||||||
} else {
|
} else {
|
||||||
/* Create a mask for the range between offset and idx inclusive [*offset, idx]. */
|
sm_bitvec_t mw;
|
||||||
uint64_t offset_mask = (((uint64_t)1 << *offset) - 1);
|
uint64_t mask;
|
||||||
uint64_t idx_mask = idx >= 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
uint64_t idx_mask = idx == 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
||||||
uint64_t mask = (idx_mask - offset_mask);
|
uint64_t offset_mask = *offset == 0 ? 0 : UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset);
|
||||||
sm_bitvec_t mw = w & mask;
|
|
||||||
size_t pc = popcountll(mw);
|
|
||||||
if (value == true) {
|
if (value == true) {
|
||||||
ret += pc;
|
/* To count the set bits we need to mask off the portion of the vector that we need
|
||||||
|
to count then call popcount(). So, let's create a mask for the range between
|
||||||
|
offset and idx inclusive [*offset, idx]. */
|
||||||
|
mask = idx_mask - offset_mask;
|
||||||
|
mw = w & mask;
|
||||||
|
ret += popcountll(mw);
|
||||||
} else {
|
} else {
|
||||||
ret += popcountll(mask) - pc;
|
/* To count the unset bits in this partial vector we need to use the idx_mask but ensure
|
||||||
|
that the offset bits are also set. Then popcount(). Then we subtract the count of set
|
||||||
|
bits found after masking from the possible number of bits that we examined. This should
|
||||||
|
have inverted the popcount() and counted the unset bits in the range [*offset, idx]. */
|
||||||
|
mask = idx_mask | offset_mask;
|
||||||
|
mw = w & mask;
|
||||||
|
size_t pc = popcountll(mw);
|
||||||
|
#if 0
|
||||||
|
printf("---------------------\n");
|
||||||
|
printBits("om", offset_mask);
|
||||||
|
printBits("im", idx_mask);
|
||||||
|
printBits("m", mask);
|
||||||
|
printBits("mw", mw);
|
||||||
|
printf("pc: %lu\tidx:%lu\t*o:%lu\n", pc, idx, *offset);
|
||||||
|
#endif
|
||||||
|
ret += idx + 1 - pc; /* We accounted for offset in our masking above. */
|
||||||
}
|
}
|
||||||
*offset = *offset > idx ? *offset - idx : 0;
|
*offset = *offset > idx ? *offset - idx : 0;
|
||||||
*vec = mw;
|
*vec = mw;
|
||||||
|
@ -817,6 +852,9 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
|
||||||
void
|
void
|
||||||
sparsemap_clear(sparsemap_t *map)
|
sparsemap_clear(sparsemap_t *map)
|
||||||
{
|
{
|
||||||
|
if (map == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
memset(map->m_data, 0, map->m_capacity);
|
memset(map->m_data, 0, map->m_capacity);
|
||||||
map->m_data_used = SM_SIZEOF_OVERHEAD;
|
map->m_data_used = SM_SIZEOF_OVERHEAD;
|
||||||
__sm_set_chunk_map_count(map, 0);
|
__sm_set_chunk_map_count(map, 0);
|
||||||
|
@ -1299,34 +1337,56 @@ size_t
|
||||||
sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec)
|
sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec)
|
||||||
{
|
{
|
||||||
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
||||||
size_t result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
size_t gap, amt = 0, result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
||||||
uint8_t *p = __sm_get_chunk_map_data(map, 0);
|
uint8_t *p = __sm_get_chunk_map_data(map, 0);
|
||||||
|
|
||||||
/* The count/rank of zero bits in an empty map is inf, so what you requested is the answer. */
|
/* The count/rank of unset bits in an empty map is inf, so what you requested is the answer. */
|
||||||
if (count == 0 && value == false) {
|
if (count == 0 && value == false) {
|
||||||
return y - x + 1;
|
return y - x + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
sm_idx_t start = *(sm_idx_t *)p;
|
sm_idx_t start = *(sm_idx_t *)p;
|
||||||
|
gap = start - (prev == 0 ? start : prev);
|
||||||
/* Start of this chunk is greater than the end of the desired range. */
|
/* Start of this chunk is greater than the end of the desired range. */
|
||||||
if (start > y) {
|
if (start > y) {
|
||||||
if (value == true) {
|
if (value == true) {
|
||||||
return result;
|
return result;
|
||||||
} else {
|
} else {
|
||||||
/* This chunk starts after our range [x, y]. */
|
/* This chunk starts after our range [x, y]. */
|
||||||
return y - x + 1;
|
return result + gap + (y - x) + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* The range and this chunk overlap. */
|
||||||
|
if (value == false) {
|
||||||
|
result += start - x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
x -= start - prev;
|
x -= gap;
|
||||||
|
if (value == false) {
|
||||||
|
result += gap;
|
||||||
|
}
|
||||||
prev = start;
|
prev = start;
|
||||||
p += sizeof(sm_idx_t);
|
p += sizeof(sm_idx_t);
|
||||||
__sm_chunk_t chunk;
|
__sm_chunk_t chunk;
|
||||||
__sm_chunk_map_init(&chunk, p);
|
__sm_chunk_map_init(&chunk, p);
|
||||||
|
|
||||||
result += __sm_chunk_map_rank(&chunk, &x, y - start, vec, value);
|
/* Ensure that x, the offset, isn't beyond the start of this chunk. */
|
||||||
|
//if (x > y - start) {
|
||||||
|
// amt = value ? 0 : y - start + 1;
|
||||||
|
//} else {
|
||||||
|
/* Count all the set/unset inside this chunk. */
|
||||||
|
amt = __sm_chunk_map_rank(&chunk, &x, y - start, vec, value);
|
||||||
|
//}
|
||||||
|
result += amt;
|
||||||
p += __sm_chunk_map_get_size(&chunk);
|
p += __sm_chunk_map_get_size(&chunk);
|
||||||
}
|
}
|
||||||
|
/* Count/rank the unset bits that fall outside the last chunk but within the range. */
|
||||||
|
if (value == false) {
|
||||||
|
if (y > prev + amt) {
|
||||||
|
result += y - (prev + amt);
|
||||||
|
}
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
13
tests/test.c
13
tests/test.c
|
@ -820,11 +820,18 @@ test_api_rank_false(const MunitParameter params[], void *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
// one chunk means not so empty now!
|
// one chunk means not so empty now!
|
||||||
sparsemap_set(map, 4999, true);
|
sparsemap_idx_t hole = 4999;
|
||||||
|
sparsemap_set(map, hole, true);
|
||||||
for (int i = 0; i < 10000; i++) {
|
for (int i = 0; i < 10000; i++) {
|
||||||
for (int j = i; j < 10000; j++) {
|
for (int j = i; j < 10000; j++) {
|
||||||
int amt = j - i + 1 - ((4999 > i && 4999 < j) ? 1 : 0);
|
int amt = j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
|
||||||
r = sparsemap_rank(map, i, j, false); // GSB
|
r = sparsemap_rank(map, i, j, false);
|
||||||
|
#if 1
|
||||||
|
if (r != amt) {
|
||||||
|
printf("\033[2K\r");
|
||||||
|
printf("%d\t%d\t--\t%d\t%d", i, j, amt, r);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
assert_true(r == amt);
|
assert_true(r == amt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue