WIP: rank false 7041/7040
This commit is contained in:
parent
6e65bda211
commit
4cfecc045e
3 changed files with 43 additions and 45 deletions
|
@ -60,5 +60,6 @@ to an uncompressed bit vector (sometimes higher due to the bytes required for
|
|||
metadata). In such cases, other compression schemes are more efficient (see
|
||||
http://lemire.me/blog/archives/2008/08/20/the-mythical-bitmap-index/).
|
||||
|
||||
This library was originally created for hamsterdb [http://hamsterdb.com] in
|
||||
C++ and then translated to C99 code by Greg Burd <greg@burd.me>.
|
||||
This library was originally created for [hamsterdb](http://hamsterdb.com) in
|
||||
C++ and then translated to C and further improved by Greg Burd <greg@burd.me>
|
||||
for use in LMDB and OpenLDAP.
|
||||
|
|
|
@ -484,10 +484,12 @@ extern void print_bits(char *name, uint64_t value); // GSB
|
|||
* the '*offset' has reached 0.
|
||||
*/
|
||||
static size_t
|
||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *vec, bool value)
|
||||
__sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, size_t *pos, sm_bitvec_t *vec, bool value)
|
||||
{
|
||||
size_t ret = 0;
|
||||
|
||||
*pos = 0;
|
||||
|
||||
register uint8_t *p = (uint8_t *)map->m_data;
|
||||
for (size_t i = 0; i < sizeof(sm_bitvec_t); i++, p++) {
|
||||
for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
|
||||
|
@ -498,6 +500,7 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
if (flags == SM_PAYLOAD_ZEROS) {
|
||||
*vec = 0;
|
||||
if (idx >= SM_BITS_PER_VECTOR) {
|
||||
*pos += SM_BITS_PER_VECTOR;
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
if (*offset > SM_BITS_PER_VECTOR) {
|
||||
*offset = *offset - SM_BITS_PER_VECTOR;
|
||||
|
@ -508,6 +511,7 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
*offset = 0;
|
||||
}
|
||||
} else {
|
||||
*pos += idx;
|
||||
if (value == false) {
|
||||
if (*offset > idx) {
|
||||
*offset = *offset - idx;
|
||||
|
@ -523,6 +527,7 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
} else if (flags == SM_PAYLOAD_ONES) {
|
||||
*vec = UINT64_MAX;
|
||||
if (idx >= SM_BITS_PER_VECTOR) {
|
||||
*pos += SM_BITS_PER_VECTOR;
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
if (*offset > SM_BITS_PER_VECTOR) {
|
||||
*offset = *offset - SM_BITS_PER_VECTOR;
|
||||
|
@ -533,6 +538,7 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
*offset = 0;
|
||||
}
|
||||
} else {
|
||||
*pos += idx;
|
||||
if (value == true) {
|
||||
if (*offset > idx) {
|
||||
*offset = *offset - idx;
|
||||
|
@ -548,45 +554,35 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *
|
|||
} else if (flags == SM_PAYLOAD_MIXED) {
|
||||
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
|
||||
if (idx >= SM_BITS_PER_VECTOR) {
|
||||
*pos += SM_BITS_PER_VECTOR;
|
||||
idx -= SM_BITS_PER_VECTOR;
|
||||
uint64_t mask = *offset == 0 ? UINT64_MAX : ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - (*offset >= 64 ? 64 : *offset)));
|
||||
size_t pc = popcountll(w & mask);
|
||||
sm_bitvec_t mw;
|
||||
if (value == true) {
|
||||
ret += pc;
|
||||
mw = w & mask;
|
||||
} else {
|
||||
ret += SM_BITS_PER_VECTOR - pc;
|
||||
mw = ~w & mask;
|
||||
}
|
||||
size_t pc = popcountll(mw);
|
||||
ret += pc;
|
||||
*offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0;
|
||||
} else {
|
||||
*pos += idx;
|
||||
sm_bitvec_t mw;
|
||||
uint64_t mask;
|
||||
uint64_t idx_mask = idx == 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
||||
uint64_t offset_mask = *offset == 0 ? 0 : UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset);
|
||||
uint64_t idx_mask = (idx == 63) ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1;
|
||||
uint64_t offset_mask = *offset == 0 ? UINT64_MAX : ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - (*offset >= 64 ? 64 : *offset)));
|
||||
/* To count the set bits we need to mask off the portion of the vector that we need
|
||||
to count then call popcount(). So, let's create a mask for the range between
|
||||
offset and idx inclusive [*offset, idx]. */
|
||||
mask = idx_mask & offset_mask;
|
||||
if (value == true) {
|
||||
/* To count the set bits we need to mask off the portion of the vector that we need
|
||||
to count then call popcount(). So, let's create a mask for the range between
|
||||
offset and idx inclusive [*offset, idx]. */
|
||||
mask = idx_mask - offset_mask;
|
||||
mw = w & mask;
|
||||
ret += popcountll(mw);
|
||||
} else {
|
||||
/* To count the unset bits in this partial vector we need to use the idx_mask but ensure
|
||||
that the offset bits are also set. Then popcount(). Then we subtract the count of set
|
||||
bits found after masking from the possible number of bits that we examined. This should
|
||||
have inverted the popcount() and counted the unset bits in the range [*offset, idx]. */
|
||||
mask = idx_mask | (offset_mask > idx_mask ? idx_mask : offset_mask);
|
||||
mw = w & mask;
|
||||
size_t pc = popcountll(mw);
|
||||
#if 0 // GSB
|
||||
printf("---------------------\n");
|
||||
print_bits("om", offset_mask);
|
||||
print_bits("im", idx_mask);
|
||||
print_bits("m", mask);
|
||||
print_bits("mw", mw);
|
||||
printf("pc: %lu\tidx:%lu\t*o:%lu\n", pc, idx, *offset);
|
||||
#endif
|
||||
ret += idx + 1 - pc; /* We accounted for offset in our masking above. */
|
||||
mw = ~w & mask;
|
||||
}
|
||||
int pc = popcountll(mw);
|
||||
ret += pc;
|
||||
*offset = *offset > idx ? *offset - idx + 1 : 0;
|
||||
*vec = mw;
|
||||
(*vec) <<= *offset;
|
||||
|
@ -1330,7 +1326,7 @@ size_t
|
|||
sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec)
|
||||
{
|
||||
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
||||
size_t amt = 0, result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
||||
size_t amt = 0, gap, pos = 0, result = 0, prev = 0, count = __sm_get_chunk_map_count(map);
|
||||
uint8_t *p = __sm_get_chunk_map_data(map, 0);
|
||||
|
||||
if (count == 0) {
|
||||
|
@ -1342,6 +1338,8 @@ sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t
|
|||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
sm_idx_t start = *(sm_idx_t *)p;
|
||||
gap = start - (prev == 0 ? start : prev);
|
||||
(void)gap; // TODO... necessary?
|
||||
/* Start of this chunk is greater than the end of the desired range. */
|
||||
if (start > y) {
|
||||
if (value == true) {
|
||||
|
@ -1353,10 +1351,10 @@ sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t
|
|||
} else {
|
||||
/* The range and this chunk overlap. */
|
||||
if (value == false) {
|
||||
result += start - x;
|
||||
if (x > start) {
|
||||
x -= start;
|
||||
} else {
|
||||
result += start - x;
|
||||
x = 0;
|
||||
}
|
||||
}
|
||||
|
@ -1367,14 +1365,16 @@ sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t
|
|||
__sm_chunk_map_init(&chunk, p);
|
||||
|
||||
/* Count all the set/unset inside this chunk. */
|
||||
amt = __sm_chunk_map_rank(&chunk, &x, y - start, vec, value);
|
||||
amt = __sm_chunk_map_rank(&chunk, &x, y - start, &pos, vec, value);
|
||||
result += amt;
|
||||
p += __sm_chunk_map_get_size(&chunk);
|
||||
}
|
||||
/* Count/rank the unset bits that fall outside the last chunk but within the range. */
|
||||
/* Count any additional unset bits that fall outside the last chunk but
|
||||
within the range. */
|
||||
if (value == false) {
|
||||
if (y > prev + amt) {
|
||||
result += y - (prev + amt);
|
||||
size_t last = prev + pos - 1;
|
||||
if (y > last) {
|
||||
result += y - last;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
17
tests/test.c
17
tests/test.c
|
@ -777,12 +777,6 @@ test_api_rank_true(const MunitParameter params[], void *data)
|
|||
for (int j = i; j < 10000; j++) {
|
||||
int amt = j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
|
||||
int r = sparsemap_rank(map, i, j, true);
|
||||
#ifdef DEBUG
|
||||
if (r != amt) {
|
||||
printf("\033[2K\r");
|
||||
printf("%d\t%d\t--\t%d\t%d", i, j, amt, r);
|
||||
}
|
||||
#endif
|
||||
assert_true(r == amt);
|
||||
}
|
||||
}
|
||||
|
@ -829,10 +823,13 @@ test_api_rank_false(const MunitParameter params[], void *data)
|
|||
// one chunk means not so empty now!
|
||||
sparsemap_idx_t hole = 4999;
|
||||
sparsemap_set(map, hole, true);
|
||||
// for (int i = 0; i < 10000; i++) {
|
||||
// for (int j = i; j < 10000; j++) {
|
||||
for (int i = 5000; i < 10000; i++) {
|
||||
for (int j = 5000; j < 10000; j++) {
|
||||
#if 1
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
for (int j = i; j < 10000; j++) {
|
||||
#else
|
||||
for (int i = 7041; i < 10000; i++) {
|
||||
for (int j = 7040; j < 10000; j++) {
|
||||
#endif
|
||||
int amt = j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0);
|
||||
r = sparsemap_rank(map, i, j, false);
|
||||
#ifdef DEBUG
|
||||
|
|
Loading…
Reference in a new issue