From 8ed31ad904ff158ecda447fae104a66f024e0c5a Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 22 Apr 2024 21:10:19 -0400 Subject: [PATCH] WIP --- src/sparsemap.c | 100 ++++++++++++++++++++++++++++++++++++++---------- tests/test.c | 13 +++++-- 2 files changed, 90 insertions(+), 23 deletions(-) diff --git a/src/sparsemap.c b/src/sparsemap.c index 2f7e5ef..05d1296 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -475,11 +475,24 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) return ret; } +void printBits(char *name, uint64_t value) { /* Debug helper: prints 'name' and a tab (when 'name' is non-NULL), then the 64 bits of 'value' most-significant first with a space after each group of 8 bits, then a newline. */ + if (name) { + printf("%s\t", name); + } + for (int i = 63; i >= 0; i--) { + printf("%d", (int)((value >> i) & 1)); /* the masked bit is 0 or 1; cast so the argument matches %d on every ABI */ + if (i % 8 == 0) { + printf(" "); // Add space for better readability + } + } + printf("\n"); +} + /** * Counts the set bits in the range [0, 'idx'] inclusive ignoring the first - * '*offset' bits. Modifies '*offset' decreasing it by the number of bits - * ignored during the search. The ranking (counting) will start after the - * '*offset' has been reached 0. + * '*offset' bits in this chunk. Modifies '*offset' decreasing it by the number + * of bits ignored during the search. The ranking (counting) will start after + * the '*offset' has reached 0. 
*/ static size_t __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t *vec, bool value) @@ -509,7 +522,11 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t * } else { *vec = 0; if (value == false) { - return ret + idx; + if (*offset > idx) { + *offset = *offset - idx; + } else { + return ret + idx - *offset; + } } else { return ret; } @@ -537,27 +554,45 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t *offset, size_t idx, sm_bitvec_t * } } else if (flags == SM_PAYLOAD_MIXED) { sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; - if (idx > SM_BITS_PER_VECTOR) { - uint64_t mask = ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset)); + if (idx >= SM_BITS_PER_VECTOR) { + uint64_t mask = *offset > 0 ? ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset)) : UINT64_MAX; idx -= SM_BITS_PER_VECTOR; size_t pc = popcountll(w & mask); if (value == true) { ret += pc; } else { - ret += popcountll(mask) - pc; + ret += SM_BITS_PER_VECTOR - pc; } *offset = (*offset > SM_BITS_PER_VECTOR) ? *offset - SM_BITS_PER_VECTOR : 0; } else { - /* Create a mask for the range between offset and idx inclusive [*offset, idx]. */ - uint64_t offset_mask = (((uint64_t)1 << *offset) - 1); - uint64_t idx_mask = idx >= 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1; - uint64_t mask = (idx_mask - offset_mask); - sm_bitvec_t mw = w & mask; - size_t pc = popcountll(mw); + sm_bitvec_t mw; + uint64_t mask; + uint64_t idx_mask = idx == 63 ? UINT64_MAX : ((uint64_t)1 << (idx + 1)) - 1; + uint64_t offset_mask = *offset == 0 ? 0 : UINT64_MAX >> (SM_BITS_PER_VECTOR - *offset); if (value == true) { - ret += pc; + /* To count the set bits we need to mask off the portion of the vector that we need + to count then call popcount(). So, let's create a mask for the range between + offset and idx inclusive [*offset, idx]. 
*/ + mask = idx_mask - offset_mask; + mw = w & mask; + ret += popcountll(mw); } else { - ret += popcountll(mask) - pc; + /* To count the unset bits in this partial vector we need to use the idx_mask but ensure + that the offset bits are also set. Then popcount(). Then we subtract the count of set + bits found after masking from the possible number of bits that we examined. This should + have inverted the popcount() and counted the unset bits in the range [*offset, idx]. */ + mask = idx_mask | offset_mask; + mw = w & mask; + size_t pc = popcountll(mw); +#if 0 + printf("---------------------\n"); + printBits("om", offset_mask); + printBits("im", idx_mask); + printBits("m", mask); + printBits("mw", mw); + printf("pc: %lu\tidx:%lu\t*o:%lu\n", pc, idx, *offset); +#endif + ret += idx + 1 - pc; /* We accounted for offset in our masking above. */ } *offset = *offset > idx ? *offset - idx : 0; *vec = mw; @@ -817,6 +852,9 @@ __sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size) void sparsemap_clear(sparsemap_t *map) { + if (map == NULL) { + return; + } memset(map->m_data, 0, map->m_capacity); map->m_data_used = SM_SIZEOF_OVERHEAD; __sm_set_chunk_map_count(map, 0); @@ -1299,34 +1337,56 @@ size_t sparsemap_rank_vec(sparsemap_t *map, size_t x, size_t y, bool value, sm_bitvec_t *vec) { assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); - size_t result = 0, prev = 0, count = __sm_get_chunk_map_count(map); + size_t gap, amt = 0, result = 0, prev = 0, count = __sm_get_chunk_map_count(map); uint8_t *p = __sm_get_chunk_map_data(map, 0); - /* The count/rank of zero bits in an empty map is inf, so what you requested is the answer. */ + /* The count/rank of unset bits in an empty map is inf, so what you requested is the answer. */ if (count == 0 && value == false) { return y - x + 1; } for (size_t i = 0; i < count; i++) { sm_idx_t start = *(sm_idx_t *)p; + gap = start - (prev == 0 ? start : prev); /* Start of this chunk is greater than the end of the desired range. 
*/ if (start > y) { if (value == true) { return result; } else { /* This chunk starts after our range [x, y]. */ - return y - x + 1; + return result + gap + (y - x) + 1; + } + } else { + /* The range and this chunk overlap. */ + if (value == false) { + result += start - x; } } - x -= start - prev; + x -= gap; + if (value == false) { + result += gap; + } prev = start; p += sizeof(sm_idx_t); __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); - result += __sm_chunk_map_rank(&chunk, &x, y - start, vec, value); + /* Ensure that x, the offset, isn't beyond the start of this chunk. */ + //if (x > y - start) { + // amt = value ? 0 : y - start + 1; + //} else { + /* Count all the set/unset inside this chunk. */ + amt = __sm_chunk_map_rank(&chunk, &x, y - start, vec, value); + //} + result += amt; p += __sm_chunk_map_get_size(&chunk); } + /* Count/rank the unset bits that fall outside the last chunk but within the range. */ + if (value == false) { + if (y > prev + amt) { + result += y - (prev + amt); + } + } return result; } diff --git a/tests/test.c b/tests/test.c index 0ea8d0b..330174f 100644 --- a/tests/test.c +++ b/tests/test.c @@ -820,11 +820,18 @@ test_api_rank_false(const MunitParameter params[], void *data) } // one chunk means not so empty now! - sparsemap_set(map, 4999, true); + sparsemap_idx_t hole = 4999; + sparsemap_set(map, hole, true); for (int i = 0; i < 10000; i++) { for (int j = i; j < 10000; j++) { - int amt = j - i + 1 - ((4999 > i && 4999 < j) ? 1 : 0); - r = sparsemap_rank(map, i, j, false); // GSB + int amt = j - i + 1 - ((hole >= i && j >= hole) ? 1 : 0); + r = sparsemap_rank(map, i, j, false); +#if 1 + if (r != amt) { + printf("\033[2K\r"); + printf("%d\t%d\t--\t%d\t%d", i, j, amt, r); + } +#endif assert_true(r == amt); } }