From 1d98bef7eddd412feec371e2b3c591f22236e93f Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Tue, 9 Apr 2024 14:46:49 -0400 Subject: [PATCH] fixing rank --- src/sparsemap.c | 49 +++++++++++++++++++-------------------- tests/common.c | 61 +++++++++++++++++++++++++++++++++++++------------ tests/common.h | 47 +++++++++++++++++++++++++++++++++++++ tests/test.c | 16 ++++++++++--- 4 files changed, 129 insertions(+), 44 deletions(-) create mode 100644 tests/common.h diff --git a/src/sparsemap.c b/src/sparsemap.c index e5c528a..f66df43 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -467,44 +467,39 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t first, size_t last, size_t *after) return (ret + last); } } else if (flags == SM_PAYLOAD_MIXED) { + sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; if (last > SM_BITS_PER_VECTOR) { last -= SM_BITS_PER_VECTOR; + /* Create a mask for the range of bits except those we don't want to consider. */ + uint64_t mask = ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *after)); + uint64_t mw = w & mask; + ret += popcountll(mw); if (*after > SM_BITS_PER_VECTOR) { - *after = *after - SM_BITS_PER_VECTOR; + *after -= SM_BITS_PER_VECTOR; } else { - sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; - uint64_t mask = UINT64_MAX; - if (*after > 0) { - mask = ~(mask >> (SM_BITS_PER_VECTOR - *after)); - size_t amt = popcountll(w & mask); - if (amt <= *after) { - *after = *after - amt; - } else { - *after = 0; - ret += popcountll(w & ~mask); - } - } else { - ret += popcountll(w); - } + *after = 0; } } else { - sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; - size_t ks = 0; + uint64_t mask_l, mask_r, mask; if (*after > 0) { if (*after > last) { - ks = last; *after = *after - last; + /* This gives us 'last' number of ones on the right. */ + mask_r = ((uint64_t)1 << last) - 1; } else { - ks += *after; + /* This gives us '*after' number of ones on the right. */ + mask_r = (((uint64_t)1 << *after) - 1); *after = 0; } + /* Used to shift the mask_r block to the left 'last' times. */ + mask_l = ((uint64_t)1 << (last + 1)); + mask = mask_l - 1 - mask_r; + } else { + mask = UINT64_MAX >> (SM_BITS_PER_VECTOR - last - 1); } - uint64_t mask = ((uint64_t)1 << (last + 1)) - 1 - (((uint64_t)1 << ks) - 1); - uint64_t masked = w & mask; - while (masked) { - ret += masked & 1; - masked >>= 1; - } + /* Create a mask for the range between *after and last. */ + uint64_t mw = w & mask; + ret += popcountll(mw); return (ret); } } @@ -1198,7 +1193,7 @@ size_t sparsemap_rank(sparsemap_t *map, size_t first, size_t last) { assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); - size_t result = 0, after = first, count = __sm_get_chunk_map_count(map); + size_t result = 0, after = first, prev = 0, count = __sm_get_chunk_map_count(map); uint8_t *p = __sm_get_chunk_map_data(map, 0); for (size_t i = 0; i < count; i++) { @@ -1206,6 +1201,8 @@ sparsemap_rank(sparsemap_t *map, size_t first, size_t last) if (start > last) { return (result); } + after -= start - prev; + prev = start; p += sizeof(sm_idx_t); __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); diff --git a/tests/common.c b/tests/common.c index 35537c5..a1834ab 100644 --- a/tests/common.c +++ b/tests/common.c @@ -42,7 +42,7 @@ xorshift32_seed() } void -shuffle(int *array, size_t n) +shuffle(int *array, size_t n) // TODO working? { for (size_t i = n - 1; i > 0; --i) { size_t j = xorshift32() % (i + 1); @@ -64,32 +64,40 @@ compare_ints(const void *a, const void *b) int has_sequential_set(int a[], int l, int r) { - int count = 1; // Start with a count of 1 for the first number + // Start with a count of 1 for the first number + int count = 1; for (int i = 1; i < l; ++i) { - if (a[i] - a[i - 1] == 1) { // Check if the current and previous elements are sequential + // Check if the current and previous elements are sequential + if (a[i] - a[i - 1] == 1) { count++; - if (count >= r) - return 1; // Found a sequential set of length 'r' + if (count >= r) { + // Found a sequential set of length 'r' starting at 'i' + return i; + } } else { - count = 1; // Reset count if the sequence breaks + // Reset count if the sequence breaks + count = 1; } } - return 0; // No sequential set of length 'r' found + // No sequential set of length 'r' found + return -1; } // Function to ensure an array contains a set of 'r' sequential integers -void -ensure_sequential_set(int *a, int l, int r) +int +ensure_sequential_set(int a[], int l, int r) { - if (!a || l == 0 || r > l) - return; + if (!a || l == 0 || r < 1 || r > l) { + return 0; + } // Sort the array to check for existing sequences qsort(a, l, sizeof(int), compare_ints); // Check if a sequential set of length 'r' already exists - if (has_sequential_set(a, l, r)) { - return; // Sequence already exists, no modification needed + int offset = has_sequential_set(a, l, r); + if (offset >= 0) { + return offset; // Sequence already exists, no modification needed } // Find the minimum and maximum values in the array @@ -98,14 +106,25 @@ ensure_sequential_set(int *a, int l, int r) // Generate a random value between min_value and max_value int value = random_uint32() % (max_value - min_value - r + 1); - // Generate a random location between 0 and l - r - int offset = random_uint32() % (l + r + 1); + offset = random_uint32() % (l - r - 1); // Adjust the array to include a sequential set of 'r' integers at the random offset for (int i = 0; i < r; ++i) { a[i + offset] = value + i; } + return value; +} + +int +create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r) +{ + int placed_at; + placed_at = random_uint32() % (s - r - 1); + for (int i = placed_at; i < placed_at + r; i++) { + sparsemap_set(map, i, true); + } + return placed_at; } void @@ -305,3 +324,15 @@ whats_set_uint64(uint64_t number, int pos[64]) return length; } + +void +whats_set(sparsemap_t *map, int m) +{ + logf("what's set in the range [0, %d): ", m); + for (int i = 0; i < m; i++) { + if (sparsemap_is_set(map, i)) { + logf("%d ", i); + } + } + logf("\n"); +} diff --git a/tests/common.h b/tests/common.h new file mode 100644 index 0000000..bbeac10 --- /dev/null +++ b/tests/common.h @@ -0,0 +1,47 @@ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvariadic-macros" +#define __diag(...) \ + do { \ + fprintf(stderr, "%s:%d:%s(): ", __FILE__, __LINE__, __func__); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +#pragma GCC diagnostic pop + +#ifdef MUNIT_VERSION +#define random_uint32 munit_rand_uint32 +#define logf(...) munit_logf(MUNIT_LOG_INFO, __VA_ARGS__) +#else +#define random_uint32 xorshift32 +#define logf(...) fprintf(stderr, __VA_ARGS__) +#endif + +/* Stable seeds make for stable "random" sequences for repeatable tests. */ +#ifdef STABLE_SEED +#define XORSHIFT_SEED_VALUE (8675309) +#else +#define XORSHIFT_SEED_VALUE ((unsigned int)time(NULL) ^ getpid()) +#endif + +void xorshift32_seed(); +uint32_t xorshift32(); + +void print_array(int *array, int l); +void print_spans(int *array, int n); + +bool is_span(int *array, int n, int x, int l); +bool is_set(const int array[], int bit); +bool has_span(sparsemap_t *map, int *array, int l, int n); +int is_unique(int a[], int l, int value); + +void setup_test_array(int a[], int l, int max_value); +void shuffle(int *array, size_t n); +int ensure_sequential_set(int a[], int l, int r); +int create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r); + +void bitmap_from_uint32(sparsemap_t *map, uint32_t number); +void bitmap_from_uint64(sparsemap_t *map, uint64_t number); +uint32_t rank_uint64(uint64_t number, int n, int p); +int whats_set_uint64(uint64_t number, int bitPositions[64]); + +void whats_set(sparsemap_t *map, int m); diff --git a/tests/test.c b/tests/test.c index 75cc2b3..a95ee32 100644 --- a/tests/test.c +++ b/tests/test.c @@ -31,7 +31,7 @@ populate_map(sparsemap_t *map, int size, int max_value) int array[size]; setup_test_array(array, size, max_value); - ensure_sequential_set(array, size, 10); + //TODO ensure_sequential_set(array, size, 10); shuffle(array, size); for (int i = 0; i < size; i++) { sparsemap_set(map, array[i], true); @@ -574,7 +574,6 @@ test_api_span_setup(const MunitParameter params[], void *user_data) sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data); sparsemap_init(map, buf, 1024, 0); - populate_map(map, 1024, 3 * 1024); return (void *)map; } @@ -593,7 +592,18 @@ test_api_span(const MunitParameter params[], void *data) assert_ptr_not_null(map); - sparsemap_span(map, 0, 1); + int located_at, placed_at, amt = 5000; + for (int i = 1; i < amt; i++) { + for (int j = 1; j < amt / 10; j++) { + sparsemap_clear(map); + placed_at = create_sequential_set_in_empty_map(map, amt, j); +// whats_set(map, amt); + located_at = sparsemap_span(map, 0, j); + assert_true(located_at == placed_at); + located_at = sparsemap_span(map, (placed_at < j ? 0 : placed_at / 2), i); + assert_true(placed_at == located_at); + } + } return MUNIT_OK; }