fixing rank

This commit is contained in:
Gregory Burd 2024-04-09 14:46:49 -04:00
parent 605e9e9227
commit 1d98bef7ed
4 changed files with 129 additions and 44 deletions

View file

@ -467,44 +467,39 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t first, size_t last, size_t *after)
return (ret + last);
}
} else if (flags == SM_PAYLOAD_MIXED) {
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
if (last > SM_BITS_PER_VECTOR) {
last -= SM_BITS_PER_VECTOR;
/* Create a mask for the range of bits except those we don't want to consider. */
uint64_t mask = ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - *after));
uint64_t mw = w & mask;
ret += popcountll(mw);
if (*after > SM_BITS_PER_VECTOR) {
*after = *after - SM_BITS_PER_VECTOR;
*after -= SM_BITS_PER_VECTOR;
} else {
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
uint64_t mask = UINT64_MAX;
if (*after > 0) {
mask = ~(mask >> (SM_BITS_PER_VECTOR - *after));
size_t amt = popcountll(w & mask);
if (amt <= *after) {
*after = *after - amt;
} else {
*after = 0;
ret += popcountll(w & ~mask);
}
} else {
ret += popcountll(w);
}
*after = 0;
}
} else {
sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
size_t ks = 0;
uint64_t mask_l, mask_r, mask;
if (*after > 0) {
if (*after > last) {
ks = last;
*after = *after - last;
/* This gives us 'last' number of ones on the right. */
mask_r = ((uint64_t)1 << last) - 1;
} else {
ks += *after;
/* This gives us '*after' number of ones on the right. */
mask_r = (((uint64_t)1 << *after) - 1);
*after = 0;
}
/* Used to shift the mask_r block to the left 'last' times. */
mask_l = ((uint64_t)1 << (last + 1));
mask = mask_l - 1 - mask_r;
} else {
mask = UINT64_MAX >> (SM_BITS_PER_VECTOR - last - 1);
}
uint64_t mask = ((uint64_t)1 << (last + 1)) - 1 - (((uint64_t)1 << ks) - 1);
uint64_t masked = w & mask;
while (masked) {
ret += masked & 1;
masked >>= 1;
}
/* Create a mask for the range between *after and last. */
uint64_t mw = w & mask;
ret += popcountll(mw);
return (ret);
}
}
@ -1198,7 +1193,7 @@ size_t
sparsemap_rank(sparsemap_t *map, size_t first, size_t last)
{
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0, after = first, count = __sm_get_chunk_map_count(map);
size_t result = 0, after = first, prev = 0, count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) {
@ -1206,6 +1201,8 @@ sparsemap_rank(sparsemap_t *map, size_t first, size_t last)
if (start > last) {
return (result);
}
after -= start - prev;
prev = start;
p += sizeof(sm_idx_t);
__sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p);

View file

@ -42,7 +42,7 @@ xorshift32_seed()
}
void
shuffle(int *array, size_t n)
shuffle(int *array, size_t n) // TODO working?
{
for (size_t i = n - 1; i > 0; --i) {
size_t j = xorshift32() % (i + 1);
@ -64,32 +64,40 @@ compare_ints(const void *a, const void *b)
// Searches the first 'l' elements of 'a' for a run of at least 'r' elements
// in which every value is exactly one greater than the value before it.
//
// Returns the index of the element that completes the first such run (that
// is, the run's LAST index, not its first), or -1 when no run exists.  A run
// of length 1 (or less) is satisfied by any single element, so for r <= 1 the
// result is 0 whenever the array is non-empty.
int
has_sequential_set(int a[], int l, int r)
{
  // Nothing to search.
  if (l < 1) {
    return -1;
  }
  // Any one element is already a "run" of length 1.
  if (r <= 1) {
    return 0;
  }
  // The element at index 0 starts a run of length 1.
  int count = 1;
  for (int i = 1; i < l; ++i) {
    // Sequential means a[i] == a[i - 1] + 1.  The test is written so that it
    // cannot overflow: the subtraction only happens when a[i] is greater than
    // some other int, hence greater than INT_MIN.
    if (a[i] > a[i - 1] && a[i] - 1 == a[i - 1]) {
      if (++count >= r) {
        // a[i - r + 1] .. a[i] is a sequential set of length 'r'.
        return i;
      }
    } else {
      // The sequence broke; a[i] starts a new run of length 1.
      count = 1;
    }
  }
  // No sequential set of length 'r' found.
  return -1;
}
// Function to ensure an array contains a set of 'r' sequential integers
void
ensure_sequential_set(int *a, int l, int r)
int
ensure_sequential_set(int a[], int l, int r)
{
if (!a || l == 0 || r > l)
return;
if (!a || l == 0 || r < 1 || r > l) {
return 0;
}
// Sort the array to check for existing sequences
qsort(a, l, sizeof(int), compare_ints);
// Check if a sequential set of length 'r' already exists
if (has_sequential_set(a, l, r)) {
return; // Sequence already exists, no modification needed
int offset = has_sequential_set(a, l, r);
if (offset >= 0) {
return offset; // Sequence already exists, no modification needed
}
// Find the minimum and maximum values in the array
@ -98,14 +106,25 @@ ensure_sequential_set(int *a, int l, int r)
// Generate a random value between min_value and max_value
int value = random_uint32() % (max_value - min_value - r + 1);
// Generate a random location between 0 and l - r
int offset = random_uint32() % (l + r + 1);
offset = random_uint32() % (l - r - 1);
// Adjust the array to include a sequential set of 'r' integers at the random offset
for (int i = 0; i < r; ++i) {
a[i + offset] = value + i;
}
return value;
}
// Sets 'r' consecutive bits in 'map', starting at a pseudo-random offset, and
// returns that offset.
//
// The offset is drawn from [0, s - r - 1).  When that range is empty (the run
// does not leave at least two spare positions within 's'), the run is placed
// at offset 0 instead of evaluating a modulo by zero or by a negative value
// converted to unsigned.  When 'r' is not positive no bits are set.
int
create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r)
{
  int choices = s - r - 1;
  int placed_at = 0;

  if (choices > 0) {
    placed_at = (int)(random_uint32() % (uint32_t)choices);
  }
  for (int i = placed_at; i < placed_at + r; i++) {
    sparsemap_set(map, i, true);
  }
  return placed_at;
}
void
@ -305,3 +324,15 @@ whats_set_uint64(uint64_t number, int pos[64])
return length;
}
// Logs, on one header line followed by one entry per bit, the index of every
// bit in [0, m) that sparsemap_is_set() reports as set in 'map'.
void
whats_set(sparsemap_t *map, int m)
{
  int idx = 0;

  logf("what's set in the range [0, %d): ", m);
  while (idx < m) {
    if (sparsemap_is_set(map, idx)) {
      logf("%d ", idx);
    }
    idx++;
  }
  logf("\n");
}

47
tests/common.h Normal file
View file

@ -0,0 +1,47 @@
/*
 * Shared declarations for the test programs: diagnostic and logging macros,
 * the PRNG seed selection, and prototypes for the test helper routines.
 *
 * This header uses sparsemap_t without declaring it, so the sparsemap header
 * must be included before this one.  The munit aliases below are selected
 * only when MUNIT_VERSION is already defined at the point of inclusion
 * (presumably by munit.h -- confirm), so include order matters there too.
 */
#ifndef TESTS_COMMON_H
#define TESTS_COMMON_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/*
 * Writes "file:line:function(): " followed by a printf-style message to
 * stderr.  The first argument must be the format string.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wvariadic-macros"
#define __diag(...) \
do { \
fprintf(stderr, "%s:%d:%s(): ", __FILE__, __LINE__, __func__); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
#pragma GCC diagnostic pop

/*
 * random_uint32() and logf() route to munit when it is in use, and to the
 * local xorshift32 generator and stderr otherwise.
 *
 * NOTE: logf shares its name with the <math.h> function; including <math.h>
 * after this header will clash with the macro.
 */
#ifdef MUNIT_VERSION
#define random_uint32 munit_rand_uint32
#define logf(...) munit_logf(MUNIT_LOG_INFO, __VA_ARGS__)
#else
#define random_uint32 xorshift32
#define logf(...) fprintf(stderr, __VA_ARGS__)
#endif

/*
 * Stable seeds make for stable "random" sequences for repeatable tests.
 * The non-stable form calls getpid(), so the file that expands this macro
 * must also include <unistd.h>.
 */
#ifdef STABLE_SEED
#define XORSHIFT_SEED_VALUE (8675309)
#else
#define XORSHIFT_SEED_VALUE ((unsigned int)time(NULL) ^ (unsigned int)getpid())
#endif

/*
 * Local pseudo-random source.  Declared with (void) so these are true
 * prototypes and a call that passes arguments is diagnosed.
 */
void xorshift32_seed(void);
uint32_t xorshift32(void);

/* Array and span helpers; see their definitions for the exact contracts. */
void print_array(int *array, int l);
void print_spans(int *array, int n);
bool is_span(int *array, int n, int x, int l);
bool is_set(const int array[], int bit);
bool has_span(sparsemap_t *map, int *array, int l, int n);
int is_unique(int a[], int l, int value);
void setup_test_array(int a[], int l, int max_value);

/* Shuffles the 'n' elements of 'array' using xorshift32(). */
void shuffle(int *array, size_t n);

/*
 * Sorts 'a' (length 'l') and, if it holds no run of 'r' sequential integers,
 * overwrites 'r' slots to create one.  Returns 0 when the arguments are
 * rejected.  NOTE(review): the other return paths differ in meaning (an array
 * index when a run already exists, the first value written otherwise).
 */
int ensure_sequential_set(int a[], int l, int r);

/*
 * Sets 'r' consecutive bits in 'map' at a pseudo-random offset derived from
 * 's' and 'r', and returns that offset.
 */
int create_sequential_set_in_empty_map(sparsemap_t *map, int s, int r);

/* Helpers working from 32/64-bit integer bit patterns. */
void bitmap_from_uint32(sparsemap_t *map, uint32_t number);
void bitmap_from_uint64(sparsemap_t *map, uint64_t number);
uint32_t rank_uint64(uint64_t number, int n, int p);
int whats_set_uint64(uint64_t number, int bitPositions[64]);

/* Logs the index of every bit in [0, m) that is set in 'map'. */
void whats_set(sparsemap_t *map, int m);

#endif /* TESTS_COMMON_H */

View file

@ -31,7 +31,7 @@ populate_map(sparsemap_t *map, int size, int max_value)
int array[size];
setup_test_array(array, size, max_value);
ensure_sequential_set(array, size, 10);
//TODO ensure_sequential_set(array, size, 10);
shuffle(array, size);
for (int i = 0; i < size; i++) {
sparsemap_set(map, array[i], true);
@ -574,7 +574,6 @@ test_api_span_setup(const MunitParameter params[], void *user_data)
sparsemap_t *map = (sparsemap_t *)test_api_setup(params, user_data);
sparsemap_init(map, buf, 1024, 0);
populate_map(map, 1024, 3 * 1024);
return (void *)map;
}
@ -593,7 +592,18 @@ test_api_span(const MunitParameter params[], void *data)
assert_ptr_not_null(map);
sparsemap_span(map, 0, 1);
int located_at, placed_at, amt = 5000;
for (int i = 1; i < amt; i++) {
for (int j = 1; j < amt / 10; j++) {
sparsemap_clear(map);
placed_at = create_sequential_set_in_empty_map(map, amt, j);
// whats_set(map, amt);
located_at = sparsemap_span(map, 0, j);
assert_true(located_at == placed_at);
located_at = sparsemap_span(map, (placed_at < j ? 0 : placed_at / 2), i);
assert_true(placed_at == located_at);
}
}
return MUNIT_OK;
}