locate first span of length 'n' using rank and select #3

Merged
greg merged 10 commits from gburd/locate-first-span into main 2024-04-07 20:38:57 +00:00
3 changed files with 167 additions and 44 deletions
Showing only changes of commit 1272db6744 - Show all commits

View file

@ -62,6 +62,92 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n)
} }
} }
/**
 * qsort(3) comparator that orders two ints ascending.
 *
 * @param a  pointer to the first int
 * @param b  pointer to the second int
 * @return   a negative value, zero, or a positive value when *a is less
 *           than, equal to, or greater than *b
 *
 * Relational operators are used instead of `*a - *b` because the subtraction
 * overflows (undefined behavior) for operands of opposite sign and large
 * magnitude, e.g. INT_MIN and 1, and then reports the wrong ordering.
 */
int
compare_ints(const void *a, const void *b)
{
    const int lhs = *(const int *)a;
    const int rhs = *(const int *)b;

    return (lhs > rhs) - (lhs < rhs);
}
/**
 * Print a sorted copy of `array` to stdout in the form of a C initializer,
 * e.g. "int a[] = {1, 2, 3};".  The caller's array is not modified.
 *
 * @param array  the values to print
 * @param l      the number of elements in `array`
 */
void
print_array(int *array, size_t l)
{
    // A variable length array must have a size greater than zero, so the
    // empty case is handled before the working copy is declared.
    if (l == 0) {
        printf("int a[] = {};\n");
        return;
    }

    int a[l];
    memcpy(a, array, sizeof(int) * l);
    qsort(a, l, sizeof(int), compare_ints);

    printf("int a[] = {");
    for (size_t i = 0; i < l; i++) {
        printf("%d", a[i]);
        // Separator between elements only, never after the last one.
        if (i + 1 < l) {
            printf(", ");
        }
    }
    printf("};\n");
}
/**
 * Report whether the values in `array` contain every integer in the
 * inclusive range [x, x + l - 1], i.e. a run of `l` consecutive values
 * beginning at `x`.  The caller's array is not modified; a sorted copy is
 * examined instead.
 *
 * @param array  the values to search, in any order, duplicates permitted
 * @param n      the number of elements in `array`
 * @param x      the first value of the run
 * @param l      the required length of the run
 * @return       true when the run is present, false otherwise (including
 *               when `n` is zero or `l` is not positive)
 */
bool
has_span(int *array, size_t n, int x, int l)
{
    if (n == 0 || l <= 0) {
        return false;
    }

    int a[n];
    memcpy(a, array, sizeof(int) * n);
    qsort(a, n, sizeof(int), compare_ints);

    // Locate the first occurrence of x in the sorted copy.
    size_t i = 0;
    while (i < n && a[i] < x) {
        i++;
    }
    if (i == n || a[i] != x) {
        return false; // x itself is absent
    }

    // Walk forward counting distinct consecutive values.  Checking only the
    // element at offset l - 1 is not enough: repeated values shift the
    // offsets and produce both false positives and false negatives.
    int prev = x;
    int run = 1;
    for (i++; run < l && i < n; i++) {
        if (a[i] == prev) {
            continue; // duplicate of the value just counted
        }
        // Here a[i] > prev, hence prev < INT_MAX and prev + 1 cannot overflow.
        if (a[i] != prev + 1) {
            break; // gap, the run ends before reaching length l
        }
        prev = a[i];
        run++;
    }
    return run == l;
}
/**
 * Print to stdout the runs of consecutive integers found in `array`.  A run
 * covering a single value is printed as "[v] ", a longer run as
 * "[first, last] "; the final run is followed by a newline rather than a
 * space.  The caller's array is not modified; a sorted copy is examined.
 *
 * @param array  the values to summarize, in any order, duplicates permitted
 * @param n      the number of elements in `array`; when zero the text
 *               "Array is empty" is printed instead
 */
void
print_spans(int *array, size_t n)
{
    // A variable length array must have a size greater than zero, so the
    // empty case is rejected before the working copy is declared.
    if (n == 0) {
        printf("Array is empty\n");
        return;
    }

    int a[n];
    memcpy(a, array, sizeof(int) * n);
    qsort(a, n, sizeof(int), compare_ints);

    size_t start = 0;
    size_t end = 0;
    for (size_t i = 1; i < n; i++) {
        // A repeated value belongs to the current run.  Testing equality
        // first also keeps `a[i - 1] + 1` from being evaluated when
        // a[i - 1] is INT_MAX, because the array is sorted.
        if (a[i] == a[i - 1] || a[i] == a[i - 1] + 1) {
            end = i; // Extend the span
        } else {
            // Print the current span; compare values, not indices, so a run
            // made only of duplicates still prints as a single value.
            if (a[start] == a[end]) {
                printf("[%d] ", a[start]);
            } else {
                printf("[%d, %d] ", a[start], a[end]);
            }
            // Move to the next span
            start = i;
            end = i;
        }
    }

    // Print the last span, terminated by a newline.
    if (a[start] == a[end]) {
        printf("[%d]\n", a[start]);
    } else {
        printf("[%d, %d]\n", a[start], a[end]);
    }
}
bool bool
was_set(size_t bit, const int array[]) was_set(size_t bit, const int array[])
{ {
@ -84,13 +170,18 @@ main(void)
int array[TEST_ARRAY_SIZE]; int array[TEST_ARRAY_SIZE];
// disable buffering // disable buffering
setbuf(stdout, 0);
setbuf(stderr, 0); setbuf(stderr, 0);
// int foo[] = {1, 2, 3, 5, 7, 8 ,9, 10, 21};
// print_spans(foo, sizeof(foo) / sizeof(foo[0]));
// exit(0);
// start with a 1KiB buffer, TEST_ARRAY_SIZE bits // start with a 1KiB buffer, TEST_ARRAY_SIZE bits
uint8_t *buf = calloc(TEST_ARRAY_SIZE, sizeof(uint8_t)); uint8_t *buf = calloc(TEST_ARRAY_SIZE * 1, sizeof(uint8_t));
// create the sparse bitmap // create the sparse bitmap
sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0); sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024 * 1, 0);
#if 0 #if 0
for (i = 0; i < 8; i++) sparsemap_set(map, i, true); for (i = 0; i < 8; i++) sparsemap_set(map, i, true);
@ -117,9 +208,9 @@ main(void)
__diag("rank was %lu between [0, 10]\n", rank); __diag("rank was %lu between [0, 10]\n", rank);
assert(rank == 5); assert(rank == 5);
sparsemap_clear(map); sparsemap_clear(map);
#endif
for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true); for (i = 2049; i < 2057; i++)
sparsemap_set(map, i, true);
rank = sparsemap_rank(map, 2049, 2058); rank = sparsemap_rank(map, 2049, 2058);
__diag("rank was %lu at offset 108\n", rank); __diag("rank was %lu at offset 108\n", rank);
assert(rank == 8); assert(rank == 8);
@ -137,6 +228,7 @@ main(void)
// rank = sparsemap_span(map, 2048, 8); // rank = sparsemap_span(map, 2048, 8);
//__diag("span was found at %lu\n", rank); //__diag("span was found at %lu\n", rank);
sparsemap_clear(map); sparsemap_clear(map);
#endif
// seed the PRNG // seed the PRNG
#ifdef SEED #ifdef SEED
@ -146,13 +238,26 @@ main(void)
#endif #endif
for (i = 0; i < TEST_ARRAY_SIZE; i++) { for (i = 0; i < TEST_ARRAY_SIZE; i++) {
array[i] = (int)__random(&prng) % TEST_ARRAY_SIZE + 1; uint32_t r = __random(&prng);
array[i] = (int)r % (4 * TEST_ARRAY_SIZE);
if (array[i] < 0) { if (array[i] < 0) {
i--; i--;
} }
for (int j = 0; j < i; j++) {
if (array[j] == array[i]) {
i--;
}
}
}
// create a span of at least 8 for testing between 141 and 153
int j = 143;
for (i = 0; i < 8; i++) {
uint32_t r = __random(&prng) % TEST_ARRAY_SIZE;
array[r] = j++;
} }
// randomize setting the bits on // randomize setting the bits on
shuffle(&prng, array, TEST_ARRAY_SIZE); shuffle(&prng, array, TEST_ARRAY_SIZE);
print_spans(array, TEST_ARRAY_SIZE);
// set all the bits on in a random order // set all the bits on in a random order
for (i = 0; i < TEST_ARRAY_SIZE; i++) { for (i = 0; i < TEST_ARRAY_SIZE; i++) {
@ -161,18 +266,23 @@ main(void)
assert(sparsemap_is_set(map, array[i]) == true); assert(sparsemap_is_set(map, array[i]) == true);
} }
size_t len = 6; // for (size_t len = 1; len < TEST_ARRAY_SIZE; len++) {
for (size_t len = 2; len <= 2; len++) {
__diag("================> %lu\n", len);
size_t l = sparsemap_span(map, 0, len); size_t l = sparsemap_span(map, 0, len);
__diag("found span of %d at %lu starting from 0\n", len, l); __diag("found span of %lu at %lu\n", len, l);
__diag("is_span(%lu, %lu) == %s\n", l, len,
has_span(array, TEST_ARRAY_SIZE, l, len) ? "yes" : "no");
for (i = (int)l; i < l + len; i++) { for (i = (int)l; i < l + len; i++) {
bool set = sparsemap_is_set(map, i); bool set = sparsemap_is_set(map, i);
if (set) { if (set) {
__diag("verified %lu was set\n", i); __diag("verified %d was set\n", i);
} else { } else {
__diag("darn, %lu was not really set, %s\n", i, __diag("darn, %d was not really set, %s\n", i,
was_set(i, array) ? "but we thought it was" : "because it wasn't"); was_set(i, array) ? "but we thought it was" : "because it wasn't");
} }
} }
}
return 0; return 0;
} }

BIN
main

Binary file not shown.

View file

@ -384,7 +384,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos,
* n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n| * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n|
*/ */
static size_t static size_t
__sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n) __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n)
{ {
size_t ret = 0; size_t ret = 0;
register uint8_t *p; register uint8_t *p;
@ -432,7 +432,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
} }
} }
*pnew_n = n; *pnew_n = (ssize_t)n;
return (ret); return (ret);
} }
@ -440,7 +440,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n)
* Counts the set bits in the range [start, idx]. * Counts the set bits in the range [start, idx].
*/ */
static size_t static size_t
__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx) __sm_chunk_map_rank(__sm_chunk_t *map, size_t *loc, size_t idx)
{ {
size_t ret = 0; size_t ret = 0;
@ -453,46 +453,56 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx)
} }
if (flags == SM_PAYLOAD_ZEROS) { if (flags == SM_PAYLOAD_ZEROS) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
if (start > SM_BITS_PER_VECTOR) { if (*loc > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR; *loc = *loc - SM_BITS_PER_VECTOR;
} else { } else {
idx -= SM_BITS_PER_VECTOR - start; idx -= SM_BITS_PER_VECTOR - *loc;
start = 0; *loc = 0;
} }
} else { } else {
return (ret); return (ret);
} }
} else if (flags == SM_PAYLOAD_ONES) { } else if (flags == SM_PAYLOAD_ONES) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
if (start > SM_BITS_PER_VECTOR) { if (*loc > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR; *loc = *loc - SM_BITS_PER_VECTOR;
} else { } else {
idx -= SM_BITS_PER_VECTOR - start; idx -= SM_BITS_PER_VECTOR - *loc;
if (start == 0) { if (*loc == 0) {
ret += SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR;
} }
start = 0; *loc = 0;
} }
} else { } else {
return (ret + idx); return (ret + idx);
} }
} else if (flags == SM_PAYLOAD_MIXED) { } else if (flags == SM_PAYLOAD_MIXED) {
if (idx > SM_BITS_PER_VECTOR) { if (idx > SM_BITS_PER_VECTOR) {
if (start > SM_BITS_PER_VECTOR) { if (*loc > SM_BITS_PER_VECTOR) {
start -= SM_BITS_PER_VECTOR; *loc = *loc - SM_BITS_PER_VECTOR;
} else { } else {
idx -= SM_BITS_PER_VECTOR - start; idx -= SM_BITS_PER_VECTOR - *loc;
if (start == 0) { if (*loc == 0) {
ret += popcountll((uint64_t)map->m_data[1 + ret += popcountll((uint64_t)map->m_data[1 +
__sm_chunk_map_get_position(map, __sm_chunk_map_get_position(map,
i * SM_FLAGS_PER_INDEX_BYTE + j)]); i * SM_FLAGS_PER_INDEX_BYTE + j)]);
} }
start = 0; *loc = 0;
} }
} else { } else {
sm_bitvec_t w = map->m_data[1 + sm_bitvec_t w = map->m_data[1 +
__sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)];
for (size_t k = 0; k < idx; k++) { size_t ks = 0;
if (*loc > 0) {
if (*loc > idx) {
ks = idx;
*loc = *loc - idx;
} else {
ks += *loc;
*loc = 0;
}
}
for (size_t k = ks; k < idx; k++) {
if (w & ((sm_bitvec_t)1 << k)) { if (w & ((sm_bitvec_t)1 << k)) {
ret++; ret++;
} }
@ -1149,7 +1159,6 @@ sparsemap_select(sparsemap_t *map, size_t n)
assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
size_t result = 0; size_t result = 0;
size_t count = __sm_get_chunk_map_count(map); size_t count = __sm_get_chunk_map_count(map);
uint8_t *p = __sm_get_chunk_map_data(map, 0); uint8_t *p = __sm_get_chunk_map_data(map, 0);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
@ -1158,7 +1167,7 @@ sparsemap_select(sparsemap_t *map, size_t n)
__sm_chunk_t chunk; __sm_chunk_t chunk;
__sm_chunk_map_init(&chunk, p); __sm_chunk_map_init(&chunk, p);
ssize_t new_n = n; ssize_t new_n = (ssize_t)n;
size_t index = __sm_chunk_map_select(&chunk, n, &new_n); size_t index = __sm_chunk_map_select(&chunk, n, &new_n);
if (new_n == -1) { if (new_n == -1) {
return (result + index); return (result + index);
@ -1200,7 +1209,7 @@ sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
continue; continue;
} }
result += __sm_chunk_map_rank(&chunk, loc, idx - start); result += __sm_chunk_map_rank(&chunk, &loc, idx - start);
p += __sm_chunk_map_get_size(&chunk); p += __sm_chunk_map_get_size(&chunk);
} }
return (result); return (result);
@ -1214,17 +1223,21 @@ sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx)
size_t size_t
sparsemap_span(sparsemap_t *map, size_t loc, size_t len) sparsemap_span(sparsemap_t *map, size_t loc, size_t len)
{ {
size_t nth, count, size = 1024; size_t offset, nth = 0, count = 0;
offset = sparsemap_select(map, 0);
do { do {
nth = sparsemap_select(map, nth); count = sparsemap_rank(map, offset, offset + len);
count = sparsemap_rank(map, nth, nth + len);
if (count == len) { if (count == len) {
return nth; return offset;
} else { } else {
nth += count; count = len;
while(--count && sparsemap_is_set(map, offset)) {
nth++;
} }
} while (1); // TODO... ? until what? }
offset = sparsemap_select(map, nth);
} while (1); // TODO...until what?
return size; return 0;
} }