diff --git a/examples/ex_4.c b/examples/ex_4.c index a9135f7..af71e15 100644 --- a/examples/ex_4.c +++ b/examples/ex_4.c @@ -62,6 +62,92 @@ shuffle(rnd_ctx_t *prng, int *array, size_t n) } } +int +compare_ints(const void *a, const void *b) +{ + return *(const int *)a - *(const int *)b; +} + +void +print_array(int *array, size_t l) +{ + int a[l]; + memcpy(a, array, sizeof(int) * l); + qsort(a, l, sizeof(int), compare_ints); + + printf("int a[] = {"); + for (int i = 0; i < l; i++) { + printf("%d", a[i]); + if (i != l) { + printf(", "); + } + } + printf("};\n"); +} + +bool +has_span(int *array, size_t n, int x, int l) +{ + if (n == 0 || l <= 0) { + return false; + } + + int a[n]; + memcpy(a, array, sizeof(int) * n); + qsort(a, n, sizeof(int), compare_ints); + + // Iterate through the array to find a span starting at x of length l + for (size_t i = 0; i < n; i++) { + if (a[i] == x) { + // Check if the span can fit in the array + if (i + l - 1 < n && a[i + l - 1] == x + l - 1) { + return true; // Found the span + } + } + } + + return false; // Span not found +} + +void +print_spans(int *array, size_t n) +{ + int a[n]; + size_t start = 0; + size_t end = 0; + + if (n == 0) { + printf("Array is empty\n"); + return; + } + + memcpy(a, array, sizeof(int) * n); + qsort(a, n, sizeof(int), compare_ints); + + for (size_t i = 1; i < n; i++) { + if (a[i] == a[i - 1] + 1) { + end = i; // Extend the span + } else { + // Print the current span + if (start == end) { + printf("[%d] ", a[start]); + } else { + printf("[%d, %d] ", a[start], a[end]); + } + // Move to the next span + start = i; + end = i; + } + } + + // Print the last span if needed + if (start == end) { + printf("[%d]\n", a[start]); + } else { + printf("[%d, %d]\n", a[start], a[end]); + } +} + bool was_set(size_t bit, const int array[]) { @@ -84,13 +170,18 @@ main(void) int array[TEST_ARRAY_SIZE]; // disable buffering + setbuf(stdout, 0); setbuf(stderr, 0); + // int foo[] = {1, 2, 3, 5, 7, 8 ,9, 10, 21}; + // print_spans(foo, sizeof(foo) / sizeof(foo[0])); + // exit(0); + // start with a 1KiB buffer, TEST_ARRAY_SIZE bits - uint8_t *buf = calloc(TEST_ARRAY_SIZE, sizeof(uint8_t)); + uint8_t *buf = calloc(TEST_ARRAY_SIZE * 1, sizeof(uint8_t)); // create the sparse bitmap - sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * TEST_ARRAY_SIZE, 0); + sparsemap_t *map = sparsemap(buf, sizeof(uint8_t) * 1024 * 1, 0); #if 0 for (i = 0; i < 8; i++) sparsemap_set(map, i, true); @@ -117,9 +208,9 @@ main(void) __diag("rank was %lu between [0, 10]\n", rank); assert(rank == 5); sparsemap_clear(map); -#endif - for (i = 2049; i < 2057; i++) sparsemap_set(map, i, true); + for (i = 2049; i < 2057; i++) + sparsemap_set(map, i, true); rank = sparsemap_rank(map, 2049, 2058); __diag("rank was %lu at offset 108\n", rank); assert(rank == 8); @@ -134,9 +225,10 @@ main(void) __diag("and %d was %s", i, sparsemap_is_set(map, i + 1) ? "set" : "not set"); rank = sparsemap_rank(map, 2048, 2060); __diag("rank was %lu at offset 109\n", rank); - //rank = sparsemap_span(map, 2048, 8); + // rank = sparsemap_span(map, 2048, 8); //__diag("span was found at %lu\n", rank); sparsemap_clear(map); +#endif // seed the PRNG #ifdef SEED @@ -146,13 +238,26 @@ main(void) #endif for (i = 0; i < TEST_ARRAY_SIZE; i++) { - array[i] = (int)__random(&prng) % TEST_ARRAY_SIZE + 1; + uint32_t r = __random(&prng); + array[i] = (int)r % (4 * TEST_ARRAY_SIZE); if (array[i] < 0) { i--; } + for (int j = 0; j < i; j++) { + if (array[j] == array[i]) { + i--; + } + } + } + // create a span of at least 8 for testing between 141 and 153 + int j = 143; + for (i = 0; i < 8; i++) { + uint32_t r = __random(&prng) % TEST_ARRAY_SIZE; + array[r] = j++; } // randomize setting the bits on shuffle(&prng, array, TEST_ARRAY_SIZE); + print_spans(array, TEST_ARRAY_SIZE); // set all the bits on in a random order for (i = 0; i < TEST_ARRAY_SIZE; i++) { @@ -161,16 +266,21 @@ main(void) assert(sparsemap_is_set(map, array[i]) == true); } - size_t len = 6; - size_t l = sparsemap_span(map, 0, len); - __diag("found span of %d at %lu starting from 0\n", len, l); - for (i = (int)l; i < l + len; i++) { - bool set = sparsemap_is_set(map, i); - if (set) { - __diag("verified %lu was set\n", i); - } else { - __diag("darn, %lu was not really set, %s\n", i, - was_set(i, array) ? "but we thought it was" : "because it wasn't"); +// for (size_t len = 1; len < TEST_ARRAY_SIZE; len++) { + for (size_t len = 2; len <= 2; len++) { + __diag("================> %lu\n", len); + size_t l = sparsemap_span(map, 0, len); + __diag("found span of %lu at %lu\n", len, l); + __diag("is_span(%lu, %lu) == %s\n", l, len, + has_span(array, TEST_ARRAY_SIZE, l, len) ? "yes" : "no"); + for (i = (int)l; i < l + len; i++) { + bool set = sparsemap_is_set(map, i); + if (set) { + __diag("verified %d was set\n", i); + } else { + __diag("darn, %d was not really set, %s\n", i, + was_set(i, array) ? "but we thought it was" : "because it wasn't"); + } } } diff --git a/main b/main deleted file mode 100755 index 83e3a1b..0000000 Binary files a/main and /dev/null differ diff --git a/src/sparsemap.c b/src/sparsemap.c index dc1f8fd..2c73c02 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -384,7 +384,7 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos, * n'th bit was found in this __sm_chunk_t, or to the new, reduced value of |n| */ static size_t -__sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n) +__sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n) { size_t ret = 0; register uint8_t *p; @@ -432,7 +432,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n) } } - *pnew_n = n; + *pnew_n = (ssize_t)n; return (ret); } @@ -440,7 +440,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, ssize_t n, ssize_t *pnew_n) * Counts the set bits in the range [start, idx]. */ static size_t -__sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx) +__sm_chunk_map_rank(__sm_chunk_t *map, size_t *loc, size_t idx) { size_t ret = 0; @@ -453,46 +453,56 @@ __sm_chunk_map_rank(__sm_chunk_t *map, size_t start, size_t idx) } if (flags == SM_PAYLOAD_ZEROS) { if (idx > SM_BITS_PER_VECTOR) { - if (start > SM_BITS_PER_VECTOR) { - start -= SM_BITS_PER_VECTOR; + if (*loc > SM_BITS_PER_VECTOR) { + *loc = *loc - SM_BITS_PER_VECTOR; } else { - idx -= SM_BITS_PER_VECTOR - start; - start = 0; + idx -= SM_BITS_PER_VECTOR - *loc; + *loc = 0; } } else { return (ret); } } else if (flags == SM_PAYLOAD_ONES) { if (idx > SM_BITS_PER_VECTOR) { - if (start > SM_BITS_PER_VECTOR) { - start -= SM_BITS_PER_VECTOR; + if (*loc > SM_BITS_PER_VECTOR) { + *loc = *loc - SM_BITS_PER_VECTOR; } else { - idx -= SM_BITS_PER_VECTOR - start; - if (start == 0) { + idx -= SM_BITS_PER_VECTOR - *loc; + if (*loc == 0) { ret += SM_BITS_PER_VECTOR; } - start = 0; + *loc = 0; } } else { return (ret + idx); } } else if (flags == SM_PAYLOAD_MIXED) { if (idx > SM_BITS_PER_VECTOR) { - if (start > SM_BITS_PER_VECTOR) { - start -= SM_BITS_PER_VECTOR; + if (*loc > SM_BITS_PER_VECTOR) { + *loc = *loc - SM_BITS_PER_VECTOR; } else { - idx -= SM_BITS_PER_VECTOR - start; - if (start == 0) { + idx -= SM_BITS_PER_VECTOR - *loc; + if (*loc == 0) { ret += popcountll((uint64_t)map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]); } - start = 0; + *loc = 0; } } else { sm_bitvec_t w = map->m_data[1 + __sm_chunk_map_get_position(map, i * SM_FLAGS_PER_INDEX_BYTE + j)]; - for (size_t k = 0; k < idx; k++) { + size_t ks = 0; + if (*loc > 0) { + if (*loc > idx) { + ks = idx; + *loc = *loc - idx; + } else { + ks += *loc; + *loc = 0; + } + } + for (size_t k = ks; k < idx; k++) { if (w & ((sm_bitvec_t)1 << k)) { ret++; } @@ -1149,7 +1159,6 @@ sparsemap_select(sparsemap_t *map, size_t n) assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); size_t result = 0; size_t count = __sm_get_chunk_map_count(map); - uint8_t *p = __sm_get_chunk_map_data(map, 0); for (size_t i = 0; i < count; i++) { @@ -1158,7 +1167,7 @@ sparsemap_select(sparsemap_t *map, size_t n) __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); - ssize_t new_n = n; + ssize_t new_n = (ssize_t)n; size_t index = __sm_chunk_map_select(&chunk, n, &new_n); if (new_n == -1) { return (result + index); @@ -1200,7 +1209,7 @@ sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx) continue; } - result += __sm_chunk_map_rank(&chunk, loc, idx - start); + result += __sm_chunk_map_rank(&chunk, &loc, idx - start); p += __sm_chunk_map_get_size(&chunk); } return (result); @@ -1214,17 +1223,21 @@ sparsemap_rank(sparsemap_t *map, size_t loc, size_t idx) size_t sparsemap_span(sparsemap_t *map, size_t loc, size_t len) { - size_t nth, count, size = 1024; + size_t offset, nth = 0, count = 0; + offset = sparsemap_select(map, 0); do { - nth = sparsemap_select(map, nth); - count = sparsemap_rank(map, nth, nth + len); + count = sparsemap_rank(map, offset, offset + len); if (count == len) { - return nth; + return offset; } else { - nth += count; + count = len; + while(--count && sparsemap_is_set(map, offset)) { + nth++; + } } - } while (1); // TODO... ? until what? + offset = sparsemap_select(map, nth); + } while (1); // TODO...until what? - return size; + return 0; }