From 6630fc759314039cff4d84f730b40c7c52769d41 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 26 Apr 2024 11:46:26 -0400 Subject: [PATCH] soak seems to be working --- examples/soak.c | 112 ++++++++++---------- include/sparsemap.h | 246 +++++++++++++++++++++++++++++++++----------- src/sparsemap.c | 76 ++++++++------ tests/test.c | 14 ++- 4 files changed, 301 insertions(+), 147 deletions(-) diff --git a/examples/soak.c b/examples/soak.c index 8c95d8d..6827b85 100644 --- a/examples/soak.c +++ b/examples/soak.c @@ -157,7 +157,6 @@ mdb_midl_search(MDB_IDL ids, MDB_ID id) return cursor; } -#if 0 /* superseded by append/sort */ int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) { unsigned x, i; @@ -190,7 +189,6 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) return 0; } -#endif MDB_IDL mdb_midl_alloc(int num) @@ -425,20 +423,36 @@ toss(size_t max) return level; } +bool +verify_midl_contains(MDB_IDL list, pgno_t pg) +{ + unsigned index = mdb_midl_search(list, pg); + return index <= list[0] && list[index] == pg; +} + +bool +verify_midl_nodups(MDB_IDL list) +{ + pgno_t id = 1; + while (id < list[0]) { + if (list[id] == list[id + 1]) + return false; + id++; + } + return true; +} + bool verify_span_midl(MDB_IDL list, pgno_t pg, unsigned len) { - pgno_t f = 1; - if (pg + len > list[0]) + pgno_t f = mdb_midl_search(list, pg); + bool found = (list[f] == pg) && (f <= list[0]); + if (!found) return false; - while (list[f] != pg && f <= list[0]) - f++; if (len == 1) return true; - for (pgno_t i = f; i < f + len; i++) { - if (list[i + 1] != list[i] + 1) - return false; - } + if (list[len] + 1 != list[len - 1]) + return false; return true; } @@ -446,10 +460,9 @@ bool verify_empty_midl(MDB_IDL list, pgno_t pg, unsigned len) { for (pgno_t i = pg; i < pg + len; i++) { - pgno_t f = 1; - while (list[f] != pg && f <= list[0]) - f++; - if (f != list[0]) + pgno_t f = mdb_midl_search(list, pg); + bool found = list[f] == pg && f <= list[0]; + if (found) return false; } return true; @@ -480,26 
+493,12 @@ verify_empty_sparsemap(sparsemap_t *map, pgno_t pg, unsigned len) bool verify_sm_eq_ml(sparsemap_t *map, MDB_IDL list) { - // ensure all items in the MDB_IDL are set to true in the map - for (unsigned j = 1; j <= list[0]; j++) { - if (sparsemap_is_set(map, list[j]) == false) + for (unsigned i = 0; i <= list[1]; i++) { + pgno_t f = mdb_midl_search(list, i); + bool found = list[f] == i && f <= list[0]; + if (sparsemap_is_set(map, i) != found) return false; } - // ensure all items not in the MDB_IDL are set to false in the map - unsigned j = 1, last_pgno = list[list[0]]; - for (unsigned i = 0; i <= last_pgno; i++) { - if (list[j] > i) { - do { - if (sparsemap_is_set(map, i) == true) - return false; - } while (i++ < list[j]); - } else { - if (sparsemap_is_set(map, i) == false) - return false; - if (list[j] == i) - j++; - } - } return true; } @@ -517,7 +516,7 @@ int main() { size_t iterations = 0; - bool prefer_mdb_idl_location = true; // xorshift32() % 2; + bool prefer_mdb_idl_location = (bool)xorshift32() % 2; // disable buffering setvbuf(stdout, NULL, _IONBF, 0); @@ -525,7 +524,7 @@ main() __diag("starting...\n"); - size_t amt = 1024 * 2; // 1024*1024 * 2; + size_t amt = 1024 * 2; // 1024 * 1024 * 2; MDB_IDL list = mdb_midl_alloc(amt); sparsemap_t *map = sparsemap(3 * 1024); @@ -537,6 +536,7 @@ main() mdb_midl_xappend(list, pg); // listed page ids are free sparsemap_set(map, pg, true); // true means free in our bitmap } + mdb_midl_sort(list); print_sizes(map, list); assert(verify_sm_eq_ml(map, list)); @@ -558,21 +558,16 @@ main() unsigned n2 = n, mop_len = mop[0]; if (mop_len > n2) { i = mop_len; - if (n2 == 1) { - pgno = mop[mop_len]; - goto search_done; - } do { pgno = mop[i]; - if (mop[i - n2] == pgno - n2) + if (mop[i - n2] == pgno + n2) goto search_done; } while (--i > n2); if (--retry < 0) break; } search_done:; - assert(pgno != 0); - ml = pgno - (n2 > 1 ? 
n2 - 1 : 0); + ml = pgno; mi = i; } assert(verify_span_midl(list, ml, n)); @@ -598,6 +593,8 @@ main() /* Move any stragglers down */ for (j = i - num; j < mop_len;) mop[++j] = mop[++i]; + for (j = mop_len + 1; j <= mop[-1]; j++) + mop[j] = 0; } else { unsigned j, num = n; int i = mdb_midl_search(list, sl) + num; @@ -626,21 +623,30 @@ main() // Once we've used half of the free list, let's replenish it a bit. if (list[0] < amt / 2) { do { - size_t len = toss(15) + 1; - pgno_t l, s = (pgno_t)xorshift32() % amt - len; + pgno_t pg; + size_t len, retries = amt; do { - l = sparsemap_span(map, s--, len, false); - } while(SPARSEMAP_NOT_FOUND(l)); - assert(verify_empty_midl(list, l, len)); - assert(verify_empty_sparsemap(map, l, len)); - for (int i = l; i < l + len; i++) { - mdb_midl_xappend(list, i); - sparsemap_set(map, i, true); + len = toss(15) + 1; + pg = sparsemap_span(map, 0, len, false); + } while (SPARSEMAP_NOT_FOUND(pg) && --retries); + if (SPARSEMAP_FOUND(pg)) { + assert(verify_empty_midl(list, pg, len)); + assert(verify_empty_sparsemap(map, pg, len)); + for (int i = pg; i < pg + len; i++) { + if (pg + len > list[-1]) + mdb_midl_need(&list, pg + len); + assert(verify_midl_contains(list, i) == false); + mdb_midl_insert(list, i); + sparsemap_set(map, i, true); + } + mdb_midl_sort(list); + assert(verify_midl_nodups(list)); + assert(verify_span_midl(list, pg, len)); + assert(verify_span_sparsemap(map, pg, len)); } - assert(verify_span_midl(list, l, len)); - assert(verify_span_sparsemap(map, l, len)); - } while (list[0] > amt - 32); + } while (list[0] < amt - 32); } + print_sizes(map, list); iterations++; } diff --git a/include/sparsemap.h b/include/sparsemap.h index 233e530..b58450c 100644 --- a/include/sparsemap.h +++ b/include/sparsemap.h @@ -89,113 +89,239 @@ extern "C" { typedef struct sparsemap sparsemap_t; typedef long int sparsemap_idx_t; -#define SPARSEMAP_IDX_MAX ((1UL << (sizeof(long int) * CHAR_BIT - 1)) - 1) -#define SPARSEMAP_IDX_MIN 
(-(SPARSEMAP_IDX_MAX)-1) -#define SPARSEMAP_NOT_FOUND(_x) ((_x) == SPARSEMAP_IDX_MAX || (_x) == SPARSEMAP_IDX_MIN) +#define SPARSEMAP_IDX_MAX LONG_MAX +#define SPARSEMAP_IDX_MIN LONG_MIN +#define SPARSEMAP_FOUND(x) ((x) < SPARSEMAP_IDX_MAX || (x) > SPARSEMAP_IDX_MIN) +#define SPARSEMAP_NOT_FOUND(x) ((x) == SPARSEMAP_IDX_MAX || (x) == SPARSEMAP_IDX_MIN) typedef uint32_t sm_idx_t; typedef uint64_t sm_bitvec_t; -/** - * Create a new, empty sparsemap_t with a buffer of |size|. - * Default when set to 0 is 1024. +/** @brief Allocate a new, empty sparsemap_t with a buffer of \b size on the + * heap to use for storage of bitmap data. + * + * The buffer used for the bitmap is allocated in the same heap allocation as + * the structure, this means that you only need to call free() on the returned + * object to free all resources. Using this method it is allowable to grow the + * buffer size by calling #sparsemap_set_data_size(). This function calls + * #sparsemap_init(). + * + * @param[in] size The starting size of the buffer used for the bitmap, default + * is 1024 bytes. + * @returns The newly allocated sparsemap reference. */ sparsemap_t *sparsemap(size_t size); -/** - * Allocate on a sparsemap_t on the heap to wrap the provided fixed-size - * buffer (heap or stack allocated). +/** @brief Allocate a new, empty sparsemap_t that references (wraps) the buffer + * \b data of \b size bytes to use for storage of bitmap data. + * + * This function allocates a new sparsemap_t but not the buffer which is + * provided by the caller as \b data which can be allocated on the stack or + * heap. Caller is responsible for calling free() on the returned heap object + * and releasing the memory used for \b data. Resizing the buffer is not + * directly supported, you may attempt to resize by calling + * #sparsemap_set_data_size() with the potentially relocated address of \b data. + * This function calls #sparsemap_init(). 
+ * + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap. + * @returns The newly allocated sparsemap reference. */ sparsemap_t *sparsemap_wrap(uint8_t *data, size_t size); -/** - * Initialize a (possibly stack allocated) sparsemap_t with data (potentially - * also on the stack). +/** @brief Initialize an existing sparsemap_t by assigning \b data of \b size + * bytes for storage of bitmap data. + * + * Given the address of an existing \b map allocated on the stack or heap this + * function will initialize the datastructure and use the provided \b data of + * \b size for bitmap data. Caller is responsible for all memory management. + * Resizing the buffer is not directly supported; you may resize it yourself + * and then call #sparsemap_set_data_size() with the new size, also passing + * the new address of \b data should the buffer have moved, so that the map's + * m_data field can be updated. + * + * @param[in] map The sparsemap reference. + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap. */ void sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size); -/** - * Opens an existing sparsemap contained within the specified buffer. +/** @brief Opens, without initializing, an existing sparsemap contained within + * the specified buffer. + * + * Given the address of an existing \b map this function will assign to the + * provided datastructure \b data of \b size for bitmap data. Caller is + * responsible for all memory management. Use this as a way to + * "deserialize" bytes and make them ready for use as a bitmap. + * + * @param[in] map The sparsemap reference. + * @param[in] data A heap or stack memory buffer of \b size for use storing + * bitmap data. + * @param[in] size The size of the buffer \b data used for the bitmap.
*/ -void sparsemap_open(sparsemap_t *, uint8_t *data, size_t data_size); +void sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size); -/** - * Resets values and empties the buffer making it ready to accept new data. +/** @brief Resets values and empties the buffer making it ready to accept new + * data. + * + * @param[in] map The sparsemap reference. */ void sparsemap_clear(sparsemap_t *map); -/** - * Resizes the data range within the limits of the provided buffer, the map may - * move to a new address returned iff the map was created with the sparsemap() API. - * Take care to use the new reference (think: realloc()). NOTE: If the returned - * value equals NULL then the map was not resized. +/** @brief Update the size of the buffer \b data used for storing the bitmap. + * + * When called with \b data NULL on a \b map that was created with #sparsemap() + * this function will reallocate the storage for both the map and data possibly + * changing the address of the map itself so it is important for the caller to + * update all references to this map to the address returned in this scenario. + * Access to stale references will result in memory violations and program + * termination. Caller is not required to free() the old address, only the new + * one should it have changed. This uses #realloc() under the covers, all + * caveats apply here as well. + * + * When called referencing a \b map that was allocated by the caller this + * function will only update the values within the datastructure. + * + * @param[in] map The sparsemap reference. + * @param[in] size The desired size of the buffer \b data used for the bitmap. + * @returns The -- potentially changed -- sparsemap reference, or NULL should a + * #realloc() fail (\b ENOMEM). + * @note The resizing of caller supplied allocated objects is not yet fully + * supported.
*/ -sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, size_t data_size); +sparsemap_t *sparsemap_set_data_size(sparsemap_t *map, size_t size, uint8_t *data); -/** - * Calculate remaining capacity, approaches 0 when full. +/** @brief Calculate remaining capacity, approaches 0 when full. + * + * Provides an estimate in the range [0.0, 100.0] of the remaining capacity of + * the buffer storing bitmap data. This can change up or down as more data + * is added/removed due to the method for compressed representation; do not + * expect a smooth progression in either direction. This is a rough estimate only + * and may also jump in value after seemingly indiscriminate changes to the map. + * + * @param[in] map The sparsemap reference. + * @returns an estimate for remaining capacity that approaches 0.0 when full or + * 100.0 when empty */ double sparsemap_capacity_remaining(sparsemap_t *map); -/** - * Returns the capacity of the underlying byte array. +/** @brief Returns the capacity of the underlying byte array in bytes. + * + * Specifically, this returns the byte \b size provided for the underlying + * buffer used to store bitmap data. + * + * @param[in] map The sparsemap reference. + * @returns byte size of the buffer used for storing bitmap data */ size_t sparsemap_get_capacity(sparsemap_t *map); -/** - * Returns the value of a bit at index |idx|, either on/true/1 or off/false/0. +/** @brief Returns the value of a bit at index \b idx, either \b true for "set" (1) + * or \b false for "unset" (0). + * * When |idx| is negative it is an error. + * + * @param[in] map The sparsemap reference. + * @param[in] idx The 0-based offset into the bitmap index to examine. + * @returns either true or false; a negative idx is an error and always returns + * false + * @todo Support for negative relative offset in \b idx. */ bool sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx); -/** - * Sets the bit at index |idx| to true or false, depending on |value|.
- * When |idx| is negative is it an error. Returns the |idx| supplied or - * SPARSEMAP_IDX_MAX on error with |errno| set to ENOSP when the map is full. +/** @brief Sets the bit at index \b idx to \b value. + * + * A sparsemap has a fixed size buffer with a capacity that can be exhausted + * when calling this function. In such cases the return value is not equal to + * the provided \b idx and errno is set to ENOSPC. In such situations it is + * possible to grow the data size and retry the set() operation under certain + * circumstances (see #sparsemap() and #sparsemap_set_data_size()). + * + * @param[in] map The sparsemap reference. + * @param[in] idx The 0-based offset into the bitmap index to modify. + * @returns the \b idx supplied on success or SPARSEMAP_IDX_MIN/MAX on error + * with \b errno set to ENOSPC when the map is full; a negative idx is an error + * and always returns SPARSEMAP_IDX_MIN. */ sparsemap_idx_t sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value); -/** - * Returns the offset of the very first/last bit in the map. - */ -sm_idx_t sparsemap_get_starting_offset(sparsemap_t *map); - -/** - * Returns the used size in the data buffer in bytes. +/** @brief Returns the byte size of the data buffer that has been used thus far. + * + * @param[in] map The sparsemap reference. + * @returns the byte size of the data buffer that has been used thus far */ size_t sparsemap_get_size(sparsemap_t *map); -/** - * Decompresses the whole bitmap; calls scanner for all bits with a set of - * |n| vectors |vec| each a sm_bitmap_t which can be masked and read using - * bit operators to read the values for each position in the bitmap index. - * Setting |skip| will start the scan after "skip" bits. +/** @brief Provides a method for a callback function to examine every bit set in + * the index. + * + * This decompresses the whole bitmap and invokes #scanner() passing a 64bit + * "vector" of bits in order from 0 index to the end of the map.
Using standard + * bit masking techniques it is possible to read each bit from LSB to MSB in + * these vectors to read the entire content of the bitmap index (see + * examples/ex_4.c). + * + * @param[in] map The sparsemap reference. + * @param[in] skip Start the scan after "skip" bits. */ void sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t vec[], size_t n), size_t skip); -/** - * Appends all chunk maps from |map| starting at |offset| to |other|, then - * reduces the chunk map-count appropriately. +/** @brief Splits the bitmap by assigning all bits starting at \b offset to the + * \b other bitmap while removing them from \b map. + * + * @param[in] map The sparsemap reference. + * @param[in] offset The index of the first bit to move from \b map to \b other. */ void sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other); -/** - * Finds the offset of the n'th bit either set (|value| is true) or unset - * (|value| is false) from the start (positive |n|), or end (negative |n|), - * of the bitmap and returns that (uses a 0-based index). Returns -inf or +inf - * if not found (where "inf" is SPARSEMAP_IDX_MAX and "-inf" is SPARSEMAP_IDX_MIN). +/** @brief Finds the index of the \b n'th bit set to \b value. + * + * Locates the \b n'th bit either set, \b value is true, or unset, \b value is + * false, from the start, positive \b n, or end, negative \b n, of the bitmap. + * So, if your bit pattern is: ```1101 1110 1010 1101 1011 1110 1110 1111``` and + * you request the first set bit the result is `0` (meaning the 1st bit in the + * map which is index 0 because this is 0-based indexing). The first unset bit + * is `2` (or the third bit in the pattern). When n is 2 and value is true the + * result would be `3` (the fourth bit, or the third set bit which is at index + * 3 when 0-based). + * + * @param[in] map The sparsemap reference. + * @param[in] n Specifies how many matching bits to skip (when n=2 return the + * position of the third matching bit).
+ * @param[in] value Determines if the search is to examine set (true) or unset + * (false) bits in the bitmap index. + * @returns the 0-based index of the located bit position within the map; when + * not found, either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN. */ sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value); -/** - * Counts the set (|value| is true) or unset (|value| is false) bits starting - * at |x| bits (0-based) in the range [x, y] (inclusive on either end). +/** @brief Counts the bits matching \b value in the provided range, [\b x, \b + * y]. + * + * Counts the set, \b value is true, or unset, \b value is false, bits starting + * at the \b x'th bit (0-based) in the range [\b x, \b y] (inclusive on either + * end). If range is [0, 0] this examines 1 bit, the first one in the map, and + * returns 1 if value is true and the bit was set. + * + * @param[in] map The sparsemap reference. + * @param[in] x 0-based start of the inclusive range to examine. + * @param[in] y 0-based end of the inclusive range to examine. + * @param[in] value Determines if the scan is to count the set (true) or unset + * (false) bits in the range. + * @returns the count of bits found within the range that match the \b value */ size_t sparsemap_rank(sparsemap_t *map, size_t x, size_t y, bool value); -/** - * Finds the first span (i.e. a contiguous set of bits), in the bitmap that - * are set (|value| is true) or unset (|value| is false) and returns the - * starting offset for the span (0-based). +/** @brief Locates the first contiguous set of bits of \b len starting at \b idx + * matching \b value in the bitmap. + * + * @param[in] map The sparsemap reference. + * @param[in] idx 0-based start of search within the bitmap. + * @param[in] len The length of contiguous bits we're seeking. + * @param[in] value Determines if the scan is to find all set (true) or unset + * (false) bits of \b len.
+ * @returns the index of the first bit matching the criteria; when not found not + * found either SPARSEMAP_IDX_MAX or SPARSEMAP_IDX_MIN. */ size_t sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value); diff --git a/src/sparsemap.c b/src/sparsemap.c index 3c05a55..bc97738 100644 --- a/src/sparsemap.c +++ b/src/sparsemap.c @@ -395,19 +395,19 @@ __sm_chunk_map_set(__sm_chunk_t *map, size_t idx, bool value, size_t *pos, sm_bi } /** - * Returns the index of the n'th set bit; sets |*pnew_n| to 0 if the - * n'th bit was found in this __sm_chunk_t, or to the new, reduced - * value of |n|. + * Returns the index of the offset'th set bit; sets |*pnew_n| to 0 if the + * offset'th bit was found in this __sm_chunk_t, or to the new, reduced + * value of |offset|. */ static size_t -__sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) +__sm_chunk_map_select(__sm_chunk_t *map, size_t offset, ssize_t *pnew_n, bool value) { size_t ret = 0; register uint8_t *p; p = (uint8_t *)map->m_data; for (size_t i = 0; i < sizeof(sm_bitvec_t); i++, p++) { - if (*p == 0) { + if (*p == 0 && value) { ret += (size_t)SM_FLAGS_PER_INDEX_BYTE * SM_BITS_PER_VECTOR; continue; } @@ -418,28 +418,28 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) continue; } if (flags == SM_PAYLOAD_ZEROS) { - if (value) { + if (value == true) { ret += SM_BITS_PER_VECTOR; continue; } else { - if (n > SM_BITS_PER_VECTOR) { - n -= SM_BITS_PER_VECTOR; + if (offset > SM_BITS_PER_VECTOR) { + offset -= SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR; continue; } *pnew_n = -1; - return ret + n; + return ret + offset; } } if (flags == SM_PAYLOAD_ONES) { if (value) { - if (n > SM_BITS_PER_VECTOR) { - n -= SM_BITS_PER_VECTOR; + if (offset > SM_BITS_PER_VECTOR) { + offset -= SM_BITS_PER_VECTOR; ret += SM_BITS_PER_VECTOR; continue; } *pnew_n = -1; - return ret + n; + return ret + offset; } else { ret += SM_BITS_PER_VECTOR; continue; @@ -450,20 +450,20 @@ 
__sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) for (int k = 0; k < SM_BITS_PER_VECTOR; k++) { if (value) { if (w & ((sm_bitvec_t)1 << k)) { - if (n == 0) { + if (offset == 0) { *pnew_n = -1; return ret; } - n--; + offset--; } ret++; } else { if (!(w & ((sm_bitvec_t)1 << k))) { - if (n == 0) { + if (offset == 0) { *pnew_n = -1; return ret; } - n--; + offset--; } ret++; } @@ -471,7 +471,7 @@ __sm_chunk_map_select(__sm_chunk_t *map, size_t n, ssize_t *pnew_n, bool value) } } } - *pnew_n = (ssize_t)n; + *pnew_n = (ssize_t)offset; return ret; } @@ -914,11 +914,13 @@ sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size) * data_size is not exceeding the size of the underlying buffer. */ sparsemap_t * -sparsemap_set_data_size(sparsemap_t *map, size_t size) +sparsemap_set_data_size(sparsemap_t *map, size_t size, uint8_t *data) { - if ((uintptr_t)map->m_data == (uintptr_t)map + sizeof(sparsemap_t) && size > map->m_capacity) { - /* This sparsemap was allocated by the sparsemap() API, we can resize it. */ - size_t data_size = (size * sizeof(uint8_t)); + size_t data_size = (size * sizeof(uint8_t)); + + /* If this sparsemap was allocated by the sparsemap() API and we're not handed + a new data, it's up to us to resize it. */ + if (data == NULL && (uintptr_t)map->m_data == (uintptr_t)map + sizeof(sparsemap_t) && size > map->m_capacity) { /* Ensure that m_data is 8-byte aligned. 
*/ size_t total_size = sizeof(sparsemap_t) + data_size; @@ -934,6 +936,9 @@ sparsemap_set_data_size(sparsemap_t *map, size_t size) m->m_data = (uint8_t *)(((uintptr_t)m + sizeof(sparsemap_t)) & ~(uintptr_t)7); __sm_when_diag({ __sm_assert(IS_8_BYTE_ALIGNED(m->m_data)); }) return m; } else { + if (data != NULL && data_size > sparsemap_get_capacity(map) && data != map->m_data) { + map->m_data = data; + } map->m_capacity = size; return map; } @@ -1131,7 +1136,7 @@ sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value) return idx; } -sm_idx_t +sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map) { size_t count = __sm_get_chunk_map_count(map); @@ -1139,7 +1144,7 @@ sparsemap_get_starting_offset(sparsemap_t *map) return 0; } sm_idx_t *chunk = (sm_idx_t *)__sm_get_chunk_map_data(map, 0); - return *chunk; + return (sparsemap_idx_t)*chunk; } /** @@ -1304,13 +1309,19 @@ sparsemap_idx_t sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) { assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD); - size_t result; + sm_idx_t start; size_t count = __sm_get_chunk_map_count(map); + if (n >= 0) { uint8_t *p = __sm_get_chunk_map_data(map, 0); for (size_t i = 0; i < count; i++) { - result = *(sm_idx_t *)p; + start = *(sm_idx_t *)p; + /* Start of this chunk is greater than n meaning there are a set of 0s + before the first 1 sufficient to consume n. */ + if (value == false && i == 0 && start > n) { + return n; + } p += sizeof(sm_idx_t); __sm_chunk_t chunk; __sm_chunk_map_init(&chunk, p); @@ -1318,15 +1329,20 @@ sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value) ssize_t new_n = (ssize_t)n; size_t index = __sm_chunk_map_select(&chunk, n, &new_n, value); if (new_n == -1) { - return result + index; + return start + index; } n = new_n; p += __sm_chunk_map_get_size(&chunk); } - return SPARSEMAP_IDX_MAX; // TODO... shouldn't be here? 
+ if (value) { + return SPARSEMAP_IDX_MAX; + } else { + return count * SM_CHUNK_MAX_CAPACITY + 1; + } } else { - return SPARSEMAP_IDX_MIN; // TODO... sparsemap_select(map, -n, value); seek from end, not start + // TODO... sparsemap_select(map, -n, value); seek from end, not start + return SPARSEMAP_IDX_MIN; } } @@ -1416,7 +1432,9 @@ sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value) nth = (idx > 0) ? sparsemap_rank(map, 0, idx - 1, value) : 0; offset = sparsemap_select(map, nth++, value); - if (len == 1) { + if (SPARSEMAP_NOT_FOUND(offset)) + offset = 0; + else if (len == 1) { return offset; } do { diff --git a/tests/test.c b/tests/test.c index c3d4ef6..bf4220b 100644 --- a/tests/test.c +++ b/tests/test.c @@ -103,7 +103,7 @@ test_api_new_realloc(const MunitParameter params[], void *data) assert_true(map->m_capacity == 1024); assert_true(map->m_data_used == sizeof(uint32_t)); - map = sparsemap_set_data_size(map, 2048); + map = sparsemap_set_data_size(map, 2048, NULL); assert_true(map->m_capacity == 2048); assert_true(map->m_data_used == sizeof(uint32_t)); @@ -276,7 +276,7 @@ test_api_set_data_size(const MunitParameter params[], void *data) assert_ptr_not_null(map); assert_true(map->m_capacity == 1024); assert_true(map->m_capacity == sparsemap_get_capacity(map)); - sparsemap_set_data_size(map, 512); + sparsemap_set_data_size(map, 512, NULL); assert_true(map->m_capacity == 512); assert_true(map->m_capacity == sparsemap_get_capacity(map)); return MUNIT_OK; @@ -445,6 +445,9 @@ test_api_set(const MunitParameter params[], void *data) return MUNIT_OK; } +// TODO remove? not public API anymore... 
+extern sparsemap_idx_t sparsemap_get_starting_offset(sparsemap_t *map); + static void * test_api_get_starting_offset_setup(const MunitParameter params[], void *user_data) { @@ -962,7 +965,7 @@ test_scale_lots_o_spans(const MunitParameter params[], void *data) // TODO: sm_add_span(map, amt, l); sm_add_span(map, 10000, l); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL); errno = 0; } i += l; @@ -1008,7 +1011,7 @@ test_scale_ondrej(const MunitParameter params[], void *data) bool set = (i != needle) ? (j < 10) : (j < 9); sparsemap_set(map, i, set); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) * 2, NULL); errno = 0; } } @@ -1043,6 +1046,7 @@ test_scale_fuzz(const MunitParameter params[], void *data) { sparsemap_t *map = (sparsemap_t *)data; (void)params; + (void)map; //TODO... return MUNIT_OK; } @@ -1075,7 +1079,7 @@ test_scale_spans_come_spans_go(const MunitParameter params[], void *data) int l = i % 31 + 16; sm_add_span(map, amt, l); if (errno == ENOSPC) { - map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024); + map = sparsemap_set_data_size(map, sparsemap_get_capacity(map) + 1024, NULL); assert_ptr_not_null(map); errno = 0; }