2024-04-04 19:24:02 +00:00
|
|
|
/*
|
2024-04-10 19:34:19 +00:00
|
|
|
* Copyright (c) 2024 Gregory Burd <greg@burd.me>. All rights reserved.
|
2024-04-04 19:24:02 +00:00
|
|
|
*
|
2024-04-10 19:34:19 +00:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
2024-04-04 19:24:02 +00:00
|
|
|
*
|
2024-04-10 19:34:19 +00:00
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
|
2024-04-10 19:34:19 +00:00
|
|
|
#include <assert.h>
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <errno.h>
|
2024-04-10 19:34:19 +00:00
|
|
|
#include <popcount.h>
|
|
|
|
#include <sparsemap.h>
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <stdarg.h>
|
2024-04-11 03:16:06 +00:00
|
|
|
#include <stdbool.h>
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <stddef.h>
|
2024-04-11 03:16:06 +00:00
|
|
|
#include <stdint.h>
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <stdio.h>
|
2024-04-11 03:16:06 +00:00
|
|
|
#include <stdlib.h>
|
2024-04-24 20:32:09 +00:00
|
|
|
#include <string.h>
|
2024-04-04 19:24:02 +00:00
|
|
|
|
|
|
|
#ifdef SPARSEMAP_DIAGNOSTIC
/* Diagnostic build: __sm_diag() logs to stderr with file/line/function
   context, __sm_assert() reports (but does not abort on) failed checks,
   and __sm_when_diag() unconditionally executes its argument. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#pragma GCC diagnostic ignored "-Wvariadic-macros"
/* Forward the call site's location into the printf-style helper below. */
#define __sm_diag(format, ...) __sm_diag_(__FILE__, __LINE__, __func__, format, ##__VA_ARGS__)
#pragma GCC diagnostic pop
/* printf-style diagnostic sink; the format attribute lets the compiler
   type-check the variadic arguments against the format string. */
void __attribute__((format(printf, 4, 5))) __sm_diag_(const char *file, int line, const char *func, const char *format, ...)
{
  va_list args;
  fprintf(stderr, "%s:%d:%s(): ", file, line, func);
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
}

/* Soft assertion: logs the failed expression but continues execution. */
#define __sm_assert(expr) \
  if (!(expr)) \
  fprintf(stderr, "%s:%d:%s(): assertion failed! %s\n", __FILE__, __LINE__, __func__, #expr)

/* Execute diagnostic-only code (enabled in this build). */
#define __sm_when_diag(expr) \
  if (1) \
  expr
#else
/* Release build: all diagnostics compile away to no-ops. */
#define __sm_diag(file, line, func, format, ...) ((void)0)
#define __sm_assert(expr) ((void)0)
#define __sm_when_diag(expr) \
  if (0) \
  expr
#endif
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
/* True when |addr| is aligned to an 8-byte boundary. */
#define IS_8_BYTE_ALIGNED(addr) (((uintptr_t)(addr)&0x7) == 0)

/* A 64-bit word; the basic unit of bitmap storage. */
typedef uint64_t __sm_bitvec_t;

/* A chunk: a thin view over serialized data. m_data[0] is the flag
   descriptor, followed by zero or more payload vectors. */
typedef struct {
  __sm_bitvec_t *m_data;
} __sm_chunk_t;
|
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
enum __SM_CHUNK_INFO {
  /* metadata overhead: 4 bytes for the __sm_chunk_t count */
  SM_SIZEOF_OVERHEAD = sizeof(uint32_t),

  /* number of bits that can be stored in a __sm_bitvec_t */
  SM_BITS_PER_VECTOR = (sizeof(__sm_bitvec_t) * 8),

  /* number of 2-bit flags that can be stored in a single index byte */
  SM_FLAGS_PER_INDEX_BYTE = 4,

  /* number of flags that can be stored in the index (descriptor) word */
  SM_FLAGS_PER_INDEX = (sizeof(__sm_bitvec_t) * SM_FLAGS_PER_INDEX_BYTE),

  /* maximum capacity of a __sm_chunk_t (in bits) */
  SM_CHUNK_MAX_CAPACITY = (SM_BITS_PER_VECTOR * SM_FLAGS_PER_INDEX),

  /* minimum capacity of a __sm_chunk_t (in bits) */
  SM_CHUNK_MIN_CAPACITY = (SM_BITS_PER_VECTOR - 2),

  /* __sm_bitvec_t payload is all zeros (2#00); no vector stored */
  SM_PAYLOAD_ZEROS = 0,

  /* __sm_bitvec_t payload is all ones (2#11); no vector stored */
  SM_PAYLOAD_ONES = 3,

  /* __sm_bitvec_t payload is mixed (2#10); a payload vector follows */
  SM_PAYLOAD_MIXED = 2,

  /* __sm_bitvec_t is not used (2#01); capacity reduced by one vector */
  SM_PAYLOAD_NONE = 1,

  /* a mask for checking flags (2 bits, 2#11) */
  SM_FLAG_MASK = 3,

  /* return code for set(): ok, no further action required */
  SM_OK = 0,

  /* return code for set(): needs to grow this __sm_chunk_t */
  SM_NEEDS_TO_GROW = 1,

  /* return code for set(): needs to shrink this __sm_chunk_t */
  SM_NEEDS_TO_SHRINK = 2
};
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-07-15 14:37:16 +00:00
|
|
|
/* Extract the 2-bit flag for vector slot |at| (0-based) from descriptor word |from|. */
#define SM_CHUNK_GET_FLAGS(from, at) ((((from)) & ((__sm_bitvec_t)SM_FLAG_MASK << ((at)*2))) >> ((at)*2))
/* A chunk is RLE-encoded when the top 2-bit flag of its descriptor is
   SM_PAYLOAD_NONE (a state that is otherwise illegal in that position). */
#define SM_IS_CHUNK_RLE(chunk) \
  (((*((__sm_bitvec_t *)(chunk)->m_data) & (((__sm_bitvec_t)0x3) << (SM_BITS_PER_VECTOR - 2))) >> (SM_BITS_PER_VECTOR - 2)) == SM_PAYLOAD_NONE)
/* Run length of an RLE chunk: the descriptor word with its top flag bits masked off. */
#define SM_CHUNK_RLE_LENGTH(chunk) (size_t)(*((__sm_bitvec_t *)(chunk)->m_data) & ~(((__sm_bitvec_t)0x3) << (SM_BITS_PER_VECTOR - 2)))
|
2024-04-10 19:34:19 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
/* The sparse bitmap: a growable, serialized sequence of chunks.
   Aligned to 8 bytes so m_data can be addressed as __sm_bitvec_t words. */
struct __attribute__((aligned(8))) sparsemap {
  size_t m_capacity;  /* The total size of m_data */
  size_t m_data_used; /* The used size of m_data */
  uint8_t *m_data;    /* The serialized bitmap data */
};
|
|
|
|
|
2024-07-15 14:37:16 +00:00
|
|
|
/** @brief Calculates the additional vectors required based on \b b.
 *
 * The byte \b b holds four 2-bit flags. Only the SM_PAYLOAD_MIXED flag
 * (2#10) requires a payload vector; all other states are implicit. This
 * routine therefore returns the number of 2#10 pairs in \b b, which is
 * exactly the value the generated lookup table used to provide.
 *
 * @param[in] b The input flag byte used for the calculation.
 * @return The number of payload vectors described by \b b (0..4).
 * @see bin/gen_chunk_vector_size_table.py
 */
static size_t
__sm_chunk_calc_vector_size(uint8_t b)
{
  size_t vectors = 0;
  /* Examine each of the four 2-bit flag fields in the byte. */
  for (int shift = 0; shift < 8; shift += 2) {
    if (((b >> shift) & 0x3) == 2) { /* 2#10 == SM_PAYLOAD_MIXED */
      vectors++;
    }
  }
  return vectors;
}
|
2024-07-15 14:37:16 +00:00
|
|
|
|
|
|
|
/** @brief Calculates the byte offset of a vector within a chunk.
 *
 * This function determines the starting byte offset of the specified vector
 * within the chunk's data. The chunk's data is organized as a descriptor
 * followed by zero or more vectors. The descriptor's flags indicate whether
 * additional vectors are stored.
 *
 * @param[in] chunk Pointer to the chunk containing the vector.
 * @param[in] bv Index of the desired vector within the chunk (0-based).
 * @return Byte offset of the vector within the chunk's data.
 */
static size_t
__sm_chunk_get_position(__sm_chunk_t *chunk, size_t bv)
{
  /* Handle 4 indices (1 byte) at a time. */
  size_t num_bytes;
  size_t position = 0;
  register uint8_t *p = (uint8_t *)chunk->m_data;

  /* Handle RLE by examining the first byte. RLE chunks store no payload
     vectors, so every position is 0. */
  if (SM_IS_CHUNK_RLE(chunk)) {
    return position;
  }

  /* NOTE(review): for bv < SM_FLAGS_PER_INDEX this divisor makes num_bytes
     always 0, so the byte-at-a-time loop below never executes — presumably
     the divisor was meant to be SM_FLAGS_PER_INDEX_BYTE; confirm upstream. */
  num_bytes = bv / ((size_t)SM_FLAGS_PER_INDEX_BYTE * SM_BITS_PER_VECTOR);
  for (size_t i = 0; i < num_bytes; i++, p++) {
    position += __sm_chunk_calc_vector_size(*p);
  }

  /* Walk the remaining flags one at a time; only SM_PAYLOAD_MIXED slots
     contribute a stored payload vector. */
  bv -= num_bytes * SM_FLAGS_PER_INDEX_BYTE;
  for (size_t i = 0; i < bv; i++) {
    size_t flags = SM_CHUNK_GET_FLAGS(*chunk->m_data, i);
    if (flags == SM_PAYLOAD_MIXED) {
      position++;
    }
  }

  return position;
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-07-15 14:37:16 +00:00
|
|
|
/** @brief Initializes a chunk structure with raw data.
 *
 * This function casts the provided raw data pointer to a `__sm_bitvec_t`
 * pointer and stores it in the `m_data` member of the `__sm_chunk_t`
 * structure. The chunk does not own the data; it is only a view.
 *
 * @param chunk Pointer to the chunk structure to initialize.
 * @param data Pointer to the raw data to be used by the chunk; assumed to
 *             be suitably aligned for __sm_bitvec_t access.
 */
static inline void
__sm_chunk_init(__sm_chunk_t *chunk, uint8_t *data)
{
  chunk->m_data = (__sm_bitvec_t *)data;
}
|
|
|
|
|
2024-07-15 14:37:16 +00:00
|
|
|
/** @brief Calculates the representation capacity of a chunk in bits.
 *
 * Determines the maximum number of bits available for storing data within
 * the chunk. The capacity is typically `SM_CHUNK_MAX_CAPACITY` bits, but it
 * is reduced by `SM_BITS_PER_VECTOR` for every flag marked
 * `SM_PAYLOAD_NONE`, and for an RLE chunk it is the stored run length.
 *
 * @param[in] chunk Pointer to the chunk to examine.
 * @return The maximum usable capacity of the chunk in bits.
 */
static size_t
__sm_chunk_get_capacity(__sm_chunk_t *chunk)
{
  size_t capacity = 0;
  register uint8_t *p = (uint8_t *)chunk->m_data;

  /* Handle RLE by examining the first byte, then decode the remainder. */
  if (SM_IS_CHUNK_RLE(chunk)) {
    return SM_CHUNK_RLE_LENGTH(chunk);
  }

  capacity = SM_CHUNK_MAX_CAPACITY;
  for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
    /* 0x00 and 0xff bytes contain no SM_PAYLOAD_NONE (2#01) flags. */
    if (!*p || *p == 0xff) {
      continue;
    }
    for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
      size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
      if (flags == SM_PAYLOAD_NONE) {
        capacity -= SM_BITS_PER_VECTOR;
      }
    }
  }
  return capacity;
}
|
|
|
|
|
2024-05-13 02:07:45 +00:00
|
|
|
/** @brief Reduces the capacity of this chunk.
 *
 * A chunk's capacity is generally bounded by `SM_CHUNK_MAX_CAPACITY` bits but
 * can be more or less in certain circumstances. This function reduces capacity
 * by marking flags as `SM_PAYLOAD_NONE` starting from the least significant
 * pair of bits. Each flag set as such reduces the capacity by
 * `SM_BITS_PER_VECTOR`. When the capacity would drop to zero the caller
 * should remove the chunk. It is illegal to have all flags set to
 * `SM_PAYLOAD_NONE` as that would be erroneously interpreted as an RLE chunk.
 *
 * @param[in] chunk The chunk in question.
 * @param[in] capacity The reduced capacity in bits to assign to the chunk,
 * must be less than SM_CHUNK_MAX_CAPACITY.
 * @returns 0 on success; 1 when \b capacity is below SM_CHUNK_MIN_CAPACITY.
 */
static int
__sm_chunk_reduce_capacity(__sm_chunk_t *chunk, size_t capacity)
{
  __sm_assert(capacity % SM_BITS_PER_VECTOR == 0);
  __sm_assert(capacity <= SM_CHUNK_MAX_CAPACITY);

  /* Already at (or above) the requested capacity: nothing to do. */
  if (capacity >= SM_CHUNK_MAX_CAPACITY) {
    return 0;
  }

  /* Refuse to shrink so far that the chunk would look like an RLE chunk. */
  if (capacity < SM_CHUNK_MIN_CAPACITY) {
    return 1;
  }

  size_t reduced = 0;
  register uint8_t *p = (uint8_t *)chunk->m_data;
  /* Mark flags SM_PAYLOAD_NONE from the most significant byte/pair downward
     until exactly (SM_CHUNK_MAX_CAPACITY - capacity) bits are retired. */
  for (ssize_t i = sizeof(__sm_bitvec_t) - 1; i >= 0; i--) {
    for (int j = SM_FLAGS_PER_INDEX_BYTE - 1; j >= 0; j--) {
      p[i] &= ~((__sm_bitvec_t)SM_PAYLOAD_ONES << (j * 2));
      p[i] |= ((__sm_bitvec_t)SM_PAYLOAD_NONE << (j * 2));
      reduced += SM_BITS_PER_VECTOR;
      if (capacity + reduced == SM_CHUNK_MAX_CAPACITY) {
        __sm_assert(__sm_chunk_get_capacity(chunk) == capacity);
        return 0;
      }
    }
  }
  __sm_assert(__sm_chunk_get_capacity(chunk) == capacity);
  __sm_assert(SM_IS_CHUNK_RLE(chunk) == false);
  return 0;
}
|
|
|
|
|
2024-05-13 02:07:45 +00:00
|
|
|
/** @brief Increases the capacity of this chunk.
 *
 * Converts flags currently marked `SM_PAYLOAD_NONE` back to
 * `SM_PAYLOAD_ZEROS`, scanning from the least significant pair upward, until
 * the chunk's capacity reaches \b capacity. Each converted flag adds
 * `SM_BITS_PER_VECTOR` bits. Does nothing when \b capacity does not exceed
 * the current capacity or exceeds `SM_CHUNK_MAX_CAPACITY`.
 *
 * @param[in] chunk The chunk in question.
 * @param[in] capacity The target capacity in bits, a multiple of
 * SM_BITS_PER_VECTOR and at most SM_CHUNK_MAX_CAPACITY.
 */
static void
__sm_chunk_increase_capacity(__sm_chunk_t *chunk, size_t capacity)
{
  __sm_assert(capacity % SM_BITS_PER_VECTOR == 0);
  __sm_assert(capacity <= SM_CHUNK_MAX_CAPACITY);
  __sm_assert(capacity > __sm_chunk_get_capacity(chunk));

  size_t initial_capacity = __sm_chunk_get_capacity(chunk);
  if (capacity <= initial_capacity || capacity > SM_CHUNK_MAX_CAPACITY) {
    return;
  }

  size_t increased = 0;
  register uint8_t *p = (uint8_t *)chunk->m_data;
  for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
    /* 0x00 and 0xff bytes contain no SM_PAYLOAD_NONE (2#01) flags. */
    if (!*p || *p == 0xff) {
      continue;
    }
    for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
      size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
      if (flags == SM_PAYLOAD_NONE) {
        /* Flip 2#01 (NONE) to 2#00 (ZEROS), reclaiming one vector's bits. */
        *p &= ~((__sm_bitvec_t)SM_PAYLOAD_ONES << (j * 2));
        *p |= ((__sm_bitvec_t)SM_PAYLOAD_ZEROS << (j * 2));
        increased += SM_BITS_PER_VECTOR;
        if (increased + initial_capacity == capacity) {
          __sm_assert(__sm_chunk_get_capacity(chunk) == capacity);
          return;
        }
      }
    }
  }
  __sm_assert(__sm_chunk_get_capacity(chunk) == capacity);
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Examines the chunk to determine if it is empty.
|
|
|
|
*
|
2024-05-06 19:43:47 +00:00
|
|
|
* @param[in] chunk The chunk in question.
|
2024-04-29 16:10:21 +00:00
|
|
|
* @returns true if this __sm_chunk_t is empty
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static bool
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_is_empty(__sm_chunk_t *chunk)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-07-02 13:07:25 +00:00
|
|
|
if (chunk->m_data[0] != 0) {
|
|
|
|
/* A chunk is considered empty if all flags are SM_PAYLOAD_ZERO or _NONE. */
|
|
|
|
register uint8_t *p = (uint8_t *)chunk->m_data;
|
2024-07-15 14:37:16 +00:00
|
|
|
for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
|
2024-07-02 13:07:25 +00:00
|
|
|
if (*p) {
|
|
|
|
for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
|
|
|
|
size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
|
|
|
|
if (flags != SM_PAYLOAD_NONE && flags != SM_PAYLOAD_ZEROS) {
|
|
|
|
return false;
|
|
|
|
}
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
}
|
|
|
|
}
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-07-02 13:07:25 +00:00
|
|
|
/* The __sm_chunk_t is empty if all flags (in m_data[0]) are zero. */
|
2024-04-24 20:32:09 +00:00
|
|
|
return true;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Examines the chunk to determine its size.
|
|
|
|
*
|
2024-05-06 19:43:47 +00:00
|
|
|
* @param[in] chunk The chunk in question.
|
2024-04-29 16:10:21 +00:00
|
|
|
* @returns the size of the data buffer, in bytes.
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static size_t
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_get_size(__sm_chunk_t *chunk)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-07-15 14:37:16 +00:00
|
|
|
/* At least one __sm_bitvec_t is required for the flags (m_data[0]) */
|
|
|
|
size_t size = sizeof(__sm_bitvec_t);
|
2024-04-04 19:24:02 +00:00
|
|
|
/* Use a lookup table for each byte of the flags */
|
2024-05-06 19:43:47 +00:00
|
|
|
register uint8_t *p = (uint8_t *)chunk->m_data;
|
2024-07-15 14:37:16 +00:00
|
|
|
for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
|
|
|
|
size += sizeof(__sm_bitvec_t) * __sm_chunk_calc_vector_size(*p);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
return size;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Examines the chunk at \b idx to determine that bit's state (set,
 * or unset).
 *
 * @param[in] chunk The chunk in question.
 * @param[in] idx The 0-based index into this chunk to examine.
 * @returns the value of a bit at index \b idx
 */
static bool
__sm_chunk_is_set(__sm_chunk_t *chunk, size_t idx)
{
  /* in which __sm_bitvec_t is |idx| stored? */
  size_t bv = idx / SM_BITS_PER_VECTOR;
  __sm_assert(bv < SM_FLAGS_PER_INDEX);

  /* now retrieve the flags of that __sm_bitvec_t */
  size_t flags = SM_CHUNK_GET_FLAGS(*chunk->m_data, bv);
  switch (flags) {
  case SM_PAYLOAD_ZEROS:
  case SM_PAYLOAD_NONE:
    return false;
  case SM_PAYLOAD_ONES:
    return true;
  default:
    __sm_assert(flags == SM_PAYLOAD_MIXED);
    /* FALLTHROUGH */
  }

  /* get the __sm_bitvec_t at |bv| (offset by 1 to skip the descriptor) */
  __sm_bitvec_t w = chunk->m_data[1 + __sm_chunk_get_position(chunk, bv)];
  /* and finally check the bit in that __sm_bitvec_t */
  return (w & ((__sm_bitvec_t)1 << (idx % SM_BITS_PER_VECTOR))) > 0;
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Assigns a state to a bit in the chunk (set or unset).
 *
 * Sets the value of a bit at index \b idx. Then updates position \b pos to
 * the position of the __sm_bitvec_t which is inserted/deleted and \b fill -
 * the value of the fill word (used when growing).
 *
 * @param[in] chunk The chunk in question.
 * @param[in] idx The 0-based index into this chunk to mutate.
 * @param[in] value The new state for the \b idx'th bit.
 * @param[in,out] pos The position of the __sm_bitvec_t inserted/deleted
 * within the chunk.
 * @param[in,out] fill The value of the fill word (when growing).
 * @param[in] retried When not retried, grow the chunk by a bitvec.
 * @returns \b SM_NEEDS_TO_GROW, \b SM_NEEDS_TO_SHRINK, or \b SM_OK
 * @note, the caller MUST perform the relevant actions and call set() again,
 * this time with \b retried = true.
 */
static int
__sm_chunk_set(__sm_chunk_t *chunk, size_t idx, bool value, size_t *pos, __sm_bitvec_t *fill, bool retried)
{
  /* In which __sm_bitvec_t is |idx| stored? */
  size_t bv = idx / SM_BITS_PER_VECTOR;
  __sm_assert(bv < SM_FLAGS_PER_INDEX);

  /* Now retrieve the flags of that __sm_bitvec_t. */
  size_t flags = SM_CHUNK_GET_FLAGS(*chunk->m_data, bv);
  assert(flags != SM_PAYLOAD_NONE);
  if (flags == SM_PAYLOAD_ZEROS) {
    /* Easy - set bit to 0 in a __sm_bitvec_t of zeroes. */
    if (value == false) {
      *pos = 0;
      *fill = 0;
      return SM_OK;
    }
    /* The sparsemap must grow this __sm_chunk_t by one additional
       __sm_bitvec_t, then try again. */
    if (!retried) {
      *pos = 1 + __sm_chunk_get_position(chunk, bv);
      *fill = 0;
      return SM_NEEDS_TO_GROW;
    }
    /* New flags are 2#10 meaning SM_PAYLOAD_MIXED. Currently, flags are set
       to 2#00, so 2#00 | 2#10 = 2#10. */
    *chunk->m_data |= ((__sm_bitvec_t)SM_PAYLOAD_MIXED << (bv * 2));
    /* FALLTHROUGH */
  } else if (flags == SM_PAYLOAD_ONES) {
    /* Easy - set bit to 1 in a __sm_bitvec_t of ones. */
    if (value == true) {
      *pos = 0;
      *fill = 0;
      return SM_OK;
    }
    /* The sparsemap must grow this __sm_chunk_t by one additional
       __sm_bitvec_t, then try again. */
    if (!retried) {
      *pos = 1 + __sm_chunk_get_position(chunk, bv);
      *fill = (__sm_bitvec_t)-1; /* new vector starts as all ones */
      return SM_NEEDS_TO_GROW;
    }
    /* New flags are 2#10 meaning SM_PAYLOAD_MIXED. Currently, flags are
       set to 2#11, so 2#11 ^ 2#01 = 2#10. */
    chunk->m_data[0] ^= ((__sm_bitvec_t)SM_PAYLOAD_NONE << (bv * 2));
    /* FALLTHROUGH */
  }

  /* Now flip the bit. */
  size_t position = 1 + __sm_chunk_get_position(chunk, bv);
  __sm_bitvec_t w = chunk->m_data[position];
  if (value) {
    w |= (__sm_bitvec_t)1 << (idx % SM_BITS_PER_VECTOR);
  } else {
    w &= ~((__sm_bitvec_t)1 << (idx % SM_BITS_PER_VECTOR));
  }

  /* If this __sm_bitvec_t is now all zeroes or ones then we can remove it;
     the descriptor flag alone represents it. */
  if (w == 0) {
    chunk->m_data[0] &= ~((__sm_bitvec_t)SM_PAYLOAD_ONES << (bv * 2));
    *pos = position;
    *fill = 0;
    return SM_NEEDS_TO_SHRINK;
  }
  if (w == (__sm_bitvec_t)-1) {
    chunk->m_data[0] |= (__sm_bitvec_t)SM_PAYLOAD_ONES << (bv * 2);
    *pos = position;
    *fill = 0;
    return SM_NEEDS_TO_SHRINK;
  }

  /* Still mixed: store the updated payload vector in place. */
  chunk->m_data[position] = w;
  *pos = 0;
  *fill = 0;
  return SM_OK;
}
|
|
|
|
|
2024-04-30 18:40:23 +00:00
|
|
|
/** @brief Finds the index of the \b n'th bit after \b offset bits with \b
 * value.
 *
 * Scans the \b chunk until after \b offset bits (of any value) have
 * passed and then begins counting the bits that match \b value looking
 * for the \b n'th bit. It may not be in this chunk, when it is offset is set.
 *
 * @param[in] chunk The chunk in question.
 * @param[in] n The 0-based rank of the sought bit within this chunk.
 * @param[in] value Informs what we're seeking, a set or unset bit's position.
 * @param offset[in,out] Sets \b offset to -1 if the n'th bit was found
 * in this __sm_chunk_t, or to the reduced value of \b n after the bits
 * observed in this chunk were consumed.
 * @returns the 0-based index of the n'th set bit when found, otherwise
 * the number of bit positions scanned in this chunk.
 */
static size_t
__sm_chunk_select(__sm_chunk_t *chunk, size_t n, ssize_t *offset, bool value)
{
  size_t ret = 0;
  register uint8_t *p;

  p = (uint8_t *)chunk->m_data;
  for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
    /* A zero descriptor byte means 4 vectors of zeros: when seeking set
       bits they can be skipped wholesale. */
    if (*p == 0 && value) {
      ret += (size_t)SM_FLAGS_PER_INDEX_BYTE * SM_BITS_PER_VECTOR;
      continue;
    }

    for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
      size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
      if (flags == SM_PAYLOAD_NONE) {
        /* Unused slot: contributes no bit positions. */
        continue;
      }
      if (flags == SM_PAYLOAD_ZEROS) {
        if (value == true) {
          /* No set bits here; skip the whole vector. */
          ret += SM_BITS_PER_VECTOR;
          continue;
        } else {
          /* Every bit is an unset match. */
          if (n > SM_BITS_PER_VECTOR) {
            n -= SM_BITS_PER_VECTOR;
            ret += SM_BITS_PER_VECTOR;
            continue;
          }
          *offset = -1;
          return ret + n;
        }
      }
      if (flags == SM_PAYLOAD_ONES) {
        if (value == true) {
          /* Every bit is a set match. */
          if (n > SM_BITS_PER_VECTOR) {
            n -= SM_BITS_PER_VECTOR;
            ret += SM_BITS_PER_VECTOR;
            continue;
          }
          *offset = -1;
          return ret + n;
        } else {
          /* No unset bits here; skip the whole vector. */
          ret += SM_BITS_PER_VECTOR;
          continue;
        }
      }
      if (flags == SM_PAYLOAD_MIXED) {
        /* Walk the payload vector bit by bit. */
        __sm_bitvec_t w = chunk->m_data[1 + __sm_chunk_get_position(chunk, i * SM_FLAGS_PER_INDEX_BYTE + j)];
        for (int k = 0; k < SM_BITS_PER_VECTOR; k++) {
          if (value) {
            if (w & ((__sm_bitvec_t)1 << k)) {
              if (n == 0) {
                *offset = -1;
                return ret;
              }
              n--;
            }
            ret++;
          } else {
            if (!(w & ((__sm_bitvec_t)1 << k))) {
              if (n == 0) {
                *offset = -1;
                return ret;
              }
              n--;
            }
            ret++;
          }
        }
      }
    }
  }
  /* Not found in this chunk: report the remaining rank to the caller. */
  *offset = n;
  return ret;
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Counts the bits matching \b value in the range [0, \b end]
 * inclusive after ignoring the first \b *begin bits in the chunk.
 *
 * Scans the \b chunk until after \b *begin bits (of any value) have
 * passed and then begins counting the bits that match \b value. The
 * result should never be greater than \b end + 1, maxing out at
 * SM_BITS_PER_VECTOR. A range of [0, 0] will count 1 bit at \b *begin
 * + 1 in this chunk. A range of [0, 9] will count 10 bits, starting
 * with the 0th and ending with the 9th and return at most a count of
 * 10.
 *
 * @param[in] chunk The chunk in question.
 * @param[in,out] begin Decreased by the number of bits ignored, at most
 * by SM_BITS_PER_VECTOR per vector examined.
 * @param[in] end The ending value of the range (inclusive) to count.
 * @param[out] pos_in_chunk The position of the last bit examined in this
 * chunk, always <= SM_BITS_PER_VECTOR, used when counting unset bits that
 * fall within this chunk's range but after the last set bit.
 * @param[out] last_bitvec The last __sm_bitvec_t, masked and shifted, so as
 * to be able to examine the bits used in the last portion of the ranking as
 * a way to skip forward during a #span() operation.
 * @param[in] value Informs what we're seeking, set or unset bits.
 * @returns the count of the bits matching \b value within the range.
 */
static size_t
__sm_chunk_rank(__sm_chunk_t *chunk, size_t *begin, size_t end, size_t *pos_in_chunk, __sm_bitvec_t *last_bitvec, bool value)
{
    size_t ret = 0;

    *pos_in_chunk = 0;

    /* A chunk can only hold at most SM_CHUNK_MAX_CAPACITY bits, so if
       begin is larger than that, we're basically done. */
    if (*begin >= SM_CHUNK_MAX_CAPACITY) {
        *pos_in_chunk = SM_CHUNK_MAX_CAPACITY;
        *begin -= SM_CHUNK_MAX_CAPACITY;
        return 0;
    }

    /* Walk the descriptor bytes; each byte carries SM_FLAGS_PER_INDEX_BYTE
       two-bit flags, one per vector of SM_BITS_PER_VECTOR bits. */
    register uint8_t *p = (uint8_t *)chunk->m_data;
    for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
        for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
            size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
            if (flags == SM_PAYLOAD_NONE) {
                continue;
            }
            if (flags == SM_PAYLOAD_ZEROS) {
                /* Implicit all-zeroes vector: contributes only when ranking
                   unset bits (value == false). */
                *last_bitvec = 0;
                if (end >= SM_BITS_PER_VECTOR) {
                    /* The range covers this whole vector. */
                    *pos_in_chunk += SM_BITS_PER_VECTOR;
                    end -= SM_BITS_PER_VECTOR;
                    if (*begin >= SM_BITS_PER_VECTOR) {
                        /* Still consuming the skipped prefix. */
                        *begin = *begin - SM_BITS_PER_VECTOR;
                    } else {
                        if (value == false) {
                            ret += SM_BITS_PER_VECTOR - *begin;
                        }
                        *begin = 0;
                    }
                } else {
                    /* The range ends inside this vector. */
                    *pos_in_chunk += end + 1;
                    if (value == false) {
                        if (*begin > end) {
                            *begin = *begin - end;
                        } else {
                            ret += end + 1 - *begin;
                            *begin = 0;
                            return ret;
                        }
                    } else {
                        /* All zeroes; no set bits to add. */
                        return ret;
                    }
                }
            } else if (flags == SM_PAYLOAD_ONES) {
                /* Implicit all-ones vector: mirror image of the
                   SM_PAYLOAD_ZEROS case above. */
                *last_bitvec = UINT64_MAX;
                if (end >= SM_BITS_PER_VECTOR) {
                    *pos_in_chunk += SM_BITS_PER_VECTOR;
                    end -= SM_BITS_PER_VECTOR;
                    if (*begin >= SM_BITS_PER_VECTOR) {
                        *begin = *begin - SM_BITS_PER_VECTOR;
                    } else {
                        if (value == true) {
                            ret += SM_BITS_PER_VECTOR - *begin;
                        }
                        *begin = 0;
                    }
                } else {
                    *pos_in_chunk += end + 1;
                    if (value == true) {
                        if (*begin > end) {
                            *begin = *begin - end;
                        } else {
                            ret += end + 1 - *begin;
                            *begin = 0;
                            return ret;
                        }
                    } else {
                        return ret;
                    }
                }
            } else if (flags == SM_PAYLOAD_MIXED) {
                /* Materialized vector: fetch it and popcount the masked
                   region. */
                __sm_bitvec_t w = chunk->m_data[1 + __sm_chunk_get_position(chunk, i * SM_FLAGS_PER_INDEX_BYTE + j)];
                if (end >= SM_BITS_PER_VECTOR) {
                    *pos_in_chunk += SM_BITS_PER_VECTOR;
                    end -= SM_BITS_PER_VECTOR;
                    /* Mask clears the low *begin bits (the skipped prefix). */
                    uint64_t mask = *begin == 0 ? UINT64_MAX : ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - (*begin >= 64 ? 64 : *begin)));
                    __sm_bitvec_t mw;
                    if (value == true) {
                        mw = w & mask;
                    } else {
                        mw = ~w & mask;
                    }
                    size_t pc = popcountll(mw);
                    ret += pc;
                    *begin = (*begin > SM_BITS_PER_VECTOR) ? *begin - SM_BITS_PER_VECTOR : 0;
                } else {
                    *pos_in_chunk += end + 1;
                    __sm_bitvec_t mw;
                    uint64_t mask;
                    uint64_t end_mask = (end == 63) ? UINT64_MAX : ((uint64_t)1 << (end + 1)) - 1;
                    uint64_t begin_mask = *begin == 0 ? UINT64_MAX : ~(UINT64_MAX >> (SM_BITS_PER_VECTOR - (*begin >= 64 ? 64 : *begin)));
                    /* To count the set bits we need to mask off the portion of the vector that we need
                       to count then call popcount(). So, let's create a mask for the range between
                       begin and end inclusive [*begin, end]. */
                    mask = end_mask & begin_mask;
                    if (value) {
                        mw = w & mask;
                    } else {
                        mw = ~w & mask;
                    }
                    int pc = popcountll(mw);
                    ret += pc;
                    *last_bitvec = mw >> ((*begin > 63) ? 63 : *begin);
                    /* NOTE(review): '*begin - end + 1' differs from the
                       '*begin - end' used in the implicit-vector branches
                       above — confirm which adjustment is intended. */
                    *begin = *begin > end ? *begin - end + 1 : 0;
                    return ret;
                }
            }
        }
    }
    return ret;
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Calls \b scanner with the indices of set bits for each vector in
 * this chunk.
 *
 * Decompresses the whole chunk into separate index arrays and invokes the
 * \b scanner callback for all bits that are set.
 *
 * @param[in] chunk The chunk in question.
 * @param[in] start Starting offset; indices passed to \b scanner are
 * start-relative (start + position).
 * @param[in] scanner Callback function which receives an array of indices (with
 * bits set to 1), the size of the array and an auxiliary pointer provided by
 * the caller.
 * @param[in] skip The number of bits to skip in the beginning.
 * @param[in] aux Opaque pointer forwarded unchanged to \b scanner.
 * @returns the number of (set) bits that were passed to the scanner
 */
static size_t
__sm_chunk_scan(__sm_chunk_t *chunk, sm_idx_t start, void (*scanner)(sm_idx_t[], size_t, void *aux), size_t skip, void *aux)
{
    size_t ret = 0;
    register uint8_t *p = (uint8_t *)chunk->m_data;
    sm_idx_t buffer[SM_BITS_PER_VECTOR];
    for (size_t i = 0; i < sizeof(__sm_bitvec_t); i++, p++) {
        if (*p == 0) {
            /* A zero descriptor byte: every vector it describes is all
               zeroes, nothing to report.
               NOTE(review): this byte spans SM_FLAGS_PER_INDEX_BYTE vectors,
               yet skip is reduced by at most one vector's worth here (the
               per-flag path below reduces it once per vector) — confirm the
               skip accounting. */
            skip -= skip > SM_BITS_PER_VECTOR ? SM_BITS_PER_VECTOR : skip;
            continue;
        }

        for (int j = 0; j < SM_FLAGS_PER_INDEX_BYTE; j++) {
            size_t flags = SM_CHUNK_GET_FLAGS(*p, j);
            if (flags == SM_PAYLOAD_NONE || flags == SM_PAYLOAD_ZEROS) {
                /* Skip when all zeroes. */
                skip -= skip > SM_BITS_PER_VECTOR ? SM_BITS_PER_VECTOR : skip;
            } else if (flags == SM_PAYLOAD_ONES) {
                /* Implicit all-ones vector: every position is reported. */
                if (skip) {
                    if (skip >= SM_BITS_PER_VECTOR) {
                        /* Entire vector skipped but still counted in ret. */
                        skip -= SM_BITS_PER_VECTOR;
                        ret += SM_BITS_PER_VECTOR;
                        continue;
                    }
                    size_t n = 0;
                    for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
                        buffer[n++] = start + ret + b;
                    }
                    scanner(&buffer[0], n, aux);
                    ret += n;
                    skip = 0;
                } else {
                    for (size_t b = 0; b < SM_BITS_PER_VECTOR; b++) {
                        buffer[b] = start + ret + b;
                    }
                    scanner(&buffer[0], SM_BITS_PER_VECTOR, aux);
                    ret += SM_BITS_PER_VECTOR;
                }
            } else if (flags == SM_PAYLOAD_MIXED) {
                /* Materialized vector: test each bit individually. */
                __sm_bitvec_t w = chunk->m_data[1 + __sm_chunk_get_position(chunk, i * SM_FLAGS_PER_INDEX_BYTE + j)];
                size_t n = 0;
                if (skip) {
                    if (skip >= SM_BITS_PER_VECTOR) {
                        skip -= SM_BITS_PER_VECTOR;
                        ret += SM_BITS_PER_VECTOR;
                        continue;
                    }
                    for (int b = 0; b < SM_BITS_PER_VECTOR; b++) {
                        if (skip > 0) {
                            skip--;
                            continue;
                        }
                        if (w & ((__sm_bitvec_t)1 << b)) {
                            /* NOTE(review): ret is incremented inside this
                               loop, shifting the base of later buffer
                               entries within the same vector — confirm this
                               is the intended index numbering (the no-skip
                               branch below adds n only after the loop). */
                            buffer[n++] = start + ret + b;
                            ret++;
                        }
                    }
                } else {
                    for (int b = 0; b < SM_BITS_PER_VECTOR; b++) {
                        if (w & ((__sm_bitvec_t)1 << b)) {
                            buffer[n++] = start + ret + b;
                        }
                    }
                    ret += n;
                }
                __sm_assert(n > 0);
                scanner(&buffer[0], n, aux);
            }
        }
    }
    return ret;
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Provides the number of chunks currently in the map.
|
|
|
|
*
|
2024-05-06 19:43:47 +00:00
|
|
|
* @param[in] chunk The sparsemap_t in question.
|
|
|
|
* @returns the number of chunks in the sparsemap
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static size_t
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_get_chunk_count(sparsemap_t *map)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-24 20:32:09 +00:00
|
|
|
return *(uint32_t *)&map->m_data[0];
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Encapsulates the method to find the starting address of a chunk's
|
|
|
|
* data.
|
|
|
|
*
|
|
|
|
* @param[in] map The sparsemap_t in question.
|
2024-05-06 19:43:47 +00:00
|
|
|
* @param[in] offset The offset in bytes for the desired chunk.
|
2024-04-29 16:10:21 +00:00
|
|
|
* @returns the data for the specified \b offset
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
2024-04-10 19:34:19 +00:00
|
|
|
static inline uint8_t *
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_get_chunk_data(sparsemap_t *map, size_t offset)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-24 20:32:09 +00:00
|
|
|
return &map->m_data[SM_SIZEOF_OVERHEAD + offset];
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Encapsulates the method to find the address of the first unused byte
|
|
|
|
* in \b m_data.
|
|
|
|
*
|
|
|
|
* @param[in] map The sparsemap_t in question.
|
|
|
|
* @returns a pointer after the end of the used data
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static uint8_t *
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_get_chunk_end(sparsemap_t *map)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-05-06 19:43:47 +00:00
|
|
|
uint8_t *p = __sm_get_chunk_data(map, 0);
|
|
|
|
size_t count = __sm_get_chunk_count(map);
|
2024-04-04 19:24:02 +00:00
|
|
|
for (size_t i = 0; i < count; i++) {
|
|
|
|
p += sizeof(sm_idx_t);
|
|
|
|
__sm_chunk_t chunk;
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_init(&chunk, p);
|
|
|
|
p += __sm_chunk_get_size(&chunk);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-24 20:32:09 +00:00
|
|
|
return p;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Provides the byte size amount of \b m_data consumed.
|
|
|
|
*
|
|
|
|
* @param[in] map The sparsemap_t in question.
|
|
|
|
* @returns the used size in the data buffer
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static size_t
|
|
|
|
__sm_get_size_impl(sparsemap_t *map)
|
|
|
|
{
|
2024-05-06 19:43:47 +00:00
|
|
|
uint8_t *start = __sm_get_chunk_data(map, 0);
|
2024-04-04 19:24:02 +00:00
|
|
|
uint8_t *p = start;
|
|
|
|
|
2024-05-06 19:43:47 +00:00
|
|
|
size_t count = __sm_get_chunk_count(map);
|
2024-04-04 19:24:02 +00:00
|
|
|
for (size_t i = 0; i < count; i++) {
|
|
|
|
p += sizeof(sm_idx_t);
|
|
|
|
__sm_chunk_t chunk;
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_init(&chunk, p);
|
|
|
|
p += __sm_chunk_get_size(&chunk);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-24 20:32:09 +00:00
|
|
|
return SM_SIZEOF_OVERHEAD + p - start;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Aligns to SM_BITS_PER_VECTOR a given index \b idx.
|
|
|
|
*
|
|
|
|
* @param[in] idx The index to align.
|
2024-07-15 14:37:16 +00:00
|
|
|
* @returns the aligned offset (aligned to __sm_bitvec_t capacity).
|
2024-04-04 19:58:06 +00:00
|
|
|
*/
|
|
|
|
static sm_idx_t
|
2024-05-09 19:50:56 +00:00
|
|
|
__sm_get_vector_aligned_offset(size_t idx)
|
2024-04-04 19:58:06 +00:00
|
|
|
{
|
|
|
|
const size_t capacity = SM_BITS_PER_VECTOR;
|
2024-04-24 20:32:09 +00:00
|
|
|
return (idx / capacity) * capacity;
|
2024-04-04 19:58:06 +00:00
|
|
|
}
|
|
|
|
|
2024-05-06 19:43:47 +00:00
|
|
|
/** @brief Aligns to SM_CHUNK_CAPACITY a given index \b idx.
|
2024-04-29 16:10:21 +00:00
|
|
|
*
|
|
|
|
* @param[in] idx The index to align.
|
|
|
|
* @returns the aligned offset (aligned to __sm_chunk_t capacity)
|
|
|
|
*/
|
|
|
|
static sm_idx_t
|
2024-05-09 19:50:56 +00:00
|
|
|
__sm_get_chunk_aligned_offset(size_t idx)
|
2024-04-29 16:10:21 +00:00
|
|
|
{
|
|
|
|
const size_t capacity = SM_CHUNK_MAX_CAPACITY;
|
|
|
|
return (idx / capacity) * capacity;
|
|
|
|
}
|
|
|
|
|
2024-05-06 19:43:47 +00:00
|
|
|
/** @brief Provides the byte offset of a chunk at index \b idx.
 *
 * @param[in] map The sparsemap_t in question.
 * @param[in] idx The index of the chunk to locate.
 * @returns the byte offset of a __sm_chunk_t in m_data, or -1 if there
 * are no chunks.
 */
static ssize_t
__sm_get_chunk_offset(sparsemap_t *map, sparsemap_idx_t idx)
{
    size_t count = __sm_get_chunk_count(map);
    if (count == 0) {
        return -1;
    }

    uint8_t *start = __sm_get_chunk_data(map, 0);
    uint8_t *p = start;

    /* Scan all chunks except the last; if no earlier chunk matches, p ends
       up pointing at the final chunk record by default. */
    for (sparsemap_idx_t i = 0; i < count - 1; i++) {
        sm_idx_t s = *(sm_idx_t *)p;
        __sm_assert(s == __sm_get_vector_aligned_offset(s));
        __sm_chunk_t chunk;
        __sm_chunk_init(&chunk, p + sizeof(sm_idx_t));
        /* NOTE(review): 's >= idx' stops at the first chunk starting at or
           AFTER idx, so an idx that precedes every chunk resolves to the
           first chunk rather than "not found" — callers appear to re-check
           bounds; confirm this is the intended contract. */
        if (s >= idx || idx < s + __sm_chunk_get_capacity(&chunk)) {
            break;
        }
        p += sizeof(sm_idx_t) + __sm_chunk_get_size(&chunk);
    }

    return (ssize_t)(p - start);
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Sets the number of __sm_chunk_t's.
|
|
|
|
*
|
|
|
|
* @param[in] map The sparsemap_t in question.
|
|
|
|
* @param[in] new_count The new number of chunks in the map.
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static void
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_set_chunk_count(sparsemap_t *map, size_t new_count)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
|
|
|
*(uint32_t *)&map->m_data[0] = (uint32_t)new_count;
|
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Appends raw data at the end of used portion of \b m_data.
|
|
|
|
*
|
|
|
|
* @param[in] map The sparsemap_t in question.
|
|
|
|
* @param[in] buffer The bytes to copy into \b m_data.
|
|
|
|
* @param[in] buffer_size The size of the byte array \b buffer to copy.
|
2024-04-04 19:24:02 +00:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
__sm_append_data(sparsemap_t *map, uint8_t *buffer, size_t buffer_size)
|
|
|
|
{
|
2024-05-07 12:46:42 +00:00
|
|
|
__sm_assert(map->m_data_used + buffer_size <= map->m_capacity);
|
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
memcpy(&map->m_data[map->m_data_used], buffer, buffer_size);
|
|
|
|
map->m_data_used += buffer_size;
|
|
|
|
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Inserts data at \b offset in the middle of \b m_data.
 *
 * Shifts the tail of the buffer right by \b buffer_size bytes, then copies
 * \b buffer into the opened gap.
 *
 * @param[in] map The sparsemap_t in question.
 * @param[in] offset The offset in bytes into \b m_data to place the buffer.
 * @param[in] buffer The bytes to copy into \b m_data.
 * @param[in] buffer_size The size of the byte array \b buffer to copy.
 */
void
__sm_insert_data(sparsemap_t *map, size_t offset, uint8_t *buffer, size_t buffer_size)
{
    __sm_assert(map->m_data_used + buffer_size <= map->m_capacity);

    /* p is SM_SIZEOF_OVERHEAD + offset bytes into m_data. */
    uint8_t *p = __sm_get_chunk_data(map, offset);
    /* NOTE(review): the move length 'm_data_used - offset' is measured from
       the start of m_data (m_data_used includes SM_SIZEOF_OVERHEAD) while p
       is already past the header, so up to SM_SIZEOF_OVERHEAD extra bytes
       beyond the used region may be moved — confirm the offset/used-size
       accounting. */
    memmove(p + buffer_size, p, map->m_data_used - offset);
    memcpy(p, buffer, buffer_size);
    map->m_data_used += buffer_size;
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/** @brief Removes data from \b m_data.
 *
 * Closes a \b gap_size byte hole at \b offset by shifting the tail left.
 *
 * @param[in] map The sparsemap_t in question.
 * @param[in] offset The offset in bytes into \b m_data at which to excise data.
 * @param[in] gap_size The size of the excision.
 */
static void
__sm_remove_data(sparsemap_t *map, size_t offset, size_t gap_size)
{
    __sm_assert(map->m_data_used >= gap_size);
    /* p is SM_SIZEOF_OVERHEAD + offset bytes into m_data. */
    uint8_t *p = __sm_get_chunk_data(map, offset);
    /* NOTE(review): 'm_data_used - offset - gap_size' counts from the start
       of m_data (m_data_used includes SM_SIZEOF_OVERHEAD) while p is past
       the header — confirm the accounting, as in __sm_insert_data. */
    memmove(p, p + gap_size, map->m_data_used - offset - gap_size);
    map->m_data_used -= gap_size;
}
|
|
|
|
|
2024-05-10 20:25:08 +00:00
|
|
|
/** @brief Merges into the destination chunk all set bits from \b src_chunk.
 *
 * For each of \b capacity bit positions, any bit set in \b src_chunk but not
 * in \b dst_chunk is set in \b dst_chunk, growing or shrinking the backing
 * m_data as the chunk's representation changes.
 *
 * @param[in] map The map the chunks belong to.
 * @param[in] src_start The map-relative index of the source chunk's first bit.
 * @param[in] dst_start The map-relative index of the destination chunk's
 * first bit; (src_start - dst_start) is the bit-position delta applied when
 * writing into \b dst_chunk.
 * @param[in] capacity The number of source bit positions to examine —
 * presumably the source chunk's capacity; confirm against callers.
 * @param[in] dst_chunk The chunk receiving the merged bits.
 * @param[in] src_chunk The chunk whose set bits are copied.
 * @todo merge at the vector level not offset
 */
void
__sm_merge_chunk(sparsemap_t *map, sparsemap_idx_t src_start, sparsemap_idx_t dst_start, sparsemap_idx_t capacity, __sm_chunk_t *dst_chunk,
    __sm_chunk_t *src_chunk)
{
    ssize_t delta = src_start - dst_start;
    for (sparsemap_idx_t j = 0; j < capacity; j++) {
        /* Recompute per iteration: grow/shrink below may shift layout. */
        ssize_t offset = __sm_get_chunk_offset(map, src_start + j);
        if (__sm_chunk_is_set(src_chunk, j) && !__sm_chunk_is_set(dst_chunk, j + delta)) {
            size_t position;
            __sm_bitvec_t fill;
            /* Dry-run set (last arg false) reports whether the chunk's
               storage must change before the bit can be written. */
            switch (__sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, false)) {
            case SM_NEEDS_TO_GROW:
                /* Insert room for one more bitvec, then set for real. */
                offset += sizeof(sm_idx_t) + position * sizeof(__sm_bitvec_t);
                __sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(__sm_bitvec_t));
                __sm_chunk_set(dst_chunk, j + delta, true, &position, &fill, true);
                break;
            case SM_NEEDS_TO_SHRINK:
                /* Setting the bit collapsed a vector to an implicit form. */
                if (__sm_chunk_is_empty(src_chunk)) {
                    __sm_assert(position == 1);
                    /* Whole record is gone: index prefix plus two vectors. */
                    __sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2);
                    __sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
                } else {
                    offset += sizeof(sm_idx_t) + position * sizeof(__sm_bitvec_t);
                    __sm_remove_data(map, offset, sizeof(__sm_bitvec_t));
                }
                break;
            case SM_OK:
            default:
                break;
            }
        }
    }
}
|
|
|
|
|
2024-04-29 16:10:21 +00:00
|
|
|
/*
|
2024-05-06 19:43:47 +00:00
|
|
|
* The following is the "Sparsemap" implementation, it uses chunks (code above)
|
2024-04-29 16:10:21 +00:00
|
|
|
* and is the public API for this compressed bitmap representation.
|
2024-04-07 20:38:57 +00:00
|
|
|
*/
|
2024-04-29 16:10:21 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
void
|
2024-04-04 19:58:06 +00:00
|
|
|
sparsemap_clear(sparsemap_t *map)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-24 20:32:09 +00:00
|
|
|
if (map == NULL) {
|
|
|
|
return;
|
|
|
|
}
|
2024-04-10 19:34:19 +00:00
|
|
|
memset(map->m_data, 0, map->m_capacity);
|
2024-04-04 19:24:02 +00:00
|
|
|
map->m_data_used = SM_SIZEOF_OVERHEAD;
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_set_chunk_count(map, 0);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Allocates a sparsemap_t and its data buffer in one allocation.
 *
 * @param[in] size The requested data-buffer size in bytes; 0 defaults to 1024.
 * @returns a heap-allocated map (free with free()), or NULL on allocation
 * failure.
 */
sparsemap_t *
sparsemap(size_t size)
{
    if (size == 0) {
        size = 1024;
    }

    size_t data_size = (size * sizeof(uint8_t));

    /* Ensure that m_data is 8-byte aligned. */
    size_t total_size = sizeof(sparsemap_t) + data_size;
    size_t padding = total_size % 8 == 0 ? 0 : 8 - (total_size % 8);
    total_size += padding;

    sparsemap_t *map = (sparsemap_t *)calloc(1, total_size);
    if (map) {
        /* NOTE(review): '& ~7' rounds DOWN — if sizeof(sparsemap_t) is not
           a multiple of 8 this places m_data inside the struct's tail bytes
           (and sparsemap_set_data_size's pointer-identity check relies on
           m_data == map + sizeof(sparsemap_t)). Safe only while the struct
           size is 8-byte aligned — confirm. */
        uint8_t *data = (uint8_t *)(((uintptr_t)map + sizeof(sparsemap_t)) & ~(uintptr_t)7);
        sparsemap_init(map, data, size);
        __sm_when_diag({ __sm_assert(IS_8_BYTE_ALIGNED(map->m_data)); });
    }
    return map;
}
|
|
|
|
|
2024-05-04 13:38:26 +00:00
|
|
|
/** @brief Creates a deep copy of \b other.
 *
 * @param[in] other The map to duplicate.
 * @returns a newly allocated map with identical contents, or NULL on
 * allocation failure.
 */
sparsemap_t *
sparsemap_copy(sparsemap_t *other)
{
    size_t cap = sparsemap_get_capacity(other);
    sparsemap_t *clone = sparsemap(cap);
    if (clone == NULL) {
        return NULL;
    }
    /* Mirror the source's bookkeeping, then duplicate the raw buffer. */
    clone->m_capacity = other->m_capacity;
    clone->m_data_used = other->m_data_used;
    memcpy(clone->m_data, other->m_data, cap);
    return clone;
}
|
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
sparsemap_t *
|
|
|
|
sparsemap_wrap(uint8_t *data, size_t size)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
|
|
|
sparsemap_t *map = (sparsemap_t *)calloc(1, sizeof(sparsemap_t));
|
|
|
|
if (map) {
|
2024-05-09 19:50:56 +00:00
|
|
|
map->m_data = data;
|
|
|
|
map->m_data_used = 0;
|
|
|
|
map->m_capacity = size;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
return map;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2024-04-10 19:34:19 +00:00
|
|
|
sparsemap_init(sparsemap_t *map, uint8_t *data, size_t size)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-10 19:48:53 +00:00
|
|
|
map->m_data = data;
|
2024-04-10 19:34:19 +00:00
|
|
|
map->m_data_used = 0;
|
2024-04-24 20:32:09 +00:00
|
|
|
map->m_capacity = size;
|
2024-04-04 19:58:06 +00:00
|
|
|
sparsemap_clear(map);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-05 14:34:59 +00:00
|
|
|
void
|
2024-04-26 20:25:17 +00:00
|
|
|
sparsemap_open(sparsemap_t *map, uint8_t *data, size_t size)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-10 19:48:53 +00:00
|
|
|
map->m_data = data;
|
2024-05-16 16:00:09 +00:00
|
|
|
map->m_data_used = __sm_get_size_impl(map);
|
2024-04-26 20:25:17 +00:00
|
|
|
map->m_capacity = size;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
/** @brief Resizes the map's backing buffer.
 *
 * Two modes: if this map was created via sparsemap() (struct and buffer in
 * one allocation) and \b data is NULL, the combined allocation is realloc'd;
 * otherwise \b data (if provided) replaces m_data and only the capacity is
 * updated.
 *
 * @param[in] map The sparsemap_t in question.
 * @param[in] data Optional replacement buffer, or NULL.
 * @param[in] size The new capacity in bytes.
 * @returns the (possibly moved) map, or NULL if realloc failed — on failure
 * the caller's original \b map pointer remains valid.
 */
sparsemap_t *
sparsemap_set_data_size(sparsemap_t *map, uint8_t *data, size_t size)
{
    size_t data_size = (size * sizeof(uint8_t));

    /* If this sparsemap was allocated by the sparsemap() API and we're not handed
       a new data, it's up to us to resize it. */
    if (data == NULL && (uintptr_t)map->m_data == (uintptr_t)map + sizeof(sparsemap_t) && size > map->m_capacity) {

        /* Ensure that m_data is 8-byte aligned. */
        size_t total_size = sizeof(sparsemap_t) + data_size;
        size_t padding = total_size % 8 == 0 ? 0 : 8 - (total_size % 8);
        total_size += padding;

        /* On success the old 'map' pointer is invalid; callers must use the
           returned pointer. */
        sparsemap_t *m = (sparsemap_t *)realloc(map, total_size);
        if (!m) {
            return NULL;
        }
        /* Zero only the newly grown region past the old capacity. */
        memset(((uint8_t *)m) + sizeof(sparsemap_t) + (m->m_capacity * sizeof(uint8_t)), 0, size - m->m_capacity + padding);
        /* NOTE(review): capacity is set to data_size here but to 'size' in
           the else-branch below — today both are equal (sizeof(uint8_t)==1);
           confirm the intended unit. */
        m->m_capacity = data_size;
        m->m_data = (uint8_t *)(((uintptr_t)m + sizeof(sparsemap_t)) & ~(uintptr_t)7);
        __sm_when_diag({ __sm_assert(IS_8_BYTE_ALIGNED(m->m_data)); }) return m;
    } else {
        /* NOTE: It is up to the caller to realloc their buffer and provide it here
           for reassignment. */
        if (data != NULL && data != map->m_data) {
            map->m_data = data;
        }
        map->m_capacity = size;
        return map;
    }
}
|
|
|
|
|
2024-04-09 13:13:38 +00:00
|
|
|
/** @brief Reports the unused fraction of the buffer as a percentage [0, 100].
 *
 * @param[in] map The sparsemap_t in question.
 * @returns percent of capacity still unused.
 */
double
sparsemap_capacity_remaining(sparsemap_t *map)
{
    /* A full (or overfull) buffer has nothing remaining. */
    if (map->m_data_used >= map->m_capacity) {
        return 0;
    }
    /* NOTE(review): unreachable — when m_capacity == 0 the check above
       (m_data_used >= 0) has already returned 0. Confirm whether a
       zero-capacity map should report 0 or 100 percent remaining. */
    if (map->m_capacity == 0) {
        return 100.0;
    }
    return 100 - (((double)map->m_data_used / (double)map->m_capacity) * 100);
}
|
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/** @brief Returns the total capacity, in bytes, of the map's data buffer.
 *
 * @param[in] map The sparsemap_t in question.
 * @returns the capacity of m_data in bytes.
 */
size_t
sparsemap_get_capacity(sparsemap_t *map)
{
    return map->m_capacity;
}
|
|
|
|
|
|
|
|
bool
|
2024-04-24 20:32:09 +00:00
|
|
|
sparsemap_is_set(sparsemap_t *map, sparsemap_idx_t idx)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-04-04 19:58:06 +00:00
|
|
|
__sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/* Get the __sm_chunk_t which manages this index */
|
2024-05-06 19:43:47 +00:00
|
|
|
ssize_t offset = __sm_get_chunk_offset(map, idx);
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/* No __sm_chunk_t's available -> the bit is not set */
|
|
|
|
if (offset == -1) {
|
2024-04-24 20:32:09 +00:00
|
|
|
return false;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/* Otherwise load the __sm_chunk_t */
|
2024-05-06 19:43:47 +00:00
|
|
|
uint8_t *p = __sm_get_chunk_data(map, offset);
|
2024-04-04 19:24:02 +00:00
|
|
|
sm_idx_t start = *(sm_idx_t *)p;
|
|
|
|
__sm_chunk_t chunk;
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_init(&chunk, p + sizeof(sm_idx_t));
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/* Determine if the bit is out of bounds of the __sm_chunk_t; if yes then
|
2024-04-24 20:32:09 +00:00
|
|
|
the bit is not set. */
|
2024-05-06 19:43:47 +00:00
|
|
|
if (idx < start || (unsigned long)idx - start >= __sm_chunk_get_capacity(&chunk)) {
|
2024-04-24 20:32:09 +00:00
|
|
|
return false;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
/* Otherwise ask the __sm_chunk_t whether the bit is set. */
|
2024-05-06 19:43:47 +00:00
|
|
|
return __sm_chunk_is_set(&chunk, idx - start);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
/* Set or clear bit |idx| in |map|.
 *
 * Returns |idx| on success; returns SPARSEMAP_IDX_MAX and sets errno to
 * ENOSPC when the buffer could not absorb the worst-case growth of one
 * chunk header plus two bitvec words.  Clearing a bit that lies outside
 * every chunk is a no-op and still returns |idx|. */
sparsemap_idx_t
sparsemap_set(sparsemap_t *map, sparsemap_idx_t idx, bool value)
{
  __sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);

  /* Get the __sm_chunk_t which manages this index */
  ssize_t offset = __sm_get_chunk_offset(map, idx);
  bool dont_grow = false;
  /* Reserve worst-case room up front: a chunk header plus two vectors. */
  if (map->m_data_used + sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2 > map->m_capacity) {
    errno = ENOSPC;
    return SPARSEMAP_IDX_MAX;
  }

  /* If there are no __sm_chunk_t and the bit is set to zero then return
     immediately; otherwise create an initial __sm_chunk_t. */
  if (offset == -1) {
    if (value == false) {
      return idx;
    }

    uint8_t buf[sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2] = { 0 };
    __sm_append_data(map, &buf[0], sizeof(buf));

    uint8_t *p = __sm_get_chunk_data(map, 0);
    /* The chunk's start index is aligned down to a chunk boundary. */
    *(sm_idx_t *)p = __sm_get_chunk_aligned_offset(idx);

    __sm_set_chunk_count(map, 1);

    /* We already inserted an additional __sm_bitvec_t; given that has happened
       there is no need to grow the vector even further. */
    dont_grow = true;
    offset = 0;
  }

  /* Load the __sm_chunk_t */
  uint8_t *p = __sm_get_chunk_data(map, offset);
  sm_idx_t start = *(sm_idx_t *)p;
  __sm_assert(start == __sm_get_vector_aligned_offset(start));

  /* The new index is smaller than the first __sm_chunk_t: create a new
     __sm_chunk_t and insert it at the front. */
  if (idx < start) {
    if (value == false) {
      /* nothing to do */
      return idx;
    }

    uint8_t buf[sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2] = { 0 };
    __sm_insert_data(map, offset, &buf[0], sizeof(buf));

    size_t aligned_idx = __sm_get_chunk_aligned_offset(idx);
    /* If the old chunk would overlap the new one, shrink the old chunk so
       the two ranges stay disjoint. */
    if (start - aligned_idx < SM_CHUNK_MAX_CAPACITY) {
      __sm_chunk_t chunk;
      __sm_chunk_init(&chunk, p + sizeof(sm_idx_t));
      if (__sm_chunk_reduce_capacity(&chunk, start - aligned_idx)) {
        /* TODO: The __sm_chunk_t is empty then remove it.
        __sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2);
        __sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
        */
      }
    }
    *(sm_idx_t *)p = start = aligned_idx;

    /* We just added another chunk! */
    __sm_set_chunk_count(map, __sm_get_chunk_count(map) + 1);

    /* We already inserted an additional __sm_bitvec_t; later on there
       is no need to grow the vector even further. */
    dont_grow = true;
  }

  /* A __sm_chunk_t exists, but the new index exceeds its capacities: create
     a new __sm_chunk_t and insert it after the current one. */
  else {
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, p + sizeof(sm_idx_t));
    if (idx - start >= (sparsemap_idx_t)__sm_chunk_get_capacity(&chunk)) {
      if (value == false) {
        /* nothing to do */
        return idx;
      }

      /* Advance past the current chunk, then splice in a fresh one. */
      size_t size = __sm_chunk_get_size(&chunk);
      offset += (sizeof(sm_idx_t) + size);
      p += sizeof(sm_idx_t) + size;

      uint8_t buf[sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2] = { 0 };
      __sm_insert_data(map, offset, &buf[0], sizeof(buf));

      start += __sm_chunk_get_capacity(&chunk);
      /* If |idx| is far beyond the previous chunk, realign the new chunk's
         start to |idx|'s own chunk boundary instead of chaining ranges. */
      if ((sparsemap_idx_t)start + SM_CHUNK_MAX_CAPACITY <= idx) {
        start = __sm_get_chunk_aligned_offset(idx);
      }
      *(sm_idx_t *)p = start;
      __sm_assert(start == __sm_get_vector_aligned_offset(start));

      /* We just added another chunk! */
      __sm_set_chunk_count(map, __sm_get_chunk_count(map) + 1);

      /* We already inserted an additional __sm_bitvec_t; later on there
         is no need to grow the vector even further. */
      dont_grow = true;
    }
  }

  __sm_chunk_t chunk;
  __sm_chunk_init(&chunk, p + sizeof(sm_idx_t));

  /* Now update the __sm_chunk_t. */
  size_t position;
  __sm_bitvec_t fill;
  /* First pass is a dry run (retried == false): it reports whether the
     chunk's vector array must grow or shrink before the bit can land. */
  int code = __sm_chunk_set(&chunk, idx - start, value, &position, &fill, false);
  switch (code) {
  case SM_OK:
    break;
  case SM_NEEDS_TO_GROW:
    if (!dont_grow) {
      offset += (sizeof(sm_idx_t) + position * sizeof(__sm_bitvec_t));
      __sm_insert_data(map, offset, (uint8_t *)&fill, sizeof(__sm_bitvec_t));
    }
    /* Second pass actually writes the bit now that space exists. */
    __sm_chunk_set(&chunk, idx - start, value, &position, &fill, true);
    break;
  case SM_NEEDS_TO_SHRINK:
    /* If the __sm_chunk_t is empty then remove it. */
    if (__sm_chunk_is_empty(&chunk)) {
      __sm_assert(position == 1);
      __sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2);
      __sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
    } else {
      offset += (sizeof(sm_idx_t) + position * sizeof(__sm_bitvec_t));
      __sm_remove_data(map, offset, sizeof(__sm_bitvec_t));
    }
    break;
  default:
    __sm_assert(!"shouldn't be here");
#ifdef DEBUG
    abort();
#endif
    break;
  }
  __sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
  return idx;
}
|
|
|
|
|
2024-04-26 20:25:17 +00:00
|
|
|
/* Return the index of the first set bit in |map|, or 0 when the map has
 * no chunks.  NOTE(review): an empty map and a map whose first set bit is
 * at index 0 both return 0 — callers presumably disambiguate via
 * __sm_get_chunk_count or sparsemap_count; confirm. */
sparsemap_idx_t
sparsemap_get_starting_offset(sparsemap_t *map)
{
  sparsemap_idx_t offset = 0;
  size_t count = __sm_get_chunk_count(map);
  if (count == 0) {
    return 0;
  }
  /* Only the first chunk needs to be examined: chunks are kept sorted, so
     the first set bit (if any) lives in chunk 0. */
  uint8_t *p = __sm_get_chunk_data(map, 0);
  sparsemap_idx_t relative_position = *(sm_idx_t *)p;
  p += sizeof(sm_idx_t);
  __sm_chunk_t chunk;
  __sm_chunk_init(&chunk, p);
  /* Walk the descriptor bytes; each byte holds SM_FLAGS_PER_INDEX_BYTE
     two-bit flags, each describing one SM_BITS_PER_VECTOR-wide vector. */
  for (size_t m = 0; m < sizeof(__sm_bitvec_t); m++, p++) {
    for (int n = 0; n < SM_FLAGS_PER_INDEX_BYTE; n++) {
      size_t flags = SM_CHUNK_GET_FLAGS(*p, n);
      if (flags == SM_PAYLOAD_NONE) {
        /* NONE does not advance the position — presumably it marks
           truncated capacity rather than a run of zeros; confirm against
           __sm_chunk_get_capacity. */
        continue;
      } else if (flags == SM_PAYLOAD_ZEROS) {
        relative_position += SM_BITS_PER_VECTOR;
      } else if (flags == SM_PAYLOAD_ONES) {
        /* An all-ones vector: its first bit is the first set bit. */
        offset = relative_position;
        goto done;
      } else if (flags == SM_PAYLOAD_MIXED) {
        /* Mixed vector: fetch its backing word and scan for the lowest
           set bit. */
        __sm_bitvec_t w = chunk.m_data[1 + __sm_chunk_get_position(&chunk, m * SM_FLAGS_PER_INDEX_BYTE + n)];
        for (int k = 0; k < SM_BITS_PER_VECTOR; k++) {
          if (w & ((__sm_bitvec_t)1 << k)) {
            offset = relative_position + k;
            goto done;
          }
        }
        relative_position += SM_BITS_PER_VECTOR;
      }
    }
  }
done:;
  return offset;
}
|
|
|
|
|
|
|
|
/* Return the index one position past the interior of the last set run in
 * the final chunk (for MIXED vectors: the index of the highest set bit;
 * for ONES vectors: the index just past the vector).  Returns 0 when the
 * map has no chunks. */
sparsemap_idx_t
sparsemap_get_ending_offset(sparsemap_t *map)
{
  sparsemap_idx_t offset = 0;
  size_t count = __sm_get_chunk_count(map);
  if (count == 0) {
    return 0;
  }
  uint8_t *p = __sm_get_chunk_data(map, 0);
  /* Skip forward over all but the last chunk. */
  for (size_t i = 0; i < count - 1; i++) {
    p += sizeof(sm_idx_t);
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, p);
    p += __sm_chunk_get_size(&chunk);
  }
  sm_idx_t start = *(sm_idx_t *)p;
  p += sizeof(sm_idx_t);
  __sm_chunk_t chunk;
  __sm_chunk_init(&chunk, p);
  sparsemap_idx_t relative_position = start;
  /* Walk every descriptor flag in the last chunk, remembering the last
     position at which a set bit was observed. */
  for (size_t m = 0; m < sizeof(__sm_bitvec_t); m++, p++) {
    for (int n = 0; n < SM_FLAGS_PER_INDEX_BYTE; n++) {
      size_t flags = SM_CHUNK_GET_FLAGS(*p, n);
      if (flags == SM_PAYLOAD_NONE) {
        continue;
      } else if (flags == SM_PAYLOAD_ZEROS) {
        relative_position += SM_BITS_PER_VECTOR;
      } else if (flags == SM_PAYLOAD_ONES) {
        /* All ones: the run extends through the whole vector. */
        relative_position += SM_BITS_PER_VECTOR;
        offset = relative_position;
      } else if (flags == SM_PAYLOAD_MIXED) {
        __sm_bitvec_t w = chunk.m_data[1 + __sm_chunk_get_position(&chunk, m * SM_FLAGS_PER_INDEX_BYTE + n)];
        /* Find the highest set bit in this vector. */
        int idx = 0;
        for (int k = 0; k < SM_BITS_PER_VECTOR; k++) {
          if (w & ((__sm_bitvec_t)1 << k)) {
            idx = k;
          }
        }
        offset = relative_position + idx;
        relative_position += SM_BITS_PER_VECTOR;
      }
    }
  }
  return offset;
}
|
|
|
|
|
2024-05-15 19:50:15 +00:00
|
|
|
double
|
|
|
|
sparsemap_fill_factor(sparsemap_t *map)
|
|
|
|
{
|
|
|
|
size_t rank = sparsemap_rank(map, 0, SPARSEMAP_IDX_MAX, true);
|
|
|
|
sparsemap_idx_t end = sparsemap_get_ending_offset(map);
|
|
|
|
return (double)rank / (double)end * 100.0;
|
|
|
|
}
|
|
|
|
|
2024-05-09 19:50:56 +00:00
|
|
|
/* Return the raw backing buffer of |map|.  Ownership is retained by the
 * map; the caller must not free the returned pointer. */
void *
sparsemap_get_data(sparsemap_t *map)
{
  return map->m_data;
}
|
|
|
|
|
|
|
|
size_t
|
|
|
|
sparsemap_get_size(sparsemap_t *map)
|
|
|
|
{
|
|
|
|
if (map->m_data_used) {
|
2024-05-16 16:00:09 +00:00
|
|
|
size_t size = __sm_get_size_impl(map);
|
|
|
|
if (size != map->m_data_used) {
|
|
|
|
map->m_data_used = size;
|
|
|
|
}
|
2024-04-30 15:20:48 +00:00
|
|
|
__sm_when_diag({ __sm_assert(map->m_data_used == __sm_get_size_impl(map)); });
|
2024-04-24 20:32:09 +00:00
|
|
|
return map->m_data_used;
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
2024-04-24 20:32:09 +00:00
|
|
|
return map->m_data_used = __sm_get_size_impl(map);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
|
2024-05-09 19:50:56 +00:00
|
|
|
/* Return the total number of set bits in |map| (the rank of the full
 * index range). */
size_t
sparsemap_count(sparsemap_t *map)
{
  return sparsemap_rank(map, 0, SPARSEMAP_IDX_MAX, true);
}
|
|
|
|
|
2024-04-04 19:24:02 +00:00
|
|
|
void
|
2024-05-02 18:55:04 +00:00
|
|
|
sparsemap_scan(sparsemap_t *map, void (*scanner)(sm_idx_t[], size_t, void *aux), size_t skip, void *aux)
|
2024-04-04 19:24:02 +00:00
|
|
|
{
|
2024-05-06 19:43:47 +00:00
|
|
|
uint8_t *p = __sm_get_chunk_data(map, 0);
|
|
|
|
size_t count = __sm_get_chunk_count(map);
|
2024-04-04 19:24:02 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < count; i++) {
|
|
|
|
sm_idx_t start = *(sm_idx_t *)p;
|
|
|
|
p += sizeof(sm_idx_t);
|
|
|
|
__sm_chunk_t chunk;
|
2024-05-06 19:43:47 +00:00
|
|
|
__sm_chunk_init(&chunk, p);
|
|
|
|
size_t skipped = __sm_chunk_scan(&chunk, start, scanner, skip, aux);
|
2024-04-04 19:24:02 +00:00
|
|
|
if (skip) {
|
|
|
|
assert(skip >= skipped);
|
|
|
|
skip -= skipped;
|
2024-04-03 00:41:55 +00:00
|
|
|
}
|
2024-05-06 19:43:47 +00:00
|
|
|
p += __sm_chunk_get_size(&chunk);
|
2024-04-04 19:24:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-13 02:07:45 +00:00
|
|
|
/* Merge every set bit of |source| into |destination|.
 *
 * Returns 0 on success.  When the destination buffer cannot hold the
 * worst-case merged size, sets errno to ENOSPC and returns the (positive)
 * shortfall in bytes.  |source| is not modified. */
int
sparsemap_merge(sparsemap_t *destination, sparsemap_t *source)
{
  uint8_t *src, *dst;
  size_t src_count = __sm_get_chunk_count(source);
  sparsemap_idx_t dst_ending_offset = sparsemap_get_ending_offset(destination);

  if (src_count == 0) {
    return 0;
  }

  /* Worst case: every source chunk is copied verbatim plus one extra
     header+two-vector allowance per chunk. */
  ssize_t remaining_capacity = destination->m_capacity - destination->m_data_used -
      (source->m_data_used + src_count * (sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2));

  /* Estimate worst-case overhead required for merge. */
  if (remaining_capacity <= 0) {
    errno = ENOSPC;
    return -remaining_capacity;
  }

  src = __sm_get_chunk_data(source, 0);
  while (src_count) {
    sm_idx_t src_start = *(sm_idx_t *)src;
    __sm_chunk_t src_chunk;
    __sm_chunk_init(&src_chunk, src + sizeof(sm_idx_t));
    size_t src_capacity = __sm_chunk_get_capacity(&src_chunk);
    /* Locate the destination chunk whose range covers src_start;
       -1 (negative) means no such chunk exists yet. */
    ssize_t dst_offset = __sm_get_chunk_offset(destination, src_start);
    if (dst_offset >= 0) {
      dst = __sm_get_chunk_data(destination, dst_offset);
      sm_idx_t dst_start = *(sm_idx_t *)dst;
      __sm_chunk_t dst_chunk;
      __sm_chunk_init(&dst_chunk, dst + sizeof(sm_idx_t));
      size_t dst_capacity = __sm_chunk_get_capacity(&dst_chunk);

      /* Try to expand the capacity if there's room before the start of the next chunk. */
      if (src_start == dst_start && dst_capacity < src_capacity) {
        ssize_t nxt_offset = __sm_get_chunk_offset(destination, dst_start + dst_capacity + 1);
        uint8_t *nxt_dst = __sm_get_chunk_data(destination, nxt_offset);
        sm_idx_t nxt_dst_start = *(sm_idx_t *)nxt_dst;
        if (nxt_dst_start > dst_start + src_capacity) {
          __sm_chunk_increase_capacity(&dst_chunk, src_capacity);
          dst_capacity = __sm_chunk_get_capacity(&dst_chunk);
        }
      }

      /* Source chunk precedes next destination chunk. */
      if ((src_start + src_capacity) <= dst_start) {
        size_t src_size = __sm_chunk_get_size(&src_chunk);
        ssize_t offset = __sm_get_chunk_offset(destination, dst_start);
        __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);

        /* Update the chunk count and data_used. */
        __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);

        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      }

      /* Source chunk follows next destination chunk. */
      if (src_start >= (dst_start + dst_capacity)) {
        size_t src_size = __sm_chunk_get_size(&src_chunk);
        /* Appending is cheaper than inserting when this is the last chunk. */
        if (dst_offset == __sm_get_chunk_offset(destination, SPARSEMAP_IDX_MAX)) {
          __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size);
        } else {
          ssize_t offset = __sm_get_chunk_offset(destination, src_start);
          __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);
        }
        /* Update the chunk count and data_used. */
        __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);

        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      }

      /* Source and destination and a perfect overlapping pair. */
      if (src_start == dst_start && src_capacity == dst_capacity) {
        __sm_merge_chunk(destination, src_start, dst_start, dst_capacity, &dst_chunk, &src_chunk);
        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      }

      /* Non-uniform overlapping chunks. */
      if (dst_start < src_start || (dst_start == src_start && dst_capacity != src_capacity)) {
        size_t src_end = src_start + src_capacity;
        size_t dst_end = dst_start + dst_capacity;
        /* |overlap| is the portion of the source range covered by the
           destination chunk; the remainder is set bit-by-bit below. */
        size_t overlap = src_end > dst_end ? src_capacity - (src_end - dst_end) : src_capacity;
        __sm_merge_chunk(destination, src_start, dst_start, overlap, &dst_chunk, &src_chunk);
        for (size_t n = src_start + overlap; n <= src_end; n++) {
          if (sparsemap_is_set(source, n)) {
            sparsemap_set(destination, n, true);
          }
        }
        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      }
    } else {
      if (src_start >= dst_ending_offset) {
        /* Starting offset is after destination chunks, so append data. */
        size_t src_size = __sm_chunk_get_size(&src_chunk);
        __sm_append_data(destination, src, sizeof(sm_idx_t) + src_size);

        /* Update the chunk count and data_used. */
        __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);

        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      } else {
        /* Source chunk precedes next destination chunk. */
        size_t src_size = __sm_chunk_get_size(&src_chunk);
        ssize_t offset = __sm_get_chunk_offset(destination, src_start);
        __sm_insert_data(destination, offset, src, sizeof(sm_idx_t) + src_size);

        /* Update the chunk count and data_used. */
        __sm_set_chunk_count(destination, __sm_get_chunk_count(destination) + 1);

        src_count--;
        src += sizeof(sm_idx_t) + __sm_chunk_get_size(&src_chunk);
        continue;
      }
    }
  }
  return 0;
}
|
|
|
|
|
2024-05-09 19:50:56 +00:00
|
|
|
/* Move all bits at or above |offset| from |map| into |other|.
 *
 * When |offset| is SPARSEMAP_IDX_MAX the split point is chosen at the
 * median set bit.  Returns the split offset actually used; returns 0 when
 * |offset| lies beyond the populated range (nothing to move), and
 * SPARSEMAP_IDX_MAX when the map has no populated range at all. */
sparsemap_idx_t
sparsemap_split(sparsemap_t *map, sparsemap_idx_t offset, sparsemap_t *other)
{
  if (!(offset == SPARSEMAP_IDX_MAX) && offset >= sparsemap_get_ending_offset(map)) {
    return 0;
  }

  /* No explicit split point: bisect at the median set bit. */
  if (offset == SPARSEMAP_IDX_MAX) {
    sparsemap_idx_t begin = sparsemap_get_starting_offset(map);
    sparsemap_idx_t end = sparsemap_get_ending_offset(map);
    if (begin != end) {
      size_t count = sparsemap_rank(map, begin, end, true);
      offset = sparsemap_select(map, count / 2, true);
    } else {
      return SPARSEMAP_IDX_MAX;
    }
  }

  /* |dst| points to the destination buffer */
  uint8_t *dst = __sm_get_chunk_end(other);

  /* |src| points to the source-chunk */
  uint8_t *src = __sm_get_chunk_data(map, 0);

  bool in_middle = false;
  uint8_t *prev = src;
  size_t i, count = __sm_get_chunk_count(map);
  /* Find the first chunk affected by the split; remember whether the
     split point lands inside it (in_middle) or exactly at its start. */
  for (i = 0; i < count; i++) {
    sm_idx_t start = *(sm_idx_t *)src;
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, src + sizeof(sm_idx_t));
    if (start == offset) {
      break;
    }
    if (start + __sm_chunk_get_capacity(&chunk) > (unsigned long)offset) {
      in_middle = true;
      break;
    }
    if (start > offset) {
      /* Overshot: the split point falls in the gap before this chunk;
         back up to the previous chunk. */
      src = prev;
      i--;
      break;
    }

    prev = src;
    src += sizeof(sm_idx_t) + __sm_chunk_get_size(&chunk);
  }
  if (i == count) {
    /* Nothing at or above |offset|; both maps are left untouched. */
    __sm_assert(sparsemap_get_size(map) > SM_SIZEOF_OVERHEAD);
    __sm_assert(sparsemap_get_size(other) > SM_SIZEOF_OVERHEAD);
    return offset;
  }

  /* Now copy all the remaining chunks. */
  int moved = 0;

  /* If |offset| is in the middle of a chunk then this chunk has to be split */
  if (in_middle) {
    uint8_t buf[sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2] = { 0 };
    memcpy(dst, &buf[0], sizeof(buf));

    *(sm_idx_t *)dst = __sm_get_vector_aligned_offset(offset);
    dst += sizeof(sm_idx_t);

    /* the |other| sparsemap_t now has one additional chunk */
    __sm_set_chunk_count(other, __sm_get_chunk_count(other) + 1);
    if (other->m_data_used != 0) {
      other->m_data_used += sizeof(sm_idx_t) + sizeof(__sm_bitvec_t);
    }

    sm_idx_t start = *(sm_idx_t *)src;
    src += sizeof(sm_idx_t);
    __sm_chunk_t s_chunk;
    __sm_chunk_init(&s_chunk, src);
    size_t capacity = __sm_chunk_get_capacity(&s_chunk);

    __sm_chunk_t d_chunk;
    __sm_chunk_init(&d_chunk, dst);
    /* The new destination chunk only needs to cover the tail portion of
       the split source chunk. */
    if (__sm_chunk_reduce_capacity(&d_chunk, __sm_get_vector_aligned_offset(capacity - (offset % capacity)))) {
      /* TODO: The __sm_chunk_t is empty then remove it.
      __sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2);
      __sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
      */
    }

    /* Now copy the bits. */
    sparsemap_idx_t b = __sm_get_vector_aligned_offset(offset % capacity);
    for (size_t j = start; j < capacity + start; j++) {
      if (j >= offset) {
        if (__sm_chunk_is_set(&s_chunk, j - start)) {
          sparsemap_set(other, j, true);
          /* Clear only the bits that survive the later capacity
             reduction of the source chunk. */
          if (j >= b && (j - b) % capacity < SM_BITS_PER_VECTOR) {
            sparsemap_set(map, j, false);
          }
        }
      }
    }

    src += __sm_chunk_get_size(&s_chunk);
    size_t dsize = __sm_chunk_get_size(&d_chunk);
    dst += dsize;
    i++;

    /* Reduce the capacity of the source-chunk effectively erases bits. */
    size_t r = __sm_get_vector_aligned_offset(((offset - start) % capacity) + SM_BITS_PER_VECTOR);
    if (__sm_chunk_reduce_capacity(&s_chunk, r)) {
      /* TODO: The __sm_chunk_t is empty then remove it.
      __sm_remove_data(map, offset, sizeof(sm_idx_t) + sizeof(__sm_bitvec_t) * 2);
      __sm_set_chunk_count(map, __sm_get_chunk_count(map) - 1);
      */
    }
  }

  /* Now continue with all remaining chunks. */
  for (; i < count; i++) {
    sm_idx_t start = *(sm_idx_t *)src;
    src += sizeof(sm_idx_t);
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, src);
    size_t s = __sm_chunk_get_size(&chunk);

    *(sm_idx_t *)dst = start;
    dst += sizeof(sm_idx_t);
    memcpy(dst, src, s);
    src += s;
    dst += s;

    moved++;
  }

  /* Force new calculation. */
  other->m_data_used = 0;
  map->m_data_used = 0;

  /* Update the Chunk counters. */
  __sm_set_chunk_count(map, __sm_get_chunk_count(map) - moved);
  __sm_set_chunk_count(other, __sm_get_chunk_count(other) + moved);

  __sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
  __sm_assert(sparsemap_get_size(other) > SM_SIZEOF_OVERHEAD);

  return offset;
}
|
2024-04-03 00:41:55 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
/* Return the index of the |n|-th (0-based) bit equal to |value|.
 *
 * For value==false an empty map (or a leading gap large enough to absorb
 * |n|) returns |n| itself, since every index there is an implicit zero.
 * Returns SPARSEMAP_IDX_MAX when fewer than n+1 matching bits exist. */
sparsemap_idx_t
sparsemap_select(sparsemap_t *map, sparsemap_idx_t n, bool value)
{
  __sm_assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
  sm_idx_t start;
  size_t count = __sm_get_chunk_count(map);

  if (count == 0 && value == false) {
    return n;
  }

  uint8_t *p = __sm_get_chunk_data(map, 0);

  for (size_t i = 0; i < count; i++) {
    start = *(sm_idx_t *)p;
    /* Start of this chunk is greater than n meaning there are a set of 0s
       before the first 1 sufficient to consume n. */
    if (value == false && i == 0 && start > n) {
      return n;
    }
    p += sizeof(sm_idx_t);
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, p);

    /* The chunk either resolves the select (new_n becomes -1 and |index|
       is the chunk-relative answer) or reports how many matches remain. */
    ssize_t new_n = n;
    size_t index = __sm_chunk_select(&chunk, n, &new_n, value);
    if (new_n == -1) {
      return start + index;
    }
    n = new_n;

    p += __sm_chunk_get_size(&chunk);
  }
  return SPARSEMAP_IDX_MAX;
}
|
|
|
|
|
2024-05-07 12:46:42 +00:00
|
|
|
/* Count bits equal to |value| in the inclusive range [begin, end].
 *
 * |*vec| receives the last bitvector examined by the final chunk's rank
 * computation (used by sparsemap_span to skip ahead).  For value==false
 * the gaps between chunks count as unset bits. */
static size_t
__sm_rank_vec(sparsemap_t *map, size_t begin, size_t end, bool value, __sm_bitvec_t *vec)
{
  assert(sparsemap_get_size(map) >= SM_SIZEOF_OVERHEAD);
  size_t amt, gap, pos = 0, result = 0, prev = 0, count, len = end - begin + 1;
  uint8_t *p;

  if (begin > end) {
    return 0;
  }

  count = __sm_get_chunk_count(map);

  if (count == 0) {
    if (value == false) {
      /* The count/rank of unset bits in an empty map is inf, so what you requested is the answer. */
      return len;
    }
  }

  p = __sm_get_chunk_data(map, 0);

  for (size_t i = 0; i < count; i++) {
    sm_idx_t start = *(sm_idx_t *)p;
    /* [prev, start + pos), prev is the last bit examined 0-based.
       |gap| is the number of implicit zeros between the previous chunk's
       examined extent and this chunk's start. */
    if (i == 0) {
      gap = start;
    } else {
      if (prev + SM_CHUNK_MAX_CAPACITY == start) {
        gap = 0;
      } else {
        gap = start - (prev + pos);
      }
    }
    /* Start of this chunk is greater than the end of the desired range. */
    if (start > end) {
      if (value == true) {
        /* We're counting set bits and this chunk starts after the range
           [begin, end], we're done. */
        return result;
      } else {
        if (i == 0) {
          /* We're counting unset bits and the first chunk starts after the
             range meaning everything proceeding this chunk was zero and should
             be counted, also we're done. */
          result += (end - begin) + 1;
          return result;
        } else {
          /* We're counting unset bits and some chunk starts after the range, so
             we've counted enough, we're done. */
          if (pos > end) {
            return result;
          } else {
            if (end - pos < gap) {
              result += end - pos;
              return result;
            } else {
              result += gap;
              return result;
            }
          }
        }
      }
    } else {
      /* The range and this chunk overlap. */
      if (value == false) {
        /* Consume the gap: either it eats into |begin| or it contributes
           unset bits to the result. */
        if (begin > gap) {
          begin -= gap;
        } else {
          result += gap - begin;
          begin = 0;
        }
      } else {
        if (begin >= gap) {
          begin -= gap;
        }
      }
    }
    prev = start;
    p += sizeof(sm_idx_t);
    __sm_chunk_t chunk;
    __sm_chunk_init(&chunk, p);

    /* Count all the set/unset inside this chunk. */
    amt = __sm_chunk_rank(&chunk, &begin, end - start, &pos, vec, value);
    result += amt;
    p += __sm_chunk_get_size(&chunk);
  }
  /* Count any additional unset bits that fall outside the last chunk but
     within the range. */
  if (value == false) {
    size_t last = prev - 1 + pos;
    if (end > last) {
      result += end - last - begin;
    }
  }
  return result;
}
|
2024-04-07 20:38:57 +00:00
|
|
|
|
|
|
|
size_t
|
2024-05-06 19:43:47 +00:00
|
|
|
sparsemap_rank(sparsemap_t *map, size_t begin, size_t end, bool value)
|
2024-04-07 20:38:57 +00:00
|
|
|
{
|
2024-07-15 14:37:16 +00:00
|
|
|
__sm_bitvec_t vec;
|
2024-05-06 19:43:47 +00:00
|
|
|
return __sm_rank_vec(map, begin, end, value, &vec);
|
2024-04-24 20:32:09 +00:00
|
|
|
}
|
2024-04-07 20:38:57 +00:00
|
|
|
|
2024-04-24 20:32:09 +00:00
|
|
|
size_t
|
|
|
|
sparsemap_span(sparsemap_t *map, sparsemap_idx_t idx, size_t len, bool value)
|
|
|
|
{
|
2024-04-28 16:26:31 +00:00
|
|
|
size_t rank, nth;
|
2024-07-15 14:37:16 +00:00
|
|
|
__sm_bitvec_t vec = 0;
|
2024-05-03 01:13:17 +00:00
|
|
|
sparsemap_idx_t offset;
|
2024-04-28 16:26:31 +00:00
|
|
|
|
|
|
|
/* When skipping forward to `idx` offset in the map we can determine how
|
|
|
|
many selects we can avoid by taking the rank of the range and starting
|
|
|
|
at that bit. */
|
2024-05-03 01:13:17 +00:00
|
|
|
nth = (idx == 0) ? 0 : sparsemap_rank(map, 0, idx - 1, value);
|
2024-06-13 09:58:12 +00:00
|
|
|
if (SPARSEMAP_NOT_FOUND(nth)) {
|
|
|
|
return nth;
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
/* Find the first bit that matches value, then... */
|
|
|
|
offset = sparsemap_select(map, nth, value);
|
2024-04-07 20:38:57 +00:00
|
|
|
do {
|
2024-04-28 16:26:31 +00:00
|
|
|
/* See if the rank of the bits in the range starting at offset is equal
|
|
|
|
to the desired amount. */
|
2024-05-06 19:43:47 +00:00
|
|
|
rank = (len == 1) ? 1 : __sm_rank_vec(map, offset, offset + len - 1, value, &vec);
|
2024-04-28 16:26:31 +00:00
|
|
|
if (rank >= len) {
|
|
|
|
/* We've found what we're looking for, return the index of the first
|
|
|
|
bit in the range. */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Now we try to jump forward as much as possible before we look for a
|
|
|
|
new match. We do this by counting the remaining bits in the returned
|
|
|
|
vec from the call to rank_vec(). */
|
2024-05-03 01:13:17 +00:00
|
|
|
int amt = 1;
|
|
|
|
if (vec > 0) {
|
|
|
|
/* The returned vec had som set bits, let's move forward in the map as much
|
|
|
|
as possible (max: 64 bit positions). */
|
2024-04-28 16:26:31 +00:00
|
|
|
int max = len > SM_BITS_PER_VECTOR ? SM_BITS_PER_VECTOR : len;
|
|
|
|
while (amt < max && (vec & 1 << amt)) {
|
|
|
|
amt++;
|
2024-04-07 20:38:57 +00:00
|
|
|
}
|
|
|
|
}
|
2024-04-28 16:26:31 +00:00
|
|
|
nth += amt;
|
2024-04-24 20:32:09 +00:00
|
|
|
offset = sparsemap_select(map, nth, value);
|
2024-04-28 16:26:31 +00:00
|
|
|
} while (SPARSEMAP_FOUND(offset));
|
2024-04-07 20:38:57 +00:00
|
|
|
|
2024-04-28 16:26:31 +00:00
|
|
|
return offset;
|
2024-04-07 20:38:57 +00:00
|
|
|
}
|
2024-07-19 08:44:26 +00:00
|
|
|
|
|
|
|
#ifdef SPARSEMAP_TESTING
|
|
|
|
|
|
|
|
#include <qc.h>
|
|
|
|
|
|
|
|
/* Test-local integer power: returns base raised to exponent without pulling
 * in libm. Handles 0^0 == 1 (by convention), negative bases, and — unlike
 * the previous version — negative exponents.
 */
static double
_tst_pow(double base, int exponent)
{
  if (exponent == 0) {
    return 1.0; // 0^0 is 1
  } else if (exponent < 0) {
    /* BUGFIX: a negative exponent used to underflow the unsigned loop-bound
       comparison below and spin for ~2^32 iterations; compute the reciprocal
       of the positive power instead. (INT_MIN would overflow on negation,
       but test callers never pass it.) */
    return 1.0 / _tst_pow(base, -exponent);
  } else if (base == 0.0) {
    return 0.0; // 0 raised to any positive exponent is 0 (except 0^0)
  } else if (base < 0.0 && (exponent & 1) != 0) {
    // negative base with odd exponent, results in a negative
    return -_tst_pow(-base, exponent);
  }

  double result = base;
  /* BUGFIX: loop index is now signed, avoiding the signed/unsigned
     comparison with `exponent`. */
  for (int i = 1; i < exponent; i++) {
    result *= base;
  }
  return result;
}
|
|
|
|
|
|
|
|
static char *
|
|
|
|
QCC_showChunk(void *value, int len)
|
|
|
|
{
|
|
|
|
static char buffer[(SM_FLAGS_PER_INDEX * 16) + (SM_BITS_PER_VECTOR * 64) + 16];
|
|
|
|
|
|
|
|
__sm_chunk_t *chunk = (__sm_chunk_t *)value;
|
|
|
|
__sm_bitvec_t desc = chunk->m_data[0];
|
|
|
|
|
|
|
|
if (!SM_IS_CHUNK_RLE(chunk)) {
|
|
|
|
char desc_str[SM_FLAGS_PER_INDEX + 1] = { 0 };
|
|
|
|
char *str = desc_str;
|
|
|
|
int mixed = 0;
|
|
|
|
for (int i = 0; i < SM_FLAGS_PER_INDEX; i++) {
|
|
|
|
uint8_t flag = SM_CHUNK_GET_FLAGS(desc, i);
|
|
|
|
switch (flag) {
|
|
|
|
case SM_PAYLOAD_NONE:
|
|
|
|
str += sprintf(str, "Ø");
|
|
|
|
break;
|
|
|
|
case SM_PAYLOAD_ONES:
|
|
|
|
str += sprintf(str, "1");
|
|
|
|
break;
|
|
|
|
case SM_PAYLOAD_ZEROS:
|
|
|
|
str += sprintf(str, "0");
|
|
|
|
break;
|
|
|
|
case SM_PAYLOAD_MIXED:
|
|
|
|
str += sprintf(str, "≡");
|
|
|
|
mixed++;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
str = buffer + snprintf(buffer, sizeof(buffer), "%s :: ", desc_str);
|
|
|
|
for (int i = 0; i < mixed; i++) {
|
|
|
|
str += sprintf(str, "0x%lx%s", chunk->m_data[1 + i], i + 1 < mixed ? " " : "");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
sprintf(buffer, "1»%zu", SM_CHUNK_RLE_LENGTH(chunk));
|
|
|
|
}
|
|
|
|
return buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* quickcheck destructor hook handed to QCC_initGenValue(): releases the
 * single malloc'd region holding a generated chunk and its vectors.
 */
static void
QCC_freeChunkValue(void *value)
{
  free(value);
}
|
|
|
|
|
|
|
|
QCC_GenValue *
|
|
|
|
QCC_genChunk()
|
|
|
|
{
|
|
|
|
bool rle = ((double)random() / (double)RAND_MAX) > 0.5;
|
|
|
|
|
|
|
|
if (rle) {
|
|
|
|
// Generate a chunk with the offset equal to the encoded run length.
|
|
|
|
sparsemap_idx_t from = 1, to = (sparsemap_idx_t)1ULL << (SM_BITS_PER_VECTOR - 2);
|
|
|
|
unsigned int len = ((unsigned int)random() % (to - from)) + from;
|
|
|
|
__sm_chunk_t *chunk = malloc(sizeof(__sm_chunk_t) + (sizeof(__sm_bitvec_t) * 2));
|
|
|
|
__sm_bitvec_t *v = (__sm_bitvec_t *)(chunk + sizeof(__sm_chunk_t));
|
|
|
|
chunk->m_data = v;
|
|
|
|
*v = ((((__sm_bitvec_t)1) << (SM_BITS_PER_VECTOR - 2)) | len);
|
|
|
|
assert(SM_IS_CHUNK_RLE(chunk));
|
|
|
|
assert(SM_CHUNK_RLE_LENGTH(chunk) == (unsigned int)len);
|
|
|
|
QCC_showChunk(chunk, len);
|
|
|
|
return QCC_initGenValue(chunk, 1, QCC_showChunk, QCC_freeChunkValue);
|
|
|
|
} else {
|
|
|
|
// Generate a chunk with the offset equal to the number of additional
|
|
|
|
// vectors (len) and a descriptor that matches that with random data.
|
|
|
|
unsigned int from = 0, to = SM_FLAGS_PER_INDEX;
|
|
|
|
unsigned int len = ((unsigned int)random() % (to - from)) + from;
|
|
|
|
unsigned int cut = ((unsigned int)random() % ((SM_FLAGS_PER_INDEX - len) - from)) + from;
|
|
|
|
__sm_chunk_t *chunk = malloc(sizeof(__sm_chunk_t) + (sizeof(__sm_bitvec_t) * (len + 2)));
|
|
|
|
chunk->m_data = (__sm_bitvec_t *)((uintptr_t)chunk + sizeof(__sm_chunk_t));
|
|
|
|
__sm_bitvec_t *desc = &chunk->m_data[0];
|
|
|
|
*desc = 0;
|
|
|
|
// Run through the flags (2 bits), we know that exactly `len` many flags
|
|
|
|
// must be SM_PAYLOAD_MIXED.
|
|
|
|
for (size_t i = 0, n = len; (i < SM_FLAGS_PER_INDEX - cut) && n; i++) {
|
|
|
|
size_t left = SM_FLAGS_PER_INDEX - cut - i;
|
|
|
|
double prob = (double)n / (double)left;
|
|
|
|
double dice = (double)random() / RAND_MAX;
|
|
|
|
if (dice < prob || left == n) {
|
|
|
|
*desc |= ((__sm_bitvec_t)SM_PAYLOAD_MIXED) << i;
|
|
|
|
size_t pos = (len - n);
|
|
|
|
chunk->m_data[1 + pos] = (uintptr_t)chunk + pos;
|
|
|
|
n--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (int i = 1; i < SM_FLAGS_PER_INDEX - cut; i++) {
|
|
|
|
uint8_t flag = SM_CHUNK_GET_FLAGS(*desc, i);
|
|
|
|
if (flag != SM_PAYLOAD_MIXED && ((double)random() / (double)RAND_MAX) > 0.5) {
|
|
|
|
*desc ^= ~((__sm_bitvec_t)SM_PAYLOAD_ONES << (i * 2));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//TODO: __sm_chunk_reduce_capacity(chunk, SM_CHUNK_MAX_CAPACITY - (cut * SM_BITS_PER_VECTOR));
|
|
|
|
assert(SM_IS_CHUNK_RLE(chunk) == false);
|
|
|
|
return QCC_initGenValue(chunk, 1, QCC_showChunk, QCC_freeChunkValue);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test-local reference implementation: counts the 2-bit fields in the
 * descriptor byte `b` that equal 0x02 (presumably SM_PAYLOAD_MIXED — each
 * such flag requires one payload vector). Compared against the library's
 * __sm_chunk_calc_vector_size() in the property test below.
 */
static size_t
_tst_sm_chunk_calc_vector_size(uint8_t b)
{
  size_t vectors = 0;

  for (unsigned int shift = 0; shift < 8; shift += 2) {
    if (((b >> shift) & 0x03) == 0x02) {
      vectors++;
    }
  }

  return vectors;
}
|
|
|
|
|
|
|
|
QCC_TestStatus
|
|
|
|
_tst_chunk_calc_vector_size_equality(QCC_GenValue **vals, int len, QCC_Stamp **stamp)
|
|
|
|
{
|
|
|
|
unsigned int a = *QCC_getValue(vals, 0, unsigned int *) % 256;
|
|
|
|
return _tst_sm_chunk_calc_vector_size(a) == __sm_chunk_calc_vector_size(a);
|
|
|
|
}
|
|
|
|
|
|
|
|
QCC_TestStatus
|
|
|
|
_tst_chunk_get_position(QCC_GenValue **vals, int len, QCC_Stamp **stamp)
|
|
|
|
{
|
|
|
|
size_t pos;
|
|
|
|
__sm_chunk_t *chunk = QCC_getValue(vals, 0, __sm_chunk_t *);
|
|
|
|
if (SM_IS_CHUNK_RLE(chunk)) {
|
|
|
|
for (size_t i = 0; i < SM_FLAGS_PER_INDEX; i++) {
|
|
|
|
pos = __sm_chunk_get_position(chunk, i);
|
|
|
|
if (pos != 0) {
|
|
|
|
return QCC_FAIL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
size_t mixed = 0;
|
|
|
|
for (size_t i = 0; i < SM_FLAGS_PER_INDEX; i++) {
|
|
|
|
uint8_t flag = SM_CHUNK_GET_FLAGS(*chunk->m_data, i);
|
|
|
|
switch (flag) {
|
|
|
|
case SM_PAYLOAD_MIXED:
|
|
|
|
pos = __sm_chunk_get_position(chunk, i);
|
|
|
|
if (chunk->m_data[1 + pos] != (uintptr_t)chunk + pos) {
|
|
|
|
return QCC_FAIL;
|
|
|
|
}
|
|
|
|
mixed++;
|
|
|
|
break;
|
|
|
|
case SM_PAYLOAD_ONES:
|
|
|
|
case SM_PAYLOAD_ZEROS:
|
|
|
|
pos = __sm_chunk_get_position(chunk, i);
|
|
|
|
if (pos != mixed) {
|
|
|
|
return QCC_FAIL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case SM_PAYLOAD_NONE:
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return QCC_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|