// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! // Created by amalgamation.sh on 2024-05-03T13:59:37Z /* * The CRoaring project is under a dual license (Apache/MIT). * Users of the library may choose one or the other license. */ /* * Copyright 2016-2022 The CRoaring authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 */ /* * MIT License * * Copyright 2016-2022 The CRoaring authors * * Permission is hereby granted, free of charge, to any * person obtaining a copy of this software and associated * documentation files (the "Software"), to deal in the * Software without restriction, including without * limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software * is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice * shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * SPDX-License-Identifier: MIT */ /* begin file include/roaring/roaring_version.h */ // /include/roaring/roaring_version.h automatically generated by release.py, do // not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION #define ROARING_VERSION "3.0.0" enum { ROARING_VERSION_MAJOR = 3, ROARING_VERSION_MINOR = 0, ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION /* end file include/roaring/roaring_version.h */ /* begin file include/roaring/roaring_types.h */ /* Typedefs used by various components */ #ifndef ROARING_TYPES_H #define ROARING_TYPES_H #include #include #ifdef __cplusplus extern "C" { namespace roaring { namespace api { #endif /** * When building .c files as C++, there's added compile-time checking if the * container types are derived from a `container_t` base class. So long as * such a base class is empty, the struct will behave compatibly with C structs * despite the derivation. This is due to the Empty Base Class Optimization: * * https://en.cppreference.com/w/cpp/language/ebo * * But since C isn't namespaced, taking `container_t` globally might collide * with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` */ #if defined(__cplusplus) extern "C++" { struct container_s {}; } #define ROARING_CONTAINER_T ::roaring::api::container_s #else #define ROARING_CONTAINER_T void // no compile-time checking #endif #define ROARING_FLAG_COW UINT8_C(0x1) #define ROARING_FLAG_FROZEN UINT8_C(0x2) /** * Roaring arrays are array-based key-value pairs having containers as values * and 16-bit integer keys. A roaring bitmap might be implemented as such. */ // parallel arrays. Element sizes quite different. // Alternative is array // of structs. Which would have better // cache performance through binary searches? typedef struct roaring_array_s { int32_t size; int32_t allocation_size; ROARING_CONTAINER_T **containers; // Use container_t in non-API files! uint16_t *keys; uint8_t *typecodes; uint8_t flags; } roaring_array_t; typedef bool (*roaring_iterator)(uint32_t value, void *param); typedef bool (*roaring_iterator64)(uint64_t value, void *param); /** * (For advanced users.) * The roaring_statistics_t can be used to collect detailed statistics about * the composition of a roaring bitmap. */ typedef struct roaring_statistics_s { uint32_t n_containers; /* number of containers */ uint32_t n_array_containers; /* number of array containers */ uint32_t n_run_containers; /* number of run containers */ uint32_t n_bitset_containers; /* number of bitmap containers */ uint32_t n_values_array_containers; /* number of values in array containers */ uint32_t n_values_run_containers; /* number of values in run containers */ uint32_t n_values_bitset_containers; /* number of values in bitmap containers */ uint32_t n_bytes_array_containers; /* number of allocated bytes in array containers */ uint32_t n_bytes_run_containers; /* number of allocated bytes in run containers */ uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap containers */ uint32_t max_value; /* the maximal value, undefined if cardinality is zero */ uint32_t min_value; /* the minimal value, undefined if cardinality is zero */ uint64_t sum_value; /* the sum of all values (could be used to compute average) */ uint64_t cardinality; /* total number of values stored in the bitmap */ // and n_values_arrays, n_values_rle, n_values_bitmap } roaring_statistics_t; /** * Roaring-internal type used to iterate within a roaring container. */ typedef struct roaring_container_iterator_s { // For bitset and array containers this is the index of the bit / entry. // For run containers this points at the run. int32_t index; } roaring_container_iterator_t; #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace api { #endif #endif /* ROARING_TYPES_H */ /* end file include/roaring/roaring_types.h */ /* begin file include/roaring/portability.h */ /* * portability.h * */ /** * All macros should be prefixed with either CROARING or ROARING. * The library uses both ROARING_... * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for * macros that are provided by the build system or that are closely * related to the format. The header macros may also use ROARING_. * The CROARING_ prefix is for internal macros that a user is unlikely * to ever interact with. */ #ifndef CROARING_INCLUDE_PORTABILITY_H_ #define CROARING_INCLUDE_PORTABILITY_H_ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS #ifdef _MSC_VER #define CROARING_VISUAL_STUDIO 1 /** * We want to differentiate carefully between * clang under visual studio and regular visual * studio. */ #ifdef __clang__ // clang under visual studio #define CROARING_CLANG_VISUAL_STUDIO 1 #else // just regular visual studio (best guess) #define CROARING_REGULAR_VISUAL_STUDIO 1 #endif // __clang__ #endif // _MSC_VER #ifndef CROARING_VISUAL_STUDIO #define CROARING_VISUAL_STUDIO 0 #endif #ifndef CROARING_CLANG_VISUAL_STUDIO #define CROARING_CLANG_VISUAL_STUDIO 0 #endif #ifndef CROARING_REGULAR_VISUAL_STUDIO #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif #if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) #undef _POSIX_C_SOURCE #endif #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #define _XOPEN_SOURCE 700 #endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #ifdef __illumos__ #define __EXTENSIONS__ #endif #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above #ifdef __GLIBC__ #include // this should never be needed but there are some reports that it is needed. #endif #ifdef __cplusplus extern "C" { // portability definitions are in global scope, not a namespace #endif #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. #endif #if CROARING_REGULAR_VISUAL_STUDIO #ifndef __restrict__ #define __restrict__ __restrict #endif // __restrict__ #endif // CROARING_REGULAR_VISUAL_STUDIO #if defined(__x86_64__) || defined(_M_X64) // we have an x64 processor #define CROARING_IS_X64 1 #if defined(_MSC_VER) && (_MSC_VER < 1910) // Old visual studio systems won't support AVX2 well. #undef CROARING_IS_X64 #endif #if defined(__clang_major__) && (__clang_major__ <= 8) && !defined(__AVX2__) // Older versions of clang have a bug affecting us // https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces #undef CROARING_IS_X64 #endif #ifdef ROARING_DISABLE_X64 #undef CROARING_IS_X64 #endif // we include the intrinsic header #if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler */ #include // on some recent GCC, this will declare posix_memalign #if CROARING_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. */ // To avoid reordering imports: // clang-format off #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include #if _MSC_VER >= 1920 // Important: we need the AVX-512 headers: #include #include #include #include #include #include #include #include // clang-format on #endif // _MSC_VER >= 1920 // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. #ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO #endif // CROARING_REGULAR_VISUAL_STUDIO #endif // defined(__x86_64__) || defined(_M_X64) #if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) #define CROARING_USENEON #endif #if defined(CROARING_USENEON) #include #endif #if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline * assembly */ #define CROARING_INLINE_ASM 1 #endif // _MSC_VER #if CROARING_REGULAR_VISUAL_STUDIO /* Microsoft C/C++-compatible compiler */ #include #ifndef __clang__ // if one compiles with MSVC *with* clang, then these // intrinsics are defined!!! #define CROARING_INTRINSICS 1 // sadly there is no way to check whether we are missing these intrinsics // specifically. /* wrappers for Visual Studio built-ins that look like gcc built-ins * __builtin_ctzll */ /** result might be undefined when input_num is zero */ inline int roaring_trailing_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanForward64(&index, input_num); #else // if we must support 32-bit Windows if ((uint32_t)input_num != 0) { _BitScanForward(&index, (uint32_t)input_num); } else { _BitScanForward(&index, (uint32_t)(input_num >> 32)); index += 32; } #endif // _WIN64 return index; } /* wrappers for Visual Studio built-ins that look like gcc built-ins * __builtin_clzll */ /** result might be undefined when input_num is zero */ inline int roaring_leading_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanReverse64(&index, input_num); #else // if we must support 32-bit Windows if (input_num > 0xFFFFFFFF) { _BitScanReverse(&index, (uint32_t)(input_num >> 32)); index += 32; } else { _BitScanReverse(&index, (uint32_t)(input_num)); } #endif // _WIN64 return 63 - index; } /* Use #define so this is effective even under /Ob0 (no inline) */ #define roaring_unreachable __assume(0) #endif // __clang__ #endif // CROARING_REGULAR_VISUAL_STUDIO #ifndef CROARING_INTRINSICS #define CROARING_INTRINSICS 1 #define roaring_unreachable __builtin_unreachable() /** result might be undefined when input_num is zero */ inline int roaring_trailing_zeroes(unsigned long long input_num) { return __builtin_ctzll(input_num); } /** result might be undefined when input_num is zero */ inline int roaring_leading_zeroes(unsigned long long input_num) { return __builtin_clzll(input_num); } #endif #if CROARING_REGULAR_VISUAL_STUDIO #define ALIGNED(x) __declspec(align(x)) #elif defined(__GNUC__) || defined(__clang__) #define ALIGNED(x) __attribute__((aligned(x))) #else #warning "Warning. Unrecognized compiler." #define ALIGNED(x) #endif #if defined(__GNUC__) || defined(__clang__) #define CROARING_WARN_UNUSED __attribute__((warn_unused_result)) #else #define CROARING_WARN_UNUSED #endif #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) #ifdef CROARING_USENEON // we can always compute the popcount fast. #elif (defined(_M_ARM) || defined(_M_ARM64)) && \ ((defined(_WIN64) || defined(_WIN32)) && \ defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO) // we will need this function: static inline int roaring_hamming_backup(uint64_t x) { uint64_t c1 = UINT64_C(0x5555555555555555); uint64_t c2 = UINT64_C(0x3333333333333333); uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F); x -= (x >> 1) & c1; x = ((x >> 2) & c2) + (x & c2); x = (x + (x >> 4)) & c4; x *= UINT64_C(0x0101010101010101); return x >> 56; } #endif static inline int roaring_hamming(uint64_t x) { #if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO #ifdef CROARING_USENEON return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); #elif defined(_M_ARM64) return roaring_hamming_backup(x); // (int) _CountOneBits64(x); is unavailable #else // _M_ARM64 return (int)__popcnt64(x); #endif // _M_ARM64 #elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO #ifdef _M_ARM return roaring_hamming_backup(x); // _CountOneBits is unavailable #else // _M_ARM return (int)__popcnt((unsigned int)x) + (int)__popcnt((unsigned int)(x >> 32)); #endif // _M_ARM #else return __builtin_popcountll(x); #endif } #ifndef UINT64_C #define UINT64_C(c) (c##ULL) #endif // UINT64_C #ifndef UINT32_C #define UINT32_C(c) (c##UL) #endif // UINT32_C #ifdef __cplusplus } // extern "C" { #endif // __cplusplus // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ #undef STRINGIFY #define STRINGIFY_IMPLEMENTATION_(a) #a #define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) // Our fast kernels require 64-bit systems. // // On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. // Furthermore, the number of SIMD registers is reduced. // // On 32-bit ARM, we would have smaller registers. // // The library should still have the fallback kernel. It is // slower, but it should run everywhere. // // Enable valid runtime implementations, and select // CROARING_BUILTIN_IMPLEMENTATION // // We are going to use runtime dispatch. #if CROARING_IS_X64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be // *outside* of a namespace. #define CROARING_TARGET_REGION(T) \ _Pragma(STRINGIFY(clang attribute push(__attribute__((target(T))), \ apply_to = function))) #define CROARING_UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier #define CROARING_TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) #define CROARING_UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc #endif // CROARING_IS_X64 // Default target region macros don't do anything. #ifndef CROARING_TARGET_REGION #define CROARING_TARGET_REGION(T) #define CROARING_UNTARGET_REGION #endif #define CROARING_TARGET_AVX2 \ CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") #define CROARING_TARGET_AVX512 \ CROARING_TARGET_REGION( \ "avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw," \ "avx512vbmi2,avx512bitalg,avx512vpopcntdq") #define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION #define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION #ifdef __AVX2__ // No need for runtime dispatching. // It is unnecessary and harmful to old clang to tag regions. #undef CROARING_TARGET_AVX2 #define CROARING_TARGET_AVX2 #undef CROARING_UNTARGET_AVX2 #define CROARING_UNTARGET_AVX2 #endif #if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && \ defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && \ defined(__AVX512VPOPCNTDQ__) // No need for runtime dispatching. // It is unnecessary and harmful to old clang to tag regions. #undef CROARING_TARGET_AVX512 #define CROARING_TARGET_AVX512 #undef CROARING_UNTARGET_AVX512 #define CROARING_UNTARGET_AVX512 #endif // Allow unaligned memory access #if defined(__GNUC__) || defined(__clang__) #define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) #else #define ALLOW_UNALIGNED #endif #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #elif defined(_WIN32) #define CROARING_IS_BIG_ENDIAN 0 #else #if defined(__APPLE__) || \ defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined // __ORDER_BIG_ENDIAN__ #include #elif defined(sun) || \ defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) #include #else // defined(__APPLE__) || defined(__FreeBSD__) #ifdef __has_include #if __has_include() #include #endif //__has_include() #endif //__has_include #endif // defined(__APPLE__) || defined(__FreeBSD__) #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) #define CROARING_IS_BIG_ENDIAN 0 #endif #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CROARING_IS_BIG_ENDIAN 0 #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CROARING_IS_BIG_ENDIAN 1 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #endif // Host <-> big endian conversion. #if CROARING_IS_BIG_ENDIAN #define croaring_htobe64(x) (x) #elif defined(_WIN32) || defined(_WIN64) // CROARING_IS_BIG_ENDIAN #include #define croaring_htobe64(x) _byteswap_uint64(x) #elif defined(__APPLE__) // CROARING_IS_BIG_ENDIAN #include #define croaring_htobe64(x) OSSwapInt64(x) #elif defined(__has_include) && \ __has_include( \ ) && (defined(__linux__) || defined(__FreeBSD__)) // CROARING_IS_BIG_ENDIAN #include #if defined(__linux__) #define croaring_htobe64(x) bswap_64(x) #elif defined(__FreeBSD__) #define croaring_htobe64(x) bswap64(x) #else #warning "Unknown platform, report as an error" #endif #else // CROARING_IS_BIG_ENDIAN // Gets compiled to bswap or equivalent on most compilers. #define croaring_htobe64(x) \ (((x & 0x00000000000000FFULL) << 56) | \ ((x & 0x000000000000FF00ULL) << 40) | \ ((x & 0x0000000000FF0000ULL) << 24) | \ ((x & 0x00000000FF000000ULL) << 8) | ((x & 0x000000FF00000000ULL) >> 8) | \ ((x & 0x0000FF0000000000ULL) >> 24) | \ ((x & 0x00FF000000000000ULL) >> 40) | \ ((x & 0xFF00000000000000ULL) >> 56)) #endif // CROARING_IS_BIG_ENDIAN #define croaring_be64toh(x) croaring_htobe64(x) // End of host <-> big endian conversion. // Defines for the possible CROARING atomic implementations #define CROARING_ATOMIC_IMPL_NONE 1 #define CROARING_ATOMIC_IMPL_CPP 2 #define CROARING_ATOMIC_IMPL_C 3 #define CROARING_ATOMIC_IMPL_C_WINDOWS 4 // If the use has forced a specific implementation, use that, otherwise, // figure out the best implementation we can use. #if !defined(CROARING_ATOMIC_IMPL) #if defined(__cplusplus) && __cplusplus >= 201103L #ifdef __has_include #if __has_include() #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP #endif //__has_include() #else // We lack __has_include to check: #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP #endif //__has_include #elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C #elif CROARING_REGULAR_VISUAL_STUDIO // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/ #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS #endif #endif // !defined(CROARING_ATOMIC_IMPL) #if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C #include typedef _Atomic(uint32_t) croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { // Increasing the reference counter can always be done with // memory_order_relaxed: New references to an object can only be formed from // an existing reference, and passing an existing reference from one thread // to another must already provide any required synchronization. atomic_fetch_add_explicit(val, 1, memory_order_relaxed); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { // It is important to enforce any possible access to the object in one // thread (through an existing reference) to happen before deleting the // object in a different thread. This is achieved by a "release" operation // after dropping a reference (any access to the object through this // reference must obviously happened before), and an "acquire" operation // before deleting the object. bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1; if (is_zero) { atomic_thread_fence(memory_order_acquire); } return is_zero; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return atomic_load_explicit(val, memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP #include typedef std::atomic croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { val->fetch_add(1, std::memory_order_relaxed); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { // See above comments on the c11 atomic implementation for memory ordering bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1; if (is_zero) { std::atomic_thread_fence(std::memory_order_acquire); } return is_zero; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return val->load(std::memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS #include #pragma intrinsic(_InterlockedIncrement) #pragma intrinsic(_InterlockedDecrement) // _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and // overflow is defined to wrap, so we can pretend it is a uint32_t for our case typedef volatile long croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { _InterlockedIncrement(val); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return _InterlockedDecrement(val) == 0; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { // Per // https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access // > Simple reads and writes to properly-aligned 32-bit variables are atomic // > operations. In other words, you will not end up with only one portion // > of the variable updated; all bits are updated in an atomic fashion. return *val; } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE #include typedef uint32_t croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { *val += 1; } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { assert(*val > 0); *val -= 1; return val == 0; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return *val; } #else #error "Unknown atomic implementation" #endif #if defined(__GNUC__) || defined(__clang__) #define CROARING_DEPRECATED __attribute__((deprecated)) #else #define CROARING_DEPRECATED #endif // defined(__GNUC__) || defined(__clang__) // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). // See https://github.com/RoaringBitmap/CRoaring/issues/394 // There is no scenario where we want portability.h to // be included, but not isadetection.h: the latter is a // strict requirement. #endif /* INCLUDE_PORTABILITY_H_ */ /* end file include/roaring/portability.h */ /* begin file include/roaring/bitset/bitset.h */ #ifndef CROARING_CBITSET_BITSET_H #define CROARING_CBITSET_BITSET_H // For compatibility with MSVC with the use of `restrict` #if (__STDC_VERSION__ >= 199901L) || \ (defined(__GNUC__) && defined(__STDC_VERSION__)) #define CROARING_CBITSET_RESTRICT restrict #else #define CROARING_CBITSET_RESTRICT #endif // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && // defined(__STDC_VERSION__ )) #include #include #include #include #include #ifdef __cplusplus extern "C" { namespace roaring { namespace api { #endif struct bitset_s { uint64_t *CROARING_CBITSET_RESTRICT array; /* For simplicity and performance, we prefer to have a size and a capacity * that is a multiple of 64 bits. Thus we only track the size and the * capacity in terms of 64-bit words allocated */ size_t arraysize; size_t capacity; }; typedef struct bitset_s bitset_t; /* Create a new bitset. Return NULL in case of failure. */ bitset_t *bitset_create(void); /* Create a new bitset able to contain size bits. Return NULL in case of * failure. */ bitset_t *bitset_create_with_capacity(size_t size); /* Free memory. */ void bitset_free(bitset_t *bitset); /* Set all bits to zero. */ void bitset_clear(bitset_t *bitset); /* Set all bits to one. */ void bitset_fill(bitset_t *bitset); /* Create a copy */ bitset_t *bitset_copy(const bitset_t *bitset); /* For advanced users: Resize the bitset so that it can support newarraysize * * 64 bits. Return true in case of success, false for failure. Pad with zeroes * new buffer areas if requested. */ bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes); /* returns how many bytes of memory the backend buffer uses */ inline size_t bitset_size_in_bytes(const bitset_t *bitset) { return bitset->arraysize * sizeof(uint64_t); } /* returns how many bits can be accessed */ inline size_t bitset_size_in_bits(const bitset_t *bitset) { return bitset->arraysize * 64; } /* returns how many words (64-bit) of memory the backend buffer uses */ inline size_t bitset_size_in_words(const bitset_t *bitset) { return bitset->arraysize; } /* For advanced users: Grow the bitset so that it can support newarraysize * 64 * bits with padding. Return true in case of success, false for failure. */ bool bitset_grow(bitset_t *bitset, size_t newarraysize); /* attempts to recover unused memory, return false in case of * roaring_reallocation failure */ bool bitset_trim(bitset_t *bitset); /* shifts all bits by 's' positions so that the bitset representing values * 1,2,10 would represent values 1+s, 2+s, 10+s */ void bitset_shift_left(bitset_t *bitset, size_t s); /* shifts all bits by 's' positions so that the bitset representing values * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */ void bitset_shift_right(bitset_t *bitset, size_t s); /* Set the ith bit. Attempts to resize the bitset if needed (may silently fail) */ inline void bitset_set(bitset_t *bitset, size_t i) { size_t shiftedi = i / 64; if (shiftedi >= bitset->arraysize) { if (!bitset_grow(bitset, shiftedi + 1)) { return; } } bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64); } /* Set the ith bit to the specified value. Attempts to resize the bitset if * needed (may silently fail) */ inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) { size_t shiftedi = i / 64; uint64_t mask = ((uint64_t)1) << (i % 64); uint64_t dynmask = ((uint64_t)flag) << (i % 64); if (shiftedi >= bitset->arraysize) { if (!bitset_grow(bitset, shiftedi + 1)) { return; } } uint64_t w = bitset->array[shiftedi]; w &= ~mask; w |= dynmask; bitset->array[shiftedi] = w; } /* Get the value of the ith bit. */ inline bool bitset_get(const bitset_t *bitset, size_t i) { size_t shiftedi = i / 64; if (shiftedi >= bitset->arraysize) { return false; } return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0; } /* Count number of bits set. */ size_t bitset_count(const bitset_t *bitset); /* Find the index of the first bit set. Or zero if the bitset is empty. */ size_t bitset_minimum(const bitset_t *bitset); /* Find the index of the last bit set. Or zero if the bitset is empty. */ size_t bitset_maximum(const bitset_t *bitset); /* compute the union in-place (to b1), returns true if successful, to generate a * new bitset first call bitset_copy */ bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* report the size of the union (without materializing it) */ size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* compute the intersection in-place (to b1), to generate a new bitset first * call bitset_copy */ void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* report the size of the intersection (without materializing it) */ size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* returns true if the bitsets contain no common elements */ bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* returns true if the bitsets contain any common elements */ bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* returns true if b1 contains all of the set bits of b2 */ bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* compute the difference in-place (to b1), to generate a new bitset first call * bitset_copy */ void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* compute the size of the difference */ size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* compute the symmetric difference in-place (to b1), return true if successful, * to generate a new bitset first call bitset_copy */ bool bitset_inplace_symmetric_difference( bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* compute the size of the symmetric difference */ size_t bitset_symmetric_difference_count( const bitset_t *CROARING_CBITSET_RESTRICT b1, const bitset_t *CROARING_CBITSET_RESTRICT b2); /* iterate over the set bits like so : for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) { //..... } */ inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) { size_t x = *i / 64; if (x >= bitset->arraysize) { return false; } uint64_t w = bitset->array[x]; w >>= (*i & 63); if (w != 0) { *i += roaring_trailing_zeroes(w); return true; } x++; while (x < bitset->arraysize) { w = bitset->array[x]; if (w != 0) { *i = x * 64 + roaring_trailing_zeroes(w); return true; } x++; } return false; } /* iterate over the set bits like so : size_t buffer[256]; size_t howmany = 0; for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, &startfrom)) > 0 ; startfrom++) { //..... } */ inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, size_t capacity, size_t *startfrom) { if (capacity == 0) return 0; // sanity check size_t x = *startfrom / 64; if (x >= bitset->arraysize) { return 0; // nothing more to iterate over } uint64_t w = bitset->array[x]; w >>= (*startfrom & 63); size_t howmany = 0; size_t base = x << 6; while (howmany < capacity) { while (w != 0) { uint64_t t = w & (~w + 1); int r = roaring_trailing_zeroes(w); buffer[howmany++] = r + base; if (howmany == capacity) goto end; w ^= t; } x += 1; if (x == bitset->arraysize) { break; } base += 64; w = bitset->array[x]; } end: if (howmany > 0) { *startfrom = buffer[howmany - 1]; } return howmany; } typedef bool (*bitset_iterator)(size_t value, void *param); // return true if uninterrupted inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, void *ptr) { size_t base = 0; for (size_t i = 0; i < b->arraysize; ++i) { uint64_t w = b->array[i]; while (w != 0) { uint64_t t = w & (~w + 1); int r = roaring_trailing_zeroes(w); if (!iterator(r + base, ptr)) return false; w ^= t; } base += 64; } return true; } inline void bitset_print(const bitset_t *b) { printf("{"); for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { printf("%zu, ", i); } printf("}"); } #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace api { #endif #endif /* end file include/roaring/bitset/bitset.h */ /* begin file include/roaring/roaring.h */ /* * An implementation of Roaring Bitmaps in C. */ #ifndef ROARING_H #define ROARING_H #include #include // for `size_t` #include // Include other headers after roaring_types.h #ifdef __cplusplus extern "C" { namespace roaring { namespace api { #endif typedef struct roaring_bitmap_s { roaring_array_t high_low_container; } roaring_bitmap_t; /** * Dynamically allocates a new bitmap (initially empty). * Returns NULL if the allocation fails. * Capacity is a performance hint for how many "containers" the data will need. * Client is responsible for calling `roaring_bitmap_free()`. */ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); /** * Dynamically allocates a new bitmap (initially empty). * Returns NULL if the allocation fails. * Client is responsible for calling `roaring_bitmap_free()`. */ inline roaring_bitmap_t *roaring_bitmap_create(void) { return roaring_bitmap_create_with_capacity(0); } /** * Initialize a roaring bitmap structure in memory controlled by client. * Capacity is a performance hint for how many "containers" the data will need. * Can return false if auxiliary allocations fail when capacity greater than 0. */ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); /** * Initialize a roaring bitmap structure in memory controlled by client. * The bitmap will be in a "clear" state, with no auxiliary allocations. * Since this performs no allocations, the function will not fail. */ inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) { roaring_bitmap_init_with_capacity(r, 0); } /** * Add all the values between min (included) and max (excluded) that are at a * distance k*step from min. */ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); /** * Creates a new bitmap from a pointer of uint32_t integers */ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); /* * Whether you want to use copy-on-write. * Saves memory and avoids copies, but needs more care in a threaded context. * Most users should ignore this flag. * * Note: If you do turn this flag to 'true', enabling COW, then ensure that you * do so for all of your bitmaps, since interactions between bitmaps with and * without COW is unsafe. */ inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) { return r->high_low_container.flags & ROARING_FLAG_COW; } inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) { if (cow) { r->high_low_container.flags |= ROARING_FLAG_COW; } else { r->high_low_container.flags &= ~ROARING_FLAG_COW; } } roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset); /** * Describe the inner structure of the bitmap. */ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); /** * Creates a new bitmap from a list of uint32_t integers * * This function is deprecated, use `roaring_bitmap_from` instead, which * doesn't require the number of elements to be passed in. * * @see roaring_bitmap_from */ CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); #ifdef __cplusplus /** * Creates a new bitmap which contains all values passed in as arguments. * * To create a bitmap from a variable number of arguments, use the * `roaring_bitmap_of_ptr` function instead. */ // Use an immediately invoked closure, capturing by reference // (in case __VA_ARGS__ refers to context outside the closure) // Include a 0 at the beginning of the array to make the array length > 0 // (zero sized arrays are not valid in standard c/c++) #define roaring_bitmap_from(...) \ [&]() { \ const uint32_t roaring_bitmap_from_array[] = {0, __VA_ARGS__}; \ return roaring_bitmap_of_ptr((sizeof(roaring_bitmap_from_array) / \ sizeof(roaring_bitmap_from_array[0])) - \ 1, \ &roaring_bitmap_from_array[1]); \ }() #else /** * Creates a new bitmap which contains all values passed in as arguments. * * To create a bitmap from a variable number of arguments, use the * `roaring_bitmap_of_ptr` function instead. */ // While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof // expression, which is an unevaluated context, so it's even safe in the case // where expressions passed have side effects (roaring64_bitmap_from(my_func(), // ++i)) // Include a 0 at the beginning of the array to make the array length > 0 // (zero sized arrays are not valid in standard c/c++) #define roaring_bitmap_from(...) \ roaring_bitmap_of_ptr( \ (sizeof((const uint32_t[]){0, __VA_ARGS__}) / sizeof(uint32_t)) - 1, \ &((const uint32_t[]){0, __VA_ARGS__})[1]) #endif /** * Copies a bitmap (this does memory allocation). * The caller is responsible for memory management. */ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); /** * Copies a bitmap from src to dest. It is assumed that the pointer dest * is to an already allocated bitmap. The content of the dest bitmap is * freed/deleted. * * It might be preferable and simpler to call roaring_bitmap_copy except * that roaring_bitmap_overwrite can save on memory allocations. * * Returns true if successful, or false if there was an error. On failure, * the dest bitmap is left in a valid, empty state (even if it was not empty * before). */ bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, const roaring_bitmap_t *src); /** * Print the content of the bitmap. */ void roaring_bitmap_printf(const roaring_bitmap_t *r); /** * Computes the intersection between two bitmaps and returns new bitmap. The * caller is responsible for memory management. * * Performance hint: if you are computing the intersection between several * bitmaps, two-by-two, it is best to start with the smallest bitmap. * You may also rely on roaring_bitmap_and_inplace to avoid creating * many temporary bitmaps. */ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the intersection between two bitmaps. */ uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Check whether two bitmaps intersect. */ bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Check whether a bitmap and an open range intersect. */ bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, uint64_t y); /** * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto * distance, or the Jaccard similarity coefficient) * * The Jaccard index is undefined if both bitmaps are empty. */ double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the union between two bitmaps. */ uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the difference (andnot) between two bitmaps. */ uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the size of the symmetric difference (xor) between two bitmaps. */ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of `roaring_bitmap_and()`, modifies r1 * r1 == r2 is allowed. * * Performance hint: if you are computing the intersection between several * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. */ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of `roaring_bitmap_or(), modifies r1. * TODO: decide whether r1 == r2 ok */ void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Compute the union of 'number' bitmaps. * Caller is responsible for freeing the result. * See also `roaring_bitmap_or_many_heap()` */ roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **rs); /** * Compute the union of 'number' bitmaps using a heap. This can sometimes be * faster than `roaring_bitmap_or_many() which uses a naive algorithm. * Caller is responsible for freeing the result. */ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, const roaring_bitmap_t **rs); /** * Computes the symmetric difference (xor) between two bitmaps * and returns new bitmap. The caller is responsible for memory management. */ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2. */ void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Compute the xor of 'number' bitmaps. * Caller is responsible for freeing the result. */ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, const roaring_bitmap_t **rs); /** * Computes the difference (andnot) between two bitmaps and returns new bitmap. * Caller is responsible for freeing the result. */ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2. */ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * TODO: consider implementing: * * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is * responsible for freeing the result."" * * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, * const roaring_bitmap_t **rs); */ /** * Frees the memory. */ void roaring_bitmap_free(const roaring_bitmap_t *r); /** * A bit of context usable with `roaring_bitmap_*_bulk()` functions * * Should be initialized with `{0}` (or `memset()` to all zeros). * Callers should treat it as an opaque type. * * A context may only be used with a single bitmap * (unless re-initialized to zero), and any modification to a bitmap * (other than modifications performed with `_bulk()` functions with the context * passed) will invalidate any contexts associated with that bitmap. */ typedef struct roaring_bulk_context_s { ROARING_CONTAINER_T *container; int idx; uint16_t key; uint8_t typecode; } roaring_bulk_context_t; /** * Add an item, using context from a previous insert for speed optimization. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first * call to this function. * * Modifying the bitmap in any way (other than `-bulk` suffixed functions) * will invalidate the stored context, calling this function with a non-zero * context after doing any modification invokes undefined behavior. * * In order to exploit this optimization, the caller should call this function * with values with the same "key" (high 16 bits of the value) consecutively. */ void roaring_bitmap_add_bulk(roaring_bitmap_t *r, roaring_bulk_context_t *context, uint32_t val); /** * Add value n_args from pointer vals, faster than repeatedly calling * `roaring_bitmap_add()` * * In order to exploit this optimization, the caller should attempt to keep * values with the same "key" (high 16 bits of the value) as consecutive * elements in `vals` */ void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); /** * Add value x */ void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); /** * Add value x * Returns true if a new value was added, false if the value already existed. */ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); /** * Add all values in range [min, max] */ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max); /** * Add all values in range [min, max) */ inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { if (max <= min) return; roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } /** * Remove value x */ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); /** * Remove all values in range [min, max] */ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max); /** * Remove all values in range [min, max) */ inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { if (max <= min) return; roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } /** * Remove multiple values */ void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); /** * Remove value x * Returns true if a new value was removed, false if the value was not existing. */ bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); /** * Check if value is present */ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); /** * Check whether a range of values from range_start (included) * to range_end (excluded) is present */ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); /** * Check if an items is present, using context from a previous insert or search * for speed optimization. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first * call to this function. * * Modifying the bitmap in any way (other than `-bulk` suffixed functions) * will invalidate the stored context, calling this function with a non-zero * context after doing any modification invokes undefined behavior. * * In order to exploit this optimization, the caller should call this function * with values with the same "key" (high 16 bits of the value) consecutively. */ bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, roaring_bulk_context_t *context, uint32_t val); /** * Get the cardinality of the bitmap (number of elements). */ uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); /** * Returns the number of elements in the range [range_start, range_end). */ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). */ bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); /** * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap * was initialized in client memory via roaring_bitmap_init(), then a call to * roaring_bitmap_clear() would be enough to "free" it) */ void roaring_bitmap_clear(roaring_bitmap_t *r); /** * Convert the bitmap to a sorted array, output in `ans`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t)); */ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); /** * Store the bitmap to a bitset. This can be useful for people * who need the performance and simplicity of a standard bitset. * We assume that the input bitset is originally empty (does not * have any set bit). * * bitset_t * out = bitset_create(); * // if the bitset has content in it, call "bitset_clear(out)" * bool success = roaring_bitmap_to_bitset(mybitmap, out); * // on failure, success will be false. * // You can then query the bitset: * bool is_present = bitset_get(out, 10011 ); * // you must free the memory: * bitset_free(out); * */ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset); /** * Convert the bitmap to a sorted array from `offset` by `limit`, output in * `ans`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * * ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t)); * * Return false in case of failure (e.g., insufficient memory) */ bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, size_t limit, uint32_t *ans); /** * Remove run-length encoding even when it is more space efficient. * Return whether a change was applied. */ bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); /** * Convert array and bitmap containers to run containers when it is more * efficient; also convert from run containers when more space efficient. * * Returns true if the result has at least one run container. * Additional savings might be possible by calling `shrinkToFit()`. */ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); /** * If needed, reallocate memory to shrink the memory usage. * Returns the number of bytes saved. */ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); /** * Write the bitmap to an output pointer, this output buffer should refer to * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes. * * See `roaring_bitmap_portable_serialize()` if you want a format that's * compatible with Java and Go implementations. This format can sometimes be * more space efficient than the portable form, e.g. when the data is sparse. * * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); /** * Use with `roaring_bitmap_serialize()`. * * (See `roaring_bitmap_portable_deserialize()` if you want a format that's * compatible with Java and Go implementations). * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); /** * Use with `roaring_bitmap_serialize()`. * * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's * compatible with Java and Go implementations). * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. * * The difference with `roaring_bitmap_deserialize()` is that this function * checks that the input buffer is a valid bitmap. If the buffer is too small, * NULL is returned. */ roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes); /** * How many bytes are required to serialize this bitmap (NOT compatible * with Java and Go versions) */ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); /** * Read bitmap from a serialized buffer. * In case of failure, NULL is returned. * * This function is unsafe in the sense that if there is no valid serialized * bitmap at the pointer, then many bytes could be read, possibly causing a * buffer overflow. See also roaring_bitmap_portable_deserialize_safe(). * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); /** * Read bitmap from a serialized buffer safely (reading up to maxbytes). * In case of failure, NULL is returned. * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * * The function itself is safe in the sense that it will not cause buffer * overflows. However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to * sensible results. In particular, the serialized array containers need to be * in sorted order, and the run containers should be in sorted non-overlapping * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * * You may use roaring_bitmap_internal_validate to check the validity of the * bitmap prior to using it. You may also use other strategies to check for * corrupted inputs (e.g., checksums). * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); /** * Read bitmap from a serialized buffer. * In case of failure, NULL is returned. * * Bitmap returned by this function can be used in all readonly contexts. * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. * * The function is unsafe in the following ways: * 1) It may execute unaligned memory accesses. * 2) A buffer overflow may occur if buf does not point to a valid serialized * bitmap. * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); /** * Check how many bytes would be read (up to maxbytes) at this pointer if there * is a bitmap, returns zero if there is no valid bitmap. * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** * How many bytes are required to serialize this bitmap. * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); /** * Write a bitmap to a char buffer. The output buffer should refer to at least * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. * * Returns how many bytes were written which should match * `roaring_bitmap_portable_size_in_bytes(r)`. * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); /* * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. * Deserialized bitmap is a constant view of the underlying buffer. * This significantly reduces amount of allocations and copying required during * deserialization. * It can be used with memory mapped files. * Example can be found in benchmarks/frozen_benchmark.c * * [#####] const roaring_bitmap_t * * | | | * +----+ | +-+ * | | | * [#####################################] underlying buffer * * Note that because frozen serialization format imitates C memory layout * of roaring_bitmap_t, it is not fixed. It is different on big/little endian * platforms and can be changed in future. */ /** * Returns number of bytes required to serialize bitmap using frozen format. */ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); /** * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); /** * Creates constant bitmap that is a view of a given buffer. * Buffer data should have been written by `roaring_bitmap_frozen_serialize()` * Its beginning must also be aligned by 32 bytes. * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`. * In case of failure, NULL is returned. * * Bitmap returned by this function can be used in all readonly contexts. * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); /** * Iterate over the bitmap elements. The function iterator is called once for * all the values with ptr (can be NULL) as the second parameter of each call. * * `roaring_iterator` is simply a pointer to a function that returns bool * (true means that the iteration should continue while false means that it * should stop), and takes (uint32_t,void*) as inputs. * * Returns true if the roaring_iterator returned true throughout (so that all * data points were necessarily visited). * * Iteration is ordered: from the smallest to the largest elements. */ bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, void *ptr); bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); /** * Return true if the two bitmaps contain the same elements. */ bool roaring_bitmap_equals(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2. */ bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2, and r2 is strictly * greater than r1. */ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * (For expert users who seek high performance.) * * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. * * The lazy version defers some computations such as the maintenance of the * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` * after executing "lazy" computations. * * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. * * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. */ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2, const bool bitsetconversion); /** * (For expert users who seek high performance.) * * Inplace version of roaring_bitmap_lazy_or, modifies r1. * * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. */ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2, const bool bitsetconversion); /** * (For expert users who seek high performance.) * * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()` * or modified with `roaring_bitmap_lazy_or_inplace()`. */ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); /** * Computes the symmetric difference between two bitmaps and returns new bitmap. * The caller is responsible for memory management. * * The lazy version defers some computations such as the maintenance of the * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` * after executing "lazy" computations. * * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on * the result. */ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * (For expert users who seek high performance.) * * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2 */ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); /** * Compute the negation of the bitmap in the interval [range_start, range_end). * The number of negated values is range_end - range_start. * Areas outside the range are passed through unchanged. */ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** * compute (in place) the negation of the roaring bitmap within a specified * interval: [range_start, range_end). The number of negated values is * range_end - range_start. * Areas outside the range are passed through unchanged. */ void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** * Selects the element at index 'rank' where the smallest element is at index 0. * If the size of the roaring bitmap is strictly greater than rank, then this * function returns true and sets element to the element of given rank. * Otherwise, it returns false. */ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint32_t *element); /** * roaring_bitmap_rank returns the number of integers that are smaller or equal * to x. Thus if x is the first element, this function will return 1. If * x is smaller than the smallest element, this function will return 0. * * The indexing convention differs between roaring_bitmap_select and * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking * the smallest value. */ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); /** * roaring_bitmap_rank_many is an `Bulk` version of `roaring_bitmap_rank` * it puts rank value of each element in `[begin .. end)` to `ans[]` * * the values in `[begin .. end)` must be sorted in Ascending order; * Caller is responsible to ensure that there is enough memory allocated, e.g. * * ans = malloc((end-begin) * sizeof(uint64_t)); */ void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t *begin, const uint32_t *end, uint64_t *ans); /** * Returns the index of x in the given roaring bitmap. * If the roaring bitmap doesn't contain x , this function will return -1. * The difference with rank function is that this function will return -1 when x * is not the element of roaring bitmap, but the rank function will return a * non-negative number. */ int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x); /** * Returns the smallest value in the set, or UINT32_MAX if the set is empty. */ uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); /** * Returns the greatest value in the set, or 0 if the set is empty. */ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); /** * (For advanced users.) * * Collect statistics about the bitmap, see roaring_types.h for * a description of roaring_statistics_t */ void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat); /** * Perform internal consistency checks. Returns true if the bitmap is * consistent. It may be useful to call this after deserializing bitmaps from * untrusted sources. If roaring_bitmap_internal_validate returns true, then the * bitmap should be consistent and can be trusted not to cause crashes or memory * corruption. * * Note that some operations intentionally leave bitmaps in an inconsistent * state temporarily, for example, `roaring_bitmap_lazy_*` functions, until * `roaring_bitmap_repair_after_lazy` is called. * * If reason is non-null, it will be set to a string describing the first * inconsistency found if any. */ bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason); /********************* * What follows is code use to iterate through values in a roaring bitmap roaring_bitmap_t *r =... roaring_uint32_iterator_t i; roaring_iterator_create(r, &i); while(i.has_value) { printf("value = %d\n", i.current_value); roaring_uint32_iterator_advance(&i); } Obviously, if you modify the underlying bitmap, the iterator becomes invalid. So don't. */ /** * A struct used to keep iterator state. Users should only access * `current_value` and `has_value`, the rest of the type should be treated as * opaque. */ typedef struct roaring_uint32_iterator_s { const roaring_bitmap_t *parent; // Owner const ROARING_CONTAINER_T *container; // Current container uint8_t typecode; // Typecode of current container int32_t container_index; // Current container index uint32_t highbits; // High 16 bits of the current value roaring_container_iterator_t container_it; uint32_t current_value; bool has_value; } roaring_uint32_iterator_t; /** * Initialize an iterator object that can be used to iterate through the values. * If there is a value, then this iterator points to the first value and * `it->has_value` is true. The value is in `it->current_value`. */ void roaring_iterator_init(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** DEPRECATED, use `roaring_iterator_init`. */ CROARING_DEPRECATED static inline void roaring_init_iterator( const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) { roaring_iterator_init(r, newit); } /** * Initialize an iterator object that can be used to iterate through the values. * If there is a value, then this iterator points to the last value and * `it->has_value` is true. The value is in `it->current_value`. */ void roaring_iterator_init_last(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** DEPRECATED, use `roaring_iterator_init_last`. */ CROARING_DEPRECATED static inline void roaring_init_iterator_last( const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) { roaring_iterator_init_last(r, newit); } /** * Create an iterator object that can be used to iterate through the values. * Caller is responsible for calling `roaring_free_iterator()`. * * The iterator is initialized (this function calls `roaring_iterator_init()`) * If there is a value, then this iterator points to the first value and * `it->has_value` is true. The value is in `it->current_value`. */ roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r); /** DEPRECATED, use `roaring_iterator_create`. */ CROARING_DEPRECATED static inline roaring_uint32_iterator_t * roaring_create_iterator(const roaring_bitmap_t *r) { return roaring_iterator_create(r); } /** * Advance the iterator. If there is a new value, then `it->has_value` is true. * The new value is in `it->current_value`. Values are traversed in increasing * orders. For convenience, returns `it->has_value`. * * Once `it->has_value` is false, `roaring_uint32_iterator_advance` should not * be called on the iterator again. Calling `roaring_uint32_iterator_previous` * is allowed. */ bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it); /** DEPRECATED, use `roaring_uint32_iterator_advance`. */ CROARING_DEPRECATED static inline bool roaring_advance_uint32_iterator( roaring_uint32_iterator_t *it) { return roaring_uint32_iterator_advance(it); } /** * Decrement the iterator. If there's a new value, then `it->has_value` is true. * The new value is in `it->current_value`. Values are traversed in decreasing * order. For convenience, returns `it->has_value`. * * Once `it->has_value` is false, `roaring_uint32_iterator_previous` should not * be called on the iterator again. Calling `roaring_uint32_iterator_advance` is * allowed. */ bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it); /** DEPRECATED, use `roaring_uint32_iterator_previous`. */ CROARING_DEPRECATED static inline bool roaring_previous_uint32_iterator( roaring_uint32_iterator_t *it) { return roaring_uint32_iterator_previous(it); } /** * Move the iterator to the first value >= `val`. If there is a such a value, * then `it->has_value` is true. The new value is in `it->current_value`. * For convenience, returns `it->has_value`. */ bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val); /** DEPRECATED, use `roaring_uint32_iterator_move_equalorlarger`. */ CROARING_DEPRECATED static inline bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { return roaring_uint32_iterator_move_equalorlarger(it, val); } /** * Creates a copy of an iterator. * Caller must free it. */ roaring_uint32_iterator_t *roaring_uint32_iterator_copy( const roaring_uint32_iterator_t *it); /** DEPRECATED, use `roaring_uint32_iterator_copy`. */ CROARING_DEPRECATED static inline roaring_uint32_iterator_t * roaring_copy_uint32_iterator(const roaring_uint32_iterator_t *it) { return roaring_uint32_iterator_copy(it); } /** * Free memory following `roaring_iterator_create()` */ void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it); /** DEPRECATED, use `roaring_uint32_iterator_free`. */ CROARING_DEPRECATED static inline void roaring_free_uint32_iterator( roaring_uint32_iterator_t *it) { roaring_uint32_iterator_free(it); } /* * Reads next ${count} values from iterator into user-supplied ${buf}. * Returns the number of read elements. * This number can be smaller than ${count}, which means that iterator is * drained. * * This function satisfies semantics of iteration and can be used together with * other iterator functions. * - first value is copied from ${it}->current_value * - after function returns, iterator is positioned at the next element */ uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count); /** DEPRECATED, use `roaring_uint32_iterator_read`. */ CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator( roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) { return roaring_uint32_iterator_read(it, buf, count); } #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace api { #endif #endif /* ROARING_H */ #ifdef __cplusplus /** * Best practices for C++ headers is to avoid polluting global scope. * But for C compatibility when just `roaring.h` is included building as * C++, default to global access for the C public API. * * BUT when `roaring.hh` is included instead, it sets this flag. That way * explicit namespacing must be used to get the C functions. * * This is outside the include guard so that if you include BOTH headers, * the order won't matter; you still get the global definitions. */ #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) using namespace ::roaring::api; #endif #endif /* end file include/roaring/roaring.h */ /* begin file include/roaring/memory.h */ #ifndef INCLUDE_ROARING_MEMORY_H_ #define INCLUDE_ROARING_MEMORY_H_ #ifdef __cplusplus extern "C" { #endif #include // for size_t typedef void* (*roaring_malloc_p)(size_t); typedef void* (*roaring_realloc_p)(void*, size_t); typedef void* (*roaring_calloc_p)(size_t, size_t); typedef void (*roaring_free_p)(void*); typedef void* (*roaring_aligned_malloc_p)(size_t, size_t); typedef void (*roaring_aligned_free_p)(void*); typedef struct roaring_memory_s { roaring_malloc_p malloc; roaring_realloc_p realloc; roaring_calloc_p calloc; roaring_free_p free; roaring_aligned_malloc_p aligned_malloc; roaring_aligned_free_p aligned_free; } roaring_memory_t; void roaring_init_memory_hook(roaring_memory_t memory_hook); void* roaring_malloc(size_t); void* roaring_realloc(void*, size_t); void* roaring_calloc(size_t, size_t); void roaring_free(void*); void* roaring_aligned_malloc(size_t, size_t); void roaring_aligned_free(void*); #ifdef __cplusplus } #endif #endif // INCLUDE_ROARING_MEMORY_H_ /* end file include/roaring/memory.h */ /* begin file include/roaring/roaring64.h */ #ifndef ROARING64_H #define ROARING64_H #include #include #include #ifdef __cplusplus extern "C" { namespace roaring { namespace api { #endif typedef struct roaring64_bitmap_s roaring64_bitmap_t; typedef struct roaring64_leaf_s roaring64_leaf_t; typedef struct roaring64_iterator_s roaring64_iterator_t; /** * A bit of context usable with `roaring64_bitmap_*_bulk()` functions. * * Should be initialized with `{0}` (or `memset()` to all zeros). * Callers should treat it as an opaque type. * * A context may only be used with a single bitmap (unless re-initialized to * zero), and any modification to a bitmap (other than modifications performed * with `_bulk()` functions with the context passed) will invalidate any * contexts associated with that bitmap. */ typedef struct roaring64_bulk_context_s { uint8_t high_bytes[6]; roaring64_leaf_t *leaf; } roaring64_bulk_context_t; /** * Dynamically allocates a new bitmap (initially empty). * Client is responsible for calling `roaring64_bitmap_free()`. */ roaring64_bitmap_t *roaring64_bitmap_create(void); void roaring64_bitmap_free(roaring64_bitmap_t *r); /** * Returns a copy of a bitmap. */ roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r); /** * Creates a new bitmap of a pointer to N 64-bit integers. */ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, const uint64_t *vals); #ifdef __cplusplus /** * Creates a new bitmap which contains all values passed in as arguments. * * To create a bitmap from a variable number of arguments, use the * `roaring64_bitmap_of_ptr` function instead. */ // Use an immediately invoked closure, capturing by reference // (in case __VA_ARGS__ refers to context outside the closure) // Include a 0 at the beginning of the array to make the array length > 0 // (zero sized arrays are not valid in standard c/c++) #define roaring64_bitmap_from(...) \ [&]() { \ const uint64_t roaring64_bitmap_from_array[] = {0, __VA_ARGS__}; \ return roaring64_bitmap_of_ptr( \ (sizeof(roaring64_bitmap_from_array) / \ sizeof(roaring64_bitmap_from_array[0])) - \ 1, \ &roaring64_bitmap_from_array[1]); \ }() #else /** * Creates a new bitmap which contains all values passed in as arguments. * * To create a bitmap from a variable number of arguments, use the * `roaring64_bitmap_of_ptr` function instead. */ // While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof // expression, which is an unevaluated context, so it's even safe in the case // where expressions passed have side effects (roaring64_bitmap_from(my_func(), // ++i)) // Include a 0 at the beginning of the array to make the array length > 0 // (zero sized arrays are not valid in standard c/c++) #define roaring64_bitmap_from(...) \ roaring64_bitmap_of_ptr( \ (sizeof((const uint64_t[]){0, __VA_ARGS__}) / sizeof(uint64_t)) - 1, \ &((const uint64_t[]){0, __VA_ARGS__})[1]) #endif /** * Create a new bitmap containing all the values in [min, max) that are at a * distance k*step from min. */ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint64_t step); /** * Adds the provided value to the bitmap. */ void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val); /** * Adds the provided value to the bitmap. * Returns true if a new value was added, false if the value already existed. */ bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val); /** * Add an item, using context from a previous insert for faster insertion. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first * call to this function. * * Modifying the bitmap in any way (other than `-bulk` suffixed functions) * will invalidate the stored context, calling this function with a non-zero * context after doing any modification invokes undefined behavior. * * In order to exploit this optimization, the caller should call this function * with values with the same high 48 bits of the value consecutively. */ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, roaring64_bulk_context_t *context, uint64_t val); /** * Add `n_args` values from `vals`, faster than repeatedly calling * `roaring64_bitmap_add()` * * In order to exploit this optimization, the caller should attempt to keep * values with the same high 48 bits of the value as consecutive elements in * `vals`. */ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals); /** * Add all values in range [min, max). */ void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Add all values in range [min, max]. */ void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Removes a value from the bitmap if present. */ void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val); /** * Removes a value from the bitmap if present, returns true if the value was * removed and false if the value was not present. */ bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val); /** * Remove an item, using context from a previous insert for faster removal. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first * call to this function. * * Modifying the bitmap in any way (other than `-bulk` suffixed functions) * will invalidate the stored context, calling this function with a non-zero * context after doing any modification invokes undefined behavior. * * In order to exploit this optimization, the caller should call this function * with values with the same high 48 bits of the value consecutively. */ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, roaring64_bulk_context_t *context, uint64_t val); /** * Remove `n_args` values from `vals`, faster than repeatedly calling * `roaring64_bitmap_remove()` * * In order to exploit this optimization, the caller should attempt to keep * values with the same high 48 bits of the value as consecutive elements in * `vals`. */ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals); /** * Remove all values in range [min, max). */ void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Remove all values in range [min, max]. */ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Returns true if the provided value is present. */ bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val); /** * Returns true if all values in the range [min, max) are present. */ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Check if an item is present using context from a previous insert or search * for faster search. * * `context` will be used to store information between calls to make bulk * operations faster. `*context` should be zero-initialized before the first * call to this function. * * Modifying the bitmap in any way (other than `-bulk` suffixed functions) * will invalidate the stored context, calling this function with a non-zero * context after doing any modification invokes undefined behavior. * * In order to exploit this optimization, the caller should call this function * with values with the same high 48 bits of the value consecutively. */ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, roaring64_bulk_context_t *context, uint64_t val); /** * Selects the element at index 'rank' where the smallest element is at index 0. * If the size of the bitmap is strictly greater than rank, then this function * returns true and sets element to the element of given rank. Otherwise, it * returns false. */ bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element); /** * Returns the number of integers that are smaller or equal to x. Thus if x is * the first element, this function will return 1. If x is smaller than the * smallest element, this function will return 0. * * The indexing convention differs between roaring64_bitmap_select and * roaring64_bitmap_rank: roaring_bitmap64_select refers to the smallest value * as having index 0, whereas roaring64_bitmap_rank returns 1 when ranking * the smallest value. */ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val); /** * Returns true if the given value is in the bitmap, and sets `out_index` to the * (0-based) index of the value in the bitmap. Returns false if the value is not * in the bitmap. */ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint64_t *out_index); /** * Returns the number of values in the bitmap. */ uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r); /** * Returns the number of elements in the range [min, max). */ uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Returns true if the bitmap is empty (cardinality is zero). */ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r); /** * Returns the smallest value in the set, or UINT64_MAX if the set is empty. */ uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r); /** * Returns the largest value in the set, or 0 if empty. */ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); /** * Returns true if the result has at least one run container. */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); /** * Perform internal consistency checks. * * Returns true if the bitmap is consistent. It may be useful to call this * after deserializing bitmaps from untrusted sources. If * roaring64_bitmap_internal_validate returns true, then the bitmap is * consistent and can be trusted not to cause crashes or memory corruption. * * If reason is non-null, it will be set to a string describing the first * inconsistency found if any. */ bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason); /** * Return true if the two bitmaps contain the same elements. */ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2. */ bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Return true if all the elements of r1 are also in r2, and r2 is strictly * greater than r1. */ bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the intersection between two bitmaps and returns new bitmap. The * caller is responsible for free-ing the result. * * Performance hint: if you are computing the intersection between several * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary * bitmaps. */ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the size of the intersection between two bitmaps. */ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * In-place version of `roaring64_bitmap_and()`, modifies `r1`. `r1` and `r2` * are allowed to be equal. * * Performance hint: if you are computing the intersection between several * bitmaps, two-by-two, it is best to start with the smallest bitmaps. */ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Check whether two bitmaps intersect. */ bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Check whether a bitmap intersects the range [min, max). */ bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto * distance, or the Jaccard similarity coefficient) * * The Jaccard index is undefined if both bitmaps are empty. */ double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for free-ing the result. */ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the size of the union between two bitmaps. */ uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * In-place version of `roaring64_bitmap_or(), modifies `r1`. */ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the symmetric difference (xor) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. */ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the size of the symmetric difference (xor) between two bitmaps. */ uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * In-place version of `roaring64_bitmap_xor()`, modifies `r1`. `r1` and `r2` * are not allowed to be equal (that would result in an empty bitmap). */ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the difference (andnot) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. */ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Computes the size of the difference (andnot) between two bitmaps. */ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * In-place version of `roaring64_bitmap_andnot()`, modifies `r1`. `r1` and `r2` * are not allowed to be equal (that would result in an empty bitmap). */ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); /** * Compute the negation of the bitmap in the interval [min, max). * The number of negated values is `max - min`. Areas outside the range are * passed through unchanged. */ roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * Compute the negation of the bitmap in the interval [min, max]. * The number of negated values is `max - min + 1`. Areas outside the range are * passed through unchanged. */ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * In-place version of `roaring64_bitmap_flip`. Compute the negation of the * bitmap in the interval [min, max). The number of negated values is `max - * min`. Areas outside the range are passed through unchanged. */ void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * In-place version of `roaring64_bitmap_flip_closed`. Compute the negation of * the bitmap in the interval [min, max]. The number of negated values is `max - * min + 1`. Areas outside the range are passed through unchanged. */ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max); /** * How many bytes are required to serialize this bitmap. * * This is meant to be compatible with other languages: * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations */ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r); /** * Write a bitmap to a buffer. The output buffer should refer to at least * `roaring64_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. * * Returns how many bytes were written, which should match * `roaring64_bitmap_portable_size_in_bytes(r)`. * * This is meant to be compatible with other languages: * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf); /** * Check how many bytes would be read (up to maxbytes) at this pointer if there * is a valid bitmap, returns zero if there is no valid bitmap. * * This is meant to be compatible with other languages * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations */ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** * Read a bitmap from a serialized buffer safely (reading up to maxbytes). * In case of failure, NULL is returned. * * This is meant to be compatible with other languages * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations * * The function itself is safe in the sense that it will not cause buffer * overflows. However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to * sensible results. In particular, the serialized array containers need to be * in sorted order, and the run containers should be in sorted non-overlapping * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. */ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); /** * Iterate over the bitmap elements. The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call. * * `roaring_iterator64` is simply a pointer to a function that returns a bool * and takes `(uint64_t, void*)` as inputs. True means that the iteration should * continue, while false means that it should stop. * * Returns true if the `roaring64_iterator` returned true throughout (so that * all data points were necessarily visited). * * Iteration is ordered from the smallest to the largest elements. */ bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr); /** * Convert the bitmap to a sorted array `out`. * * Caller is responsible to ensure that there is enough memory allocated, e.g. * ``` * out = malloc(roaring64_bitmap_get_cardinality(bitmap) * sizeof(uint64_t)); * ``` */ void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, uint64_t *out); /** * Create an iterator object that can be used to iterate through the values. * Caller is responsible for calling `roaring64_iterator_free()`. * * The iterator is initialized. If there is a value, then this iterator points * to the first value and `roaring64_iterator_has_value()` returns true. The * value can be retrieved with `roaring64_iterator_value()`. */ roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r); /** * Create an iterator object that can be used to iterate through the values. * Caller is responsible for calling `roaring64_iterator_free()`. * * The iterator is initialized. If there is a value, then this iterator points * to the last value and `roaring64_iterator_has_value()` returns true. The * value can be retrieved with `roaring64_iterator_value()`. */ roaring64_iterator_t *roaring64_iterator_create_last( const roaring64_bitmap_t *r); /** * Re-initializes an existing iterator. Functionally the same as * `roaring64_iterator_create` without a allocation. */ void roaring64_iterator_reinit(const roaring64_bitmap_t *r, roaring64_iterator_t *it); /** * Re-initializes an existing iterator. Functionally the same as * `roaring64_iterator_create_last` without a allocation. */ void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, roaring64_iterator_t *it); /** * Creates a copy of the iterator. Caller is responsible for calling * `roaring64_iterator_free()` on the resulting iterator. */ roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it); /** * Free the iterator. */ void roaring64_iterator_free(roaring64_iterator_t *it); /** * Returns true if the iterator currently points to a value. If so, calling * `roaring64_iterator_value()` returns the value. */ bool roaring64_iterator_has_value(const roaring64_iterator_t *it); /** * Returns the value the iterator currently points to. Should only be called if * `roaring64_iterator_has_value()` returns true. */ uint64_t roaring64_iterator_value(const roaring64_iterator_t *it); /** * Advance the iterator. If there is a new value, then * `roaring64_iterator_has_value()` returns true. Values are traversed in * increasing order. For convenience, returns the result of * `roaring64_iterator_has_value()`. * * Once this returns false, `roaring64_iterator_advance` should not be called on * the iterator again. Calling `roaring64_iterator_previous` is allowed. */ bool roaring64_iterator_advance(roaring64_iterator_t *it); /** * Decrement the iterator. If there is a new value, then * `roaring64_iterator_has_value()` returns true. Values are traversed in * decreasing order. For convenience, returns the result of * `roaring64_iterator_has_value()`. * * Once this returns false, `roaring64_iterator_previous` should not be called * on the iterator again. Calling `roaring64_iterator_advance` is allowed. */ bool roaring64_iterator_previous(roaring64_iterator_t *it); /** * Move the iterator to the first value greater than or equal to `val`, if it * exists at or after the current position of the iterator. If there is a new * value, then `roaring64_iterator_has_value()` returns true. Values are * traversed in increasing order. For convenience, returns the result of * `roaring64_iterator_has_value()`. */ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val); /** * Reads up to `count` values from the iterator into the given `buf`. Returns * the number of elements read. The number of elements read can be smaller than * `count`, which means that there are no more elements in the bitmap. * * This function can be used together with other iterator functions. */ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t count); #ifdef __cplusplus } // extern "C" } // namespace roaring } // namespace api #endif #endif /* ROARING64_H */ /* end file include/roaring/roaring64.h */