/*
 * 64-bit population count.
 *
 * Provides `popcountll(x)`: the number of set bits in a 64-bit value.
 *   - GCC-compatible compilers (>= 4): maps to __builtin_popcountll.
 *   - Everything else: maps to the portable broadword routine
 *     sux_popcountll() defined below.
 * A definition of `popcountll` supplied before this point is left untouched.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)

// Default to using the GCC builtin popcount. On architectures with
// -march popcnt, this compiles to a single popcnt instruction.
#ifndef popcountll
#define popcountll __builtin_popcountll
#endif

#else

/*
 *
 * from https://github.com/efficient/rankselect/popcount.h
 *
 * licensed under Apache 2
 */

/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef _FASTRANK_POPCOUNT_H_
#define _FASTRANK_POPCOUNT_H_

/* NOTE(review): the header names on these two #include lines were missing
 * from the file; <stdint.h> is required for uint64_t, <sys/types.h> is a
 * best guess -- confirm against the upstream header. */
#include <sys/types.h>
#include <stdint.h>

#define L8 0x0101010101010101ULL // Every lowest 8th bit set: 00000001...
#define G2 0xAAAAAAAAAAAAAAAAULL // Every highest 2nd bit: 101010...
#define G4 0x3333333333333333ULL // 00110011 ... used to group the sum of 4 bits.
#define G8 0x0F0F0F0F0F0F0F0FULL
#define H8 0x8080808080808080ULL
#define L9 0x0040201008040201ULL
#define H9 (L9 << 8)
#define L16 0x0001000100010001ULL
#define H16 0x8000800080008000ULL

/* ONES_STEP_k: lowest bit of every k-bit lane set.
 * MSBS_STEP_k: highest bit of every k-bit lane set. */
#define ONES_STEP_4 ( 0x1111111111111111ULL )
#define ONES_STEP_8 ( 0x0101010101010101ULL )
#define ONES_STEP_9 ( 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 | 1ULL << 36 | 1ULL << 45 | 1ULL << 54 )
#define ONES_STEP_16 ( 1ULL << 0 | 1ULL << 16 | 1ULL << 32 | 1ULL << 48 )
#define MSBS_STEP_4 ( 0x8ULL * ONES_STEP_4 )
#define MSBS_STEP_8 ( 0x80ULL * ONES_STEP_8 )
#define MSBS_STEP_9 ( 0x100ULL * ONES_STEP_9 )
#define MSBS_STEP_16 ( 0x8000ULL * ONES_STEP_16 )
#define INCR_STEP_8 ( 0x80ULL << 56 | 0x40ULL << 48 | 0x20ULL << 40 | 0x10ULL << 32 | 0x8ULL << 24 | 0x4ULL << 16 | 0x2ULL << 8 | 0x1ULL )

#define ONES_STEP_32 ( 0x0000000100000001ULL )
#define MSBS_STEP_32 ( 0x8000000080000000ULL )

/*
 * Lane-parallel comparison helpers. Each one masks the per-lane top bit of an
 * intermediate result and shifts it down to the lowest bit of that lane.
 * Arguments are evaluated more than once: do not pass expressions with side
 * effects. Every argument use is parenthesized so that compound expressions
 * (e.g. `a + b`) expand correctly.
 */
#define COMPARE_STEP_8(x,y) ( ( ( ( ( (x) | MSBS_STEP_8 ) - ( (y) & ~MSBS_STEP_8 ) ) ^ (x) ^ ~(y) ) & MSBS_STEP_8 ) >> 7 )
#define LEQ_STEP_8(x,y) ( ( ( ( ( (y) | MSBS_STEP_8 ) - ( (x) & ~MSBS_STEP_8 ) ) ^ (x) ^ (y) ) & MSBS_STEP_8 ) >> 7 )

#define UCOMPARE_STEP_9(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_9 ) - ( (y) & ~MSBS_STEP_9 ) ) | ( (x) ^ (y) ) ) ^ ( (x) | ~(y) ) ) & MSBS_STEP_9 ) >> 8 )
#define UCOMPARE_STEP_16(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_16 ) - ( (y) & ~MSBS_STEP_16 ) ) | ( (x) ^ (y) ) ) ^ ( (x) | ~(y) ) ) & MSBS_STEP_16 ) >> 15 )
#define ULEQ_STEP_9(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_9 ) - ( (x) & ~MSBS_STEP_9 ) ) | ( (x) ^ (y) ) ) ^ ( (x) & ~(y) ) ) & MSBS_STEP_9 ) >> 8 )
#define ULEQ_STEP_16(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_16 ) - ( (x) & ~MSBS_STEP_16 ) ) | ( (x) ^ (y) ) ) ^ ( (x) & ~(y) ) ) & MSBS_STEP_16 ) >> 15 )
#define ZCOMPARE_STEP_8(x) ( ( ( (x) | ( ( (x) | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >> 7 )

// Population count of a 64 bit integer in SWAR (SIMD within a register) style
// From Sebastiano Vigna, "Broadword Implementation of Rank/Select Queries"
// http://sux.dsi.unimi.it/paper.pdf p4
// This variant uses multiplication for the last summation instead of
// continuing the shift/mask/addition chain.
//
// Returns the number of 1 bits in x (0..64).
//
// Declared `static inline` so that including this header from several C
// translation units neither produces duplicate definitions nor leaves the
// symbol undefined when the compiler chooses not to inline the call.
static inline int sux_popcountll(uint64_t x)
{
    // Step 1: 00 - 00 = 0; 01 - 00 = 01; 10 - 01 = 01; 11 - 01 = 10;
    x = x - ((x & G2) >> 1);
    // Step 2: add 2 groups of 2.
    x = (x & G4) + ((x >> 2) & G4);
    // 2 groups of 4.
    x = (x + (x >> 4)) & G8;
    // Using a multiply to collect the 8 groups of 8 together.
    x = (x * L8) >> 56;
    // The total is at most 64, so the narrowing conversion is lossless.
    return (int)x;
}

// Without the GCC builtin, route popcountll to the portable implementation.
#ifndef popcountll
#define popcountll sux_popcountll
#endif

#endif /* _FASTRANK_POPCOUNT_H_ */

#endif