/* Population-count support: the GCC builtin when available, otherwise the broadword sux_popcountll() fallback. */
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
|
|
|
// Default to using the GCC builtin popcount. On architectures with
// -march popcnt, this compiles to a single popcnt instruction.
//
// A popcountll macro that is already defined when this point is reached is
// left untouched. Remapping it to sux_popcountll here is not possible:
// that function is only defined in the non-GCC branch of this file, so the
// remapped name would refer to an undeclared function (and the remap itself
// would be an incompatible macro redefinition).
#ifndef popcountll
#define popcountll __builtin_popcountll
#endif
|
|
#else
|
|
/*
 *
 * from https://github.com/efficient/rankselect/popcount.h
 *
 * licensed under Apache 2
 */
|
|
|
|
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
#ifndef _FASTRANK_POPCOUNT_H_
|
|
#define _FASTRANK_POPCOUNT_H_
|
|
|
|
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
|
|
|
|
// 64-bit broadword mask constants.
// Lk / Hk: lowest / highest bit of every k-bit lane.
// Gk: grouping masks used by the SWAR population count.
#define L8 0x0101010101010101ULL // Every lowest 8th bit set: 00000001...
#define G2 0xAAAAAAAAAAAAAAAAULL // Every highest 2nd bit: 101010...
#define G4 0x3333333333333333ULL // 00110011 ... used to group the sum of 4 bits.
#define G8 0x0F0F0F0F0F0F0F0FULL // 00001111 ... low nibble of every byte.
#define H8 0x8080808080808080ULL // Every highest 8th bit set: 10000000...
#define L9 0x0040201008040201ULL // Lowest bit of each 9-bit lane (bits 0, 9, ..., 54).
#define H9 (L9 << 8)             // Highest bit of each 9-bit lane (bits 8, 17, ..., 62).
#define L16 0x0001000100010001ULL // Lowest bit of every 16-bit lane.
#define H16 0x8000800080008000ULL // Highest bit of every 16-bit lane.
|
|
|
|
// Replicated-lane constants for 64-bit broadword arithmetic.
// ONES_STEP_k has the lowest bit of every k-bit lane set; MSBS_STEP_k has the
// highest bit of every k-bit lane set. The 9-bit variants cover seven lanes
// (bits 0..62); bit 63 is unused.
#define ONES_STEP_4 ( 0x1111111111111111ULL )
#define ONES_STEP_8 ( 0x0101010101010101ULL )
#define ONES_STEP_9 ( 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 | 1ULL << 36 | 1ULL << 45 | 1ULL << 54 )
#define ONES_STEP_16 ( 1ULL << 0 | 1ULL << 16 | 1ULL << 32 | 1ULL << 48 )
#define MSBS_STEP_4 ( 0x8ULL * ONES_STEP_4 )
#define MSBS_STEP_8 ( 0x80ULL * ONES_STEP_8 )
#define MSBS_STEP_9 ( 0x100ULL * ONES_STEP_9 )
#define MSBS_STEP_16 ( 0x8000ULL * ONES_STEP_16 )
// Byte i (counting from the least significant byte) holds the single bit 1 << i.
#define INCR_STEP_8 ( 0x80ULL << 56 | 0x40ULL << 48 | 0x20ULL << 40 | 0x10ULL << 32 | 0x8ULL << 24 | 0x4ULL << 16 | 0x2ULL << 8 | 0x1 )

#define ONES_STEP_32 ( 0x0000000100000001ULL )
#define MSBS_STEP_32 ( 0x8000000080000000ULL )

// Lane-wise comparisons. Each macro returns a word whose k-bit lanes hold 1
// (in the lane's lowest bit) where the comparison is true and 0 elsewhere.
// All macros evaluate their arguments more than once, so pass only
// side-effect-free expressions.

// Per byte: x < y (COMPARE) and x <= y (LEQ). The result matches the plain
// unsigned comparison when the top bits of the two bytes are equal, e.g. when
// both values are below 128.
#define COMPARE_STEP_8(x,y) ( ( ( ( ( (x) | MSBS_STEP_8 ) - ( (y) & ~MSBS_STEP_8 ) ) ^ (x) ^ ~(y) ) & MSBS_STEP_8 ) >> 7 )
#define LEQ_STEP_8(x,y) ( ( ( ( ( (y) | MSBS_STEP_8 ) - ( (x) & ~MSBS_STEP_8 ) ) ^ (x) ^ (y) ) & MSBS_STEP_8 ) >> 7 )

// Unsigned per-lane x < y (UCOMPARE) and x <= y (ULEQ) over the full 9- or
// 16-bit lane. Every use of x and y is parenthesized so that compound
// arguments such as `a | b` keep their intended grouping.
#define UCOMPARE_STEP_9(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_9 ) - ( (y) & ~MSBS_STEP_9 ) ) | ( (x) ^ (y) ) ) ^ ( (x) | ~(y) ) ) & MSBS_STEP_9 ) >> 8 )
#define UCOMPARE_STEP_16(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_16 ) - ( (y) & ~MSBS_STEP_16 ) ) | ( (x) ^ (y) ) ) ^ ( (x) | ~(y) ) ) & MSBS_STEP_16 ) >> 15 )
#define ULEQ_STEP_9(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_9 ) - ( (x) & ~MSBS_STEP_9 ) ) | ( (x) ^ (y) ) ) ^ ( (x) & ~(y) ) ) & MSBS_STEP_9 ) >> 8 )
#define ULEQ_STEP_16(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_16 ) - ( (x) & ~MSBS_STEP_16 ) ) | ( (x) ^ (y) ) ) ^ ( (x) & ~(y) ) ) & MSBS_STEP_16 ) >> 15 )
// Per byte: 1 where the byte of x is non-zero, 0 where it is zero.
#define ZCOMPARE_STEP_8(x) ( ( ( (x) | ( ( (x) | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >> 7 )
|
|
|
|
// Population count of a 64 bit integer in SWAR (SIMD within a register) style
|
|
// From Sebastiano Vigna, "Broadword Implementation of Rank/Select Queries"
|
|
// http://sux.dsi.unimi.it/paper.pdf p4
|
|
// This variant uses multiplication for the last summation instead of
|
|
// continuing the shift/mask/addition chain.
|
|
inline int sux_popcountll(uint64_t x) {
|
|
// Step 1: 00 - 00 = 0; 01 - 00 = 01; 10 - 01 = 01; 11 - 01 = 10;
|
|
x = x - ((x & G2) >> 1);
|
|
// step 2: add 2 groups of 2.
|
|
x = (x & G4) + ((x >> 2) & G4);
|
|
// 2 groups of 4.
|
|
x = (x + (x >> 4)) & G8;
|
|
// Using a multiply to collect the 8 groups of 8 together.
|
|
x = x * L8 >> 56;
|
|
return x;
|
|
}
|
|
|
|
#endif /* _FASTRANK_POPCOUNT_H_ */
|
|
#endif |