Cleanup
This commit is contained in:
parent
103766e41a
commit
d30133a87a
2 changed files with 37 additions and 38 deletions
|
@ -2,7 +2,7 @@
|
||||||
%% @reference [http://en.wikipedia.org/wiki/Bloom_filter]
|
%% @reference [http://en.wikipedia.org/wiki/Bloom_filter]
|
||||||
|
|
||||||
-module(bloom).
|
-module(bloom).
|
||||||
-export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1]).
|
-export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1, filter_size/1]).
|
||||||
-import(math, [log/1, pow/2]).
|
-import(math, [log/1, pow/2]).
|
||||||
-import(erlang, [phash2/2]).
|
-import(erlang, [phash2/2]).
|
||||||
|
|
||||||
|
@ -16,46 +16,46 @@
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-record(bloom, {
|
-record(bloom, {
|
||||||
m = 0, % The size of the bitmap in bits.
|
m = 0 :: non_neg_integer(), % The size of the bitmap in bits.
|
||||||
bitmap = <<>>, % The bitmap.
|
bitmap = <<>> :: binary(), % The bitmap.
|
||||||
k = 0, % The number of hashes.
|
k = 0 :: non_neg_integer(), % The number of hashes.
|
||||||
n = 0, % The maximum number of keys.
|
n = 0 :: non_neg_integer(), % The maximum number of keys.
|
||||||
keys = 0 % The current number of keys.
|
keys = 0 :: non_neg_integer() % The current number of keys.
|
||||||
}).
|
}).
|
||||||
|
|
||||||
%% @spec new(capacity) -> bloom().
|
|
||||||
%% @equiv new(capacity, 0.001)
|
%% @equiv new(capacity, 0.001)
|
||||||
|
-spec new(non_neg_integer()) -> #bloom{}.
|
||||||
new(N) -> new(N, 0.001).
|
new(N) -> new(N, 0.001).
|
||||||
|
|
||||||
%% @spec new(integer(), float()) -> bloom()
|
|
||||||
%% @doc Creates a new Bloom filter, given a maximum number of keys and a
|
%% @doc Creates a new Bloom filter, given a maximum number of keys and a
|
||||||
%% false-positive error rate.
|
%% false-positive error rate.
|
||||||
|
-spec new(non_neg_integer(), float()) -> #bloom{}.
|
||||||
new(N, E) when N > 0, is_float(E), E > 0, E =< 1 ->
|
new(N, E) when N > 0, is_float(E), E > 0, E =< 1 ->
|
||||||
{M, K} = calc_least_bits(N, E),
|
{M, K} = calc_least_bits(N, E),
|
||||||
#bloom{m=M, bitmap = <<0:((M+7) div 8 * 8)>>, k=K, n=N}.
|
#bloom{m=M, bitmap = <<0:((M+7) div 8 * 8)>>, k=K, n=N}.
|
||||||
|
|
||||||
%% @spec clear(bloom()) -> bloom().
|
|
||||||
%% @doc Creates a new empty Bloom filter from an existing one.
|
%% @doc Creates a new empty Bloom filter from an existing one.
|
||||||
clear(#bloom{#bitmap=Bitmap} = B) ->
|
-spec clear(#bloom{}) -> #bloom{}.
|
||||||
B#bloom{<<0:bit_size(Bitmap)>>, n=0}.
|
clear(#bloom{bitmap=Bitmap} = B) ->
    %% Zero every bit while keeping the bitmap the same length, and reset
    %% the current key count. The capacity `n' is deliberately left
    %% untouched: add_element/2 only accepts a key while `keys < n', so
    %% zeroing `n' would make the cleared filter reject every insert.
    B#bloom{bitmap = <<0:(erlang:bit_size(Bitmap))>>, keys=0}.
|
||||||
|
|
||||||
%% @spec count(bloom()) -> unsigned().
|
|
||||||
%% @doc Returns the number of elements encoded into this Bloom filter.
|
%% @doc Returns the number of elements encoded into this Bloom filter.
|
||||||
count(#bloom{#keys=N}) ->
|
-spec count(#bloom{}) -> non_neg_integer().
|
||||||
|
count(#bloom{keys=N}) ->
|
||||||
N.
|
N.
|
||||||
|
|
||||||
%% @spec filter_size(bloom()) -> unsigned().
|
|
||||||
%% @doc Returns the number of bits used in this Bloom filter.
|
%% @doc Returns the number of bits used in this Bloom filter.
|
||||||
filter_size(#bloom{#bitmap=Bitmap}) ->
|
-spec filter_size(#bloom{}) -> non_neg_integer().
|
||||||
|
filter_size(#bloom{bitmap=Bitmap}) ->
|
||||||
bit_size(Bitmap).
|
bit_size(Bitmap).
|
||||||
|
|
||||||
%% @spec is_bloom(bloom()) -> bool()
|
|
||||||
%% @doc Determines if the given argument is a bloom record.
|
%% @doc Determines if the given argument is a bloom record.
|
||||||
|
%% Accepts any term: the catch-all clause returns false for non-records.
-spec is_bloom(term()) -> boolean().
|
||||||
is_bloom(#bloom{}) -> true;
|
is_bloom(#bloom{}) -> true;
|
||||||
is_bloom(_) -> false.
|
is_bloom(_) -> false.
|
||||||
|
|
||||||
%% @spec is_element(string(), bloom()) -> bool()
|
|
||||||
%% @doc Determines if the key is (probably) an element of the filter.
|
%% @doc Determines if the key is (probably) an element of the filter.
|
||||||
|
-spec is_element(term(), #bloom{}) -> true | false.
|
||||||
is_element(Key, B) -> is_element(Key, B, calc_idxs(Key, B)).
|
is_element(Key, B) -> is_element(Key, B, calc_idxs(Key, B)).
|
||||||
is_element(_, _, []) -> true;
|
is_element(_, _, []) -> true;
|
||||||
is_element(Key, B, [Idx | T]) ->
|
is_element(Key, B, [Idx | T]) ->
|
||||||
|
@ -67,8 +67,8 @@ is_element(Key, B, [Idx | T]) ->
|
||||||
false -> false
|
false -> false
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @spec add_element(string(), bloom()) -> bloom()
|
|
||||||
%% @doc Adds the key to the filter.
|
%% @doc Adds the key to the filter.
|
||||||
|
-spec add_element(term(), #bloom{}) -> #bloom{}.
|
||||||
add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N ->
|
add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N ->
|
||||||
Idxs = calc_idxs(Key, B),
|
Idxs = calc_idxs(Key, B),
|
||||||
Bitmap0 = set_bits(Bitmap, Idxs),
|
Bitmap0 = set_bits(Bitmap, Idxs),
|
||||||
|
@ -77,6 +77,9 @@ add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N ->
|
||||||
false -> B#bloom{bitmap=Bitmap0, keys=Keys+1}
|
false -> B#bloom{bitmap=Bitmap0, keys=Keys+1}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% @internal
|
||||||
|
%% @doc Sets the bit at each of the given indices to 1 in the binary.
|
||||||
|
-spec set_bits(binary(), list(non_neg_integer())) -> binary().
|
||||||
set_bits(Bin, []) -> Bin;
|
set_bits(Bin, []) -> Bin;
|
||||||
set_bits(Bin, [Idx | Idxs]) ->
|
set_bits(Bin, [Idx | Idxs]) ->
|
||||||
ByteIdx = Idx div 8,
|
ByteIdx = Idx div 8,
|
||||||
|
@ -85,16 +88,9 @@ set_bits(Bin, [Idx | Idxs]) ->
|
||||||
Byte0 = Byte bor Mask,
|
Byte0 = Byte bor Mask,
|
||||||
set_bits(<<Pre/binary, Byte0:8, Post/binary>>, Idxs).
|
set_bits(<<Pre/binary, Byte0:8, Post/binary>>, Idxs).
|
||||||
|
|
||||||
%% set2(N, Bin) ->
|
%% @internal
|
||||||
%% <<L:N/bits, _:1, R/bits>> = Bin,
|
%% @doc Find the optimal bitmap size and number of hashes.
|
||||||
%% <<L/bits, 1:1, R/bits>>.
|
%% TODO: -spec calc_least_bits(pos_integer(), float()) -> {M :: non_neg_integer(), K :: non_neg_integer()}.
|
||||||
|
|
||||||
%% a(N, B) ->
|
|
||||||
%% fun (<<L:N/bits, _:1, R/bits>>) ->
|
|
||||||
%% <<L/bits, 1:1, R/bits>>
|
|
||||||
%% end(B).
|
|
||||||
|
|
||||||
% Find the optimal bitmap size and number of hashes.
|
|
||||||
calc_least_bits(N, E) -> calc_least_bits(N, E, 1, 0, 0).
|
calc_least_bits(N, E) -> calc_least_bits(N, E, 1, 0, 0).
|
||||||
calc_least_bits(N, E, K, MinM, BestK) ->
|
calc_least_bits(N, E, K, MinM, BestK) ->
|
||||||
M = -1 * K * N / log(1 - pow(E, 1/K)),
|
M = -1 * K * N / log(1 - pow(E, 1/K)),
|
||||||
|
@ -105,8 +101,10 @@ calc_least_bits(N, E, K, MinM, BestK) ->
|
||||||
_ -> calc_least_bits(N, E, K+1, CurM, CurK)
|
_ -> calc_least_bits(N, E, K+1, CurM, CurK)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
% This uses the "enhanced double hashing" algorithm.
|
%% @internal
|
||||||
% Todo: handle case of m > 2^32.
|
%% @doc This uses the "enhanced double hashing" algorithm.
|
||||||
|
%% TODO: handle case of m > 2^32.
|
||||||
|
%% TODO: -spec calc_idxs(term(), #bloom{}) -> [non_neg_integer()].
|
||||||
calc_idxs(Key, #bloom{m=M, k=K}) ->
|
calc_idxs(Key, #bloom{m=M, k=K}) ->
|
||||||
X = phash2(Key, M),
|
X = phash2(Key, M),
|
||||||
Y = phash2({"salt", Key}, M),
|
Y = phash2({"salt", Key}, M),
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
%% the ebloom module.
|
%% the ebloom module.
|
||||||
|
|
||||||
-module(ebloom).
|
-module(ebloom).
|
||||||
-author('Dave Smith <dizzyd@dizzyd.com>').
|
-author('Greg Burd <greg@burd.me>').
|
||||||
|
|
||||||
-export([new/3,
|
-export([new/3,
|
||||||
insert/2,
|
insert/2,
|
||||||
|
@ -40,15 +40,16 @@
|
||||||
|
|
||||||
-spec new(integer(), float(), integer()) -> {ok, reference()}.
|
-spec new(integer(), float(), integer()) -> {ok, reference()}.
|
||||||
new(Count, FalseProb, _Seed) ->
|
new(Count, FalseProb, _Seed) ->
|
||||||
bloom:new(Count, FalseProb).
|
{ok, bloom:new(Count, FalseProb)}.
|
||||||
|
|
||||||
-spec insert(reference(), binary()) -> ok.
|
-spec insert(reference(), binary()) -> ok.
|
||||||
insert(Ref, Bin) ->
|
insert(Ref, Bin) ->
|
||||||
bloom:add_element(Key, Ref).
|
bloom:add_element(Bin, Ref),
|
||||||
|
ok.
|
||||||
|
|
||||||
-spec contains(reference(), binary()) -> true | false.
|
-spec contains(reference(), binary()) -> true | false.
|
||||||
contains(Ref, Bin) ->
|
contains(Ref, Bin) ->
|
||||||
is_element(Bin, Ref).
|
bloom:is_element(Bin, Ref).
|
||||||
|
|
||||||
-spec clear(reference()) -> ok.
|
-spec clear(reference()) -> ok.
|
||||||
clear(Ref) ->
|
clear(Ref) ->
|
||||||
|
@ -64,19 +65,19 @@ elements(Ref) ->
|
||||||
|
|
||||||
-spec effective_fpp(reference()) -> float().
|
-spec effective_fpp(reference()) -> float().
|
||||||
effective_fpp(_Ref) ->
|
effective_fpp(_Ref) ->
|
||||||
raise not_yet_implemented.
|
throw(not_yet_implemented).
|
||||||
|
|
||||||
-spec intersect(reference(), reference()) -> ok.
|
-spec intersect(reference(), reference()) -> ok.
|
||||||
intersect(_Ref, _OtherRef) ->
|
intersect(_Ref, _OtherRef) ->
|
||||||
raise not_yet_implemented.
|
throw(not_yet_implemented).
|
||||||
|
|
||||||
-spec union(reference(), reference()) -> ok.
|
-spec union(reference(), reference()) -> ok.
|
||||||
union(_Ref, _OtherRef) ->
|
union(_Ref, _OtherRef) ->
|
||||||
raise not_yet_implemented.
|
throw(not_yet_implemented).
|
||||||
|
|
||||||
-spec difference(reference(), reference()) -> ok.
|
-spec difference(reference(), reference()) -> ok.
|
||||||
difference(_Ref, _OtherRef) ->
|
difference(_Ref, _OtherRef) ->
|
||||||
raise not_yet_implemented.
|
throw(not_yet_implemented).
|
||||||
|
|
||||||
-spec serialize(reference()) -> binary().
|
-spec serialize(reference()) -> binary().
|
||||||
serialize(Ref) ->
|
serialize(Ref) ->
|
||||||
|
|
Loading…
Reference in a new issue