Cleanup
This commit is contained in:
parent
103766e41a
commit
d30133a87a
2 changed files with 37 additions and 38 deletions
|
@ -2,7 +2,7 @@
|
|||
%% @reference [http://en.wikipedia.org/wiki/Bloom_filter]
|
||||
|
||||
-module(bloom).
|
||||
-export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1]).
|
||||
-export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1, filter_size/1]).
|
||||
-import(math, [log/1, pow/2]).
|
||||
-import(erlang, [phash2/2]).
|
||||
|
||||
|
@ -16,46 +16,46 @@
|
|||
-endif.
|
||||
|
||||
-record(bloom, {
|
||||
m = 0, % The size of the bitmap in bits.
|
||||
bitmap = <<>>, % The bitmap.
|
||||
k = 0, % The number of hashes.
|
||||
n = 0, % The maximum number of keys.
|
||||
keys = 0 % The current number of keys.
|
||||
m = 0 :: non_neg_integer(), % The size of the bitmap in bits.
|
||||
bitmap = <<>> :: binary(), % The bitmap.
|
||||
k = 0 :: non_neg_integer(), % The number of hashes.
|
||||
n = 0 :: non_neg_integer(), % The maximum number of keys.
|
||||
keys = 0 :: non_neg_integer() % The current number of keys.
|
||||
}).
|
||||
|
||||
%% @spec new(integer()) -> bloom()
|
||||
%% @equiv new(N, 0.001)
|
||||
%% Convenience constructor: delegates to new/2 with a false-positive rate
%% of 0.001. new/2 is guarded with N > 0, so the spec only admits positive
%% capacities.
-spec new(pos_integer()) -> #bloom{}.
new(N) -> new(N, 0.001).
|
||||
|
||||
%% @spec new(integer(), float()) -> bloom()
|
||||
%% @doc Creates a new Bloom filter, given a maximum number of keys and a
|
||||
%% false-positive error rate.
|
||||
%% N is the maximum number of keys and E the target false-positive rate.
%% E must lie strictly between 0 and 1: at E = 1.0 the sizing formula in
%% calc_least_bits/5 evaluates log(1 - pow(E, 1/K)) = log(0.0), which raises
%% badarith, so that value is rejected by the guard up front.
-spec new(pos_integer(), float()) -> #bloom{}.
new(N, E) when N > 0, is_float(E), E > 0, E < 1 ->
    {M, K} = calc_least_bits(N, E),
    %% Round the bitmap length up to a whole number of bytes.
    BitmapBits = ((M + 7) div 8) * 8,
    #bloom{m = M, bitmap = <<0:BitmapBits>>, k = K, n = N}.
|
||||
|
||||
%% @spec clear(bloom()) -> bloom().
|
||||
%% @doc Creates a new empty Bloom filter from an existing one.
|
||||
clear(#bloom{#bitmap=Bitmap} = B) ->
|
||||
B#bloom{<<0:bit_size(Bitmap)>>, n=0}.
|
||||
%% Returns the filter with every bit of the bitmap set to 0 and the element
%% counter (keys) reset to 0. The capacity (n) and the sizing fields (m, k)
%% are left untouched: zeroing n instead of keys would leave count/1
%% reporting the old total and make the add_element/2 guard Keys < N fail
%% for the cleared filter.
-spec clear(#bloom{}) -> #bloom{}.
clear(#bloom{bitmap = Bitmap} = B) ->
    B#bloom{bitmap = <<0:(erlang:bit_size(Bitmap))>>, keys = 0}.
|
||||
|
||||
%% @spec count(bloom()) -> non_neg_integer()
|
||||
%% @doc Returns the number of elements encoded into this Bloom filter.
|
||||
count(#bloom{#keys=N}) ->
|
||||
%% Reads the `keys' field of the record, i.e. the running count of
%% elements that the filter tracks.
-spec count(#bloom{}) -> non_neg_integer().
count(Filter = #bloom{}) ->
    Filter#bloom.keys.
|
||||
|
||||
%% @spec filter_size(bloom()) -> non_neg_integer()
|
||||
%% @doc Returns the number of bits used in this Bloom filter.
|
||||
filter_size(#bloom{#bitmap=Bitmap}) ->
|
||||
%% Reports the length, in bits, of the bitmap stored in the record. new/2
%% allocates the bitmap rounded up to whole bytes, so this can be larger
%% than the `m' field.
-spec filter_size(#bloom{}) -> non_neg_integer().
filter_size(Filter = #bloom{}) ->
    erlang:bit_size(Filter#bloom.bitmap).
|
||||
|
||||
%% @spec is_bloom(bloom()) -> bool()
|
||||
%% @doc Determines if the given argument is a bloom record.
|
||||
%% Type predicate: true for a bloom record, false for any other term. The
%% argument type is term() because the second clause accepts every input;
%% a spec restricted to #bloom{} would declare that clause unreachable.
-spec is_bloom(term()) -> boolean().
is_bloom(#bloom{}) -> true;
is_bloom(_) -> false.
|
||||
|
||||
%% @spec is_element(string(), bloom()) -> bool()
|
||||
%% @doc Determines if the key is (probably) an element of the filter.
|
||||
%% Computes the bit indexes for Key with calc_idxs/2 and hands them to
%% is_element/3, which checks them against the filter.
-spec is_element(term(), #bloom{}) -> boolean().
is_element(Key, B) ->
    Idxs = calc_idxs(Key, B),
    is_element(Key, B, Idxs).
|
||||
is_element(_, _, []) -> true;
|
||||
is_element(Key, B, [Idx | T]) ->
|
||||
|
@ -67,8 +67,8 @@ is_element(Key, B, [Idx | T]) ->
|
|||
false -> false
|
||||
end.
|
||||
|
||||
%% @spec add_element(string(), bloom()) -> bloom()
|
||||
%% @doc Adds the key to the filter.
|
||||
-spec add_element(term(), #bloom{}) -> #bloom{}.
|
||||
add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N ->
|
||||
Idxs = calc_idxs(Key, B),
|
||||
Bitmap0 = set_bits(Bitmap, Idxs),
|
||||
|
@ -77,6 +77,9 @@ add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N ->
|
|||
false -> B#bloom{bitmap=Bitmap0, keys=Keys+1}
|
||||
end.
|
||||
|
||||
%% @internal
|
||||
%% @doc Sets the bits at the given indexes to 1 in the binary.
|
||||
-spec set_bits(binary(), list(non_neg_integer())) -> binary().
|
||||
set_bits(Bin, []) -> Bin;
|
||||
set_bits(Bin, [Idx | Idxs]) ->
|
||||
ByteIdx = Idx div 8,
|
||||
|
@ -85,16 +88,9 @@ set_bits(Bin, [Idx | Idxs]) ->
|
|||
Byte0 = Byte bor Mask,
|
||||
set_bits(<<Pre/binary, Byte0:8, Post/binary>>, Idxs).
|
||||
|
||||
%% set2(N, Bin) ->
|
||||
%% <<L:N/bits, _:1, R/bits>> = Bin,
|
||||
%% <<L/bits, 1:1, R/bits>>.
|
||||
|
||||
%% a(N, B) ->
|
||||
%% fun (<<L:N/bits, _:1, R/bits>>) ->
|
||||
%% <<L/bits, 1:1, R/bits>>
|
||||
%% end(B).
|
||||
|
||||
% Find the optimal bitmap size and number of hashes.
|
||||
%% @internal
|
||||
%% @doc Find the optimal bitmap size and number of hashes.
|
||||
%% TODO: -spec calc_least_bits(non_neg_integer(), number()) -> {M :: non_neg_integer(), K :: non_neg_integer()}.
|
||||
%% Entry point for the search: starts calc_least_bits/5 at K = 1 with its
%% MinM and BestK arguments both set to 0. The caller in new/2 matches the
%% result against {M, K}.
calc_least_bits(N, E) -> calc_least_bits(N, E, 1, 0, 0).
|
||||
calc_least_bits(N, E, K, MinM, BestK) ->
|
||||
M = -1 * K * N / log(1 - pow(E, 1/K)),
|
||||
|
@ -105,8 +101,10 @@ calc_least_bits(N, E, K, MinM, BestK) ->
|
|||
_ -> calc_least_bits(N, E, K+1, CurM, CurK)
|
||||
end.
|
||||
|
||||
% This uses the "enhanced double hashing" algorithm.
|
||||
% Todo: handle case of m > 2^32.
|
||||
%% @internal
|
||||
%% @doc This uses the "enhanced double hashing" algorithm.
|
||||
%% TODO: handle case of m > 2^32.
|
||||
%% TODO: -spec calc_idxs(term(), #bloom{}) -> [non_neg_integer()].
|
||||
calc_idxs(Key, #bloom{m=M, k=K}) ->
|
||||
X = phash2(Key, M),
|
||||
Y = phash2({"salt", Key}, M),
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
%% the ebloom module.
|
||||
|
||||
-module(ebloom).
|
||||
-author('Dave Smith <dizzyd@dizzyd.com>').
|
||||
-author('Greg Burd <greg@burd.me>').
|
||||
|
||||
-export([new/3,
|
||||
insert/2,
|
||||
|
@ -40,15 +40,16 @@
|
|||
|
||||
-spec new(integer(), float(), integer()) -> {ok, reference()}.
|
||||
new(Count, FalseProb, _Seed) ->
|
||||
bloom:new(Count, FalseProb).
|
||||
{ok, bloom:new(Count, FalseProb)}.
|
||||
|
||||
-spec insert(reference(), binary()) -> ok.
|
||||
insert(Ref, Bin) ->
|
||||
bloom:add_element(Key, Ref).
|
||||
bloom:add_element(Bin, Ref),
|
||||
ok.
|
||||
|
||||
-spec contains(reference(), binary()) -> true | false.
|
||||
contains(Ref, Bin) ->
|
||||
is_element(Bin, Ref).
|
||||
bloom:is_element(Bin, Ref).
|
||||
|
||||
-spec clear(reference()) -> ok.
|
||||
clear(Ref) ->
|
||||
|
@ -64,19 +65,19 @@ elements(Ref) ->
|
|||
|
||||
-spec effective_fpp(reference()) -> float().
|
||||
effective_fpp(_Ref) ->
|
||||
raise not_yet_implemented.
|
||||
throw(not_yet_implemented).
|
||||
|
||||
-spec intersect(reference(), reference()) -> ok.
|
||||
intersect(_Ref, _OtherRef) ->
|
||||
raise not_yet_implemented.
|
||||
throw(not_yet_implemented).
|
||||
|
||||
-spec union(reference(), reference()) -> ok.
|
||||
union(_Ref, _OtherRef) ->
|
||||
raise not_yet_implemented.
|
||||
throw(not_yet_implemented).
|
||||
|
||||
-spec difference(reference(), reference()) -> ok.
|
||||
difference(_Ref, _OtherRef) ->
|
||||
raise not_yet_implemented.
|
||||
throw(not_yet_implemented).
|
||||
|
||||
-spec serialize(reference()) -> binary().
|
||||
serialize(Ref) ->
|
||||
|
|
Loading…
Reference in a new issue