From d30133a87a29a18e93935e103c0fe1cb3aff7a12 Mon Sep 17 00:00:00 2001 From: Gregory Burd Date: Wed, 20 Jun 2012 11:04:10 +0100 Subject: [PATCH] Cleanup --- src/bloom.erl | 58 ++++++++++++++++++++++++-------------------------- src/ebloom.erl | 17 ++++++++------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/bloom.erl b/src/bloom.erl index f7d8036..057464d 100644 --- a/src/bloom.erl +++ b/src/bloom.erl @@ -2,7 +2,7 @@ %% @reference [http://en.wikipedia.org/wiki/Bloom_filter] -module(bloom). --export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1]). +-export([new/1, new/2, is_bloom/1, is_element/2, add_element/2, clear/1, count/1, filter_size/1]). -import(math, [log/1, pow/2]). -import(erlang, [phash2/2]). @@ -16,46 +16,46 @@ -endif. -record(bloom, { - m = 0, % The size of the bitmap in bits. - bitmap = <<>>, % The bitmap. - k = 0, % The number of hashes. - n = 0, % The maximum number of keys. - keys = 0 % The current number of keys. + m = 0 :: non_neg_integer(), % The size of the bitmap in bits. + bitmap = <<>> :: binary(), % The bitmap. + k = 0 :: non_neg_integer(), % The number of hashes. + n = 0 :: non_neg_integer(), % The maximum number of keys. + keys = 0 :: non_neg_integer() % The current number of keys. }). -%% @spec new(capacity) -> bloom(). %% @equiv new(capacity, 0.001) +-spec new(non_neg_integer()) -> #bloom{}. new(N) -> new(N, 0.001). -%% @spec new(integer(), float()) -> bloom() %% @doc Creates a new Bloom filter, given a maximum number of keys and a %% false-positive error rate. +-spec new(non_neg_integer(), float()) -> #bloom{}. new(N, E) when N > 0, is_float(E), E > 0, E =< 1 -> {M, K} = calc_least_bits(N, E), #bloom{m=M, bitmap = <<0:((M+7) div 8 * 8)>>, k=K, n=N}. -%% @spec clear(bloom()) -> bloom(). %% @doc Creates a new empty Bloom filter from an existing one. -clear(#bloom{#bitmap=Bitmap} = B) -> - B#bloom{<<0:bit_size(Bitmap)>>, n=0}. +-spec clear(#bloom{}) -> #bloom{}. +clear(#bloom{bitmap=Bitmap} = B) -> + B#bloom{bitmap = <<0:(erlang:bit_size(Bitmap))>>, n=0}. -%% @spec count(bloom()) -> unsigned(). %% @doc Returns the number of elements encoded into this Bloom filter. -count(#bloom{#keys=N}) -> +-spec count(#bloom{}) -> non_neg_integer(). +count(#bloom{keys=N}) -> N. -%% @spec filter_size(bloom()) -> unsigned(). %% @doc Returns the number of bits used in this Bloom filter. -filter_size(#bloom{#bitmap=Bitmap}) -> +-spec filter_size(#bloom{}) -> non_neg_integer(). +filter_size(#bloom{bitmap=Bitmap}) -> bit_size(Bitmap). -%% @spec is_bloom(bloom()) -> bool() %% @doc Determines if the given argument is a bloom record. +-spec is_bloom(#bloom{}) -> true | false. is_bloom(#bloom{}) -> true; is_bloom(_) -> false. -%% @spec is_element(string(), bloom()) -> bool() %% @doc Determines if the key is (probably) an element of the filter. +-spec is_element(term(), #bloom{}) -> true | false. is_element(Key, B) -> is_element(Key, B, calc_idxs(Key, B)). is_element(_, _, []) -> true; is_element(Key, B, [Idx | T]) -> @@ -67,8 +67,8 @@ is_element(Key, B, [Idx | T]) -> false -> false end. -%% @spec add_element(string(), bloom()) -> bloom() %% @doc Adds the key to the filter. +-spec add_element(term(), #bloom{}) -> #bloom{}. add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N -> Idxs = calc_idxs(Key, B), Bitmap0 = set_bits(Bitmap, Idxs), @@ -77,6 +77,9 @@ add_element(Key, #bloom{keys=Keys, n=N, bitmap=Bitmap} = B) when Keys < N -> false -> B#bloom{bitmap=Bitmap0, keys=Keys+1} end. +%% @internal +%% @doc Set the bits at the provided index(s) to "1" in the binary. +-spec set_bits(binary(), list(non_neg_integer())) -> binary(). set_bits(Bin, []) -> Bin; set_bits(Bin, [Idx | Idxs]) -> ByteIdx = Idx div 8, @@ -85,16 +88,9 @@ set_bits(Bin, [Idx | Idxs]) -> Byte0 = Byte bor Mask, set_bits(<
>, Idxs).
 
-%% set2(N, Bin) ->
-%%     <> = Bin,
-%%     <>.
-
-%% a(N, B) ->
-%%     fun (<>) ->
-%%             <>
-%%     end(B).
-
-% Find the optimal bitmap size and number of hashes.
+%% @internal
+%% @doc Find the optimal bitmap size and number of hashes.
+%TODO -spec(non_neg_integer(), number()) -> non_neg_integer().
 calc_least_bits(N, E) -> calc_least_bits(N, E, 1, 0, 0).
 calc_least_bits(N, E, K, MinM, BestK) ->
     M = -1 * K * N / log(1 - pow(E, 1/K)),
@@ -105,8 +101,10 @@ calc_least_bits(N, E, K, MinM, BestK) ->
           _ -> calc_least_bits(N, E, K+1, CurM, CurK)
     end.
 
-% This uses the "enhanced double hashing" algorithm.
-% Todo: handle case of m > 2^32.
+%% @internal
+%% @doc This uses the "enhanced double hashing" algorithm.
+%% TODO: handle case of m > 2^32.
+%TODO -spec(term(), #bloom{}) -> list(non_neg_integer()).
 calc_idxs(Key, #bloom{m=M, k=K}) ->
     X = phash2(Key, M),
     Y = phash2({"salt", Key}, M),
diff --git a/src/ebloom.erl b/src/ebloom.erl
index ce60a4b..d0f4ccd 100644
--- a/src/ebloom.erl
+++ b/src/ebloom.erl
@@ -22,7 +22,7 @@
 %% the ebloom module.
 
 -module(ebloom).
--author('Dave Smith ').
+-author('Greg Burd ').
 
 -export([new/3,
          insert/2,
@@ -40,15 +40,16 @@
 
 -spec new(integer(), float(), integer()) -> {ok, reference()}.
 new(Count, FalseProb, _Seed) ->
-    bloom:new(Count, FalseProb).
+    {ok, bloom:new(Count, FalseProb)}.
 
 -spec insert(reference(), binary()) -> ok.
 insert(Ref, Bin) ->
-    bloom:add_element(Key, Ref).
+    bloom:add_element(Bin, Ref),
+    ok.
 
 -spec contains(reference(), binary()) -> true | false.
 contains(Ref, Bin) ->
-    is_element(Bin, Ref).
+    bloom:is_element(Bin, Ref).
 
 -spec clear(reference()) -> ok.
 clear(Ref) ->
@@ -64,19 +65,19 @@ elements(Ref) ->
 
 -spec effective_fpp(reference()) -> float().
 effective_fpp(_Ref) ->
-    raise not_yet_implemented.
+    throw(not_yet_implemented).
 
 -spec intersect(reference(), reference()) -> ok.
 intersect(_Ref, _OtherRef) ->
-    raise not_yet_implemented.
+    throw(not_yet_implemented).
 
 -spec union(reference(), reference()) -> ok.
 union(_Ref, _OtherRef) ->
-    raise not_yet_implemented.
+    throw(not_yet_implemented).
 
 -spec difference(reference(), reference()) -> ok.
 difference(_Ref, _OtherRef) ->
-    raise not_yet_implemented.
+    throw(not_yet_implemented).
 
 -spec serialize(reference()) -> binary().
 serialize(Ref) ->