Bloom filter: Add faster, ETS-based build representation for dense bitmaps.
This commit is contained in:
parent
d727be2fa7
commit
12148a7af9
2 changed files with 63 additions and 4 deletions
|
@ -187,16 +187,30 @@ set_bits(Mask, I1, I, [H|T], Acc) ->
|
||||||
|
|
||||||
%%%========== Dispatch to appropriate representation:
|
%%%========== Dispatch to appropriate representation:
|
||||||
%% Create an empty bitmask of 2^LogN bits.
%% Sizes of 2^20 bits and above use the bitarray representation (the one
%% this file labels as suitable for sparse arrays); smaller sizes use the
%% dense representation provided by hanoidb_dense_bitmap.
bitmask_new(LogN) when LogN >= 20 ->
    bitarray_new(1 bsl LogN);
bitmask_new(LogN) ->
    hanoidb_dense_bitmap:new(1 bsl LogN).
|
||||||
|
|
||||||
%% Set bit I in the bitmask BM, dispatching on the representation tag
%% stored in the first tuple element. Returns the updated bitmask.
bitmask_set(I, BM) ->
    Repr = element(1, BM),
    case Repr of
        dense_bitmap_ets -> hanoidb_dense_bitmap:set(I, BM);
        array            -> bitarray_set(I, BM)
    end.
|
||||||
|
|
||||||
|
%%% Convert to external form.
%% Turn a bitmask into the form that can be serialized:
%%   - `array' bitmasks are returned unchanged;
%%   - `dense_bitmap_ets' bitmasks are converted by
%%     hanoidb_dense_bitmap:build/1, which yields a `dense_bitmap' row;
%%   - an already-built `dense_bitmap' row is returned unchanged, so the
%%     conversion is idempotent and a decoded filter can be encoded again
%%     (bitmask_get/2 already accepts this tag).
bitmask_build(BM) ->
    case element(1, BM) of
        array            -> BM;
        dense_bitmap     -> BM;
        dense_bitmap_ets -> hanoidb_dense_bitmap:build(BM)
    end.
|
||||||
|
|
||||||
%% Test whether bit I is set in the bitmask BM.
%% Works on the `array' form as well as on both dense forms: the
%% ETS-backed `dense_bitmap_ets' handle and the built `dense_bitmap' row.
bitmask_get(I, BM) ->
    case element(1, BM) of
        Dense when Dense =:= dense_bitmap_ets; Dense =:= dense_bitmap ->
            hanoidb_dense_bitmap:member(I, BM);
        array ->
            bitarray_get(I, BM)
    end.
|
||||||
|
|
||||||
%%%========== Bitarray representation - suitable for sparse arrays ==========
|
%%%========== Bitarray representation - suitable for sparse arrays ==========
|
||||||
|
@ -216,11 +230,17 @@ bitarray_get(I, A) ->
|
||||||
%%%^^^^^^^^^^ Bitarray representation - suitable for sparse arrays ^^^^^^^^^^
|
%%%^^^^^^^^^^ Bitarray representation - suitable for sparse arrays ^^^^^^^^^^
|
||||||
|
|
||||||
%% Serialize a bloom filter: convert it to its external form with
%% bloom_build/1, encode the term as a binary, then gzip the result.
encode(Bloom) ->
    External = bloom_build(Bloom),
    Serialized = term_to_binary(External),
    zlib:gzip(Serialized).
|
||||||
|
|
||||||
%% Inverse of encode/1: gunzip the binary and decode the contained term.
%% NOTE(review): binary_to_term/1 is used without the `safe' option;
%% confirm that Bin always comes from a trusted source.
decode(Bin) ->
    Serialized = zlib:gunzip(Bin),
    binary_to_term(Serialized).
|
||||||
|
|
||||||
|
%%% Convert to external form.
%% For a #bloom{} record every bitmask in field `a' is passed through
%% bitmask_build/1; for an #sbf{} record every filter in field `b' is
%% converted recursively.
bloom_build(#bloom{a=Masks}=Filter) ->
    BuiltMasks = [bitmask_build(Mask) || Mask <- Masks],
    Filter#bloom{a=BuiltMasks};
bloom_build(#sbf{b=Filters}=Scalable) ->
    BuiltFilters = [bloom_build(Filter) || Filter <- Filters],
    Scalable#sbf{b=BuiltFilters}.
|
||||||
|
|
||||||
%% UNIT TESTS
|
%% UNIT TESTS
|
||||||
|
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
|
39
src/hanoidb_dense_bitmap.erl
Normal file
39
src/hanoidb_dense_bitmap.erl
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
-module(hanoidb_dense_bitmap).
|
||||||
|
|
||||||
|
-export([new/1, set/2, build/1, member/2]).
|
||||||
|
-define(BITS_PER_CELL, 32).
|
||||||
|
|
||||||
|
-define(REPR_NAME, dense_bitmap).
|
||||||
|
|
||||||
|
%% Create an empty, ETS-backed bitmap with room for N bits.
%% The bits live in a single row of a private ETS table: a tuple whose
%% first element is the ?REPR_NAME tag (also the row key) followed by
%% CellCount integer cells of ?BITS_PER_CELL bits each, all initially 0.
%% Returns the handle {dense_bitmap_ets, N, CellCount, Table}.
new(N) ->
    Table = ets:new(dense_bitmap, [set, private]),
    CellCount = 1 + (N - 1) div ?BITS_PER_CELL,
    EmptyRow = erlang:make_tuple(CellCount + 1, 0, [{1, ?REPR_NAME}]),
    ets:insert(Table, EmptyRow),
    {dense_bitmap_ets, N, CellCount, Table}.
|
||||||
|
|
||||||
|
%% Set bit I in the ETS-backed bitmap and return the handle unchanged.
%% Element 1 of the stored row is the tag, so bit I lives in tuple
%% element 2 + I div ?BITS_PER_CELL, at bit position I rem ?BITS_PER_CELL.
set(I, {dense_bitmap_ets, _, _, Tab}=DBM) ->
    Pos = 2 + I div ?BITS_PER_CELL,
    Mask = 1 bsl (I rem ?BITS_PER_CELL),
    Current = ets:lookup_element(Tab, ?REPR_NAME, Pos),
    ets:update_element(Tab, ?REPR_NAME, {Pos, Current bor Mask}),
    DBM.
|
||||||
|
|
||||||
|
%% Convert an ETS-backed bitmap into its plain-term form.
%% Reads the single row out of the table, deletes the table, and returns
%% the row (a tuple tagged with ?REPR_NAME). The handle must not be used
%% after this call, since its table no longer exists.
build({dense_bitmap_ets, _N, _Width, Tab}) ->
    [Built] = ets:lookup(Tab, ?REPR_NAME),
    ets:delete(Tab),
    Built.
|
||||||
|
|
||||||
|
%% Test whether bit I is set. Returns true or false.
%% Accepts either a built row (a tuple tagged with ?REPR_NAME), which is
%% inspected directly, or a {dense_bitmap_ets, ...} handle, for which the
%% relevant cell is fetched from the ETS table.
member(I, Row) when element(1, Row) =:= ?REPR_NAME ->
    Pos = 2 + I div ?BITS_PER_CELL,
    Shift = I rem ?BITS_PER_CELL,
    Word = element(Pos, Row),
    (Word band (1 bsl Shift)) =/= 0;
member(I, {dense_bitmap_ets, _, _, Tab}) ->
    Pos = 2 + I div ?BITS_PER_CELL,
    Shift = I rem ?BITS_PER_CELL,
    Word = ets:lookup_element(Tab, ?REPR_NAME, Pos),
    (Word band (1 bsl Shift)) =/= 0.
|
Loading…
Reference in a new issue