Use ebloom by default
This commit changes the default bloom filter to Basho's ebloom, whose performance is significantly more stable. The code can still read the old bloom filters; new files are written with the new filter. The default is selected in src/hanoidb.hrl via the USE_EBLOOM macro.
This commit is contained in:
parent
a1bbadfb34
commit
f4feca27e5
5 changed files with 66 additions and 25 deletions
|
@ -4,7 +4,7 @@
|
|||
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
|
||||
|
||||
{erl_opts, [%{d,'DEBUG',true},
|
||||
{d,'USE_SCALABLE_BLOOM',true},
|
||||
{d,'USE_EBLOOM',true},
|
||||
{parse_transform, lager_transform},
|
||||
fail_on_warning,
|
||||
warn_unused_vars,
|
||||
|
|
|
@ -70,22 +70,15 @@
|
|||
| value()
|
||||
| filepos().
|
||||
|
||||
|
||||
-ifdef(USE_SCALABLE_BLOOM).
|
||||
|
||||
-define(BLOOM_NEW(Size), {ok, hanoidb_bloom:bloom(Size, 0.01)}).
|
||||
-define(BLOOM_TO_BIN(Bloom), hanoidb_bloom:encode(Bloom)). %% -> Binary
|
||||
-define(BIN_TO_BLOOM(Bin), {ok, hanoidb_bloom:decode(Bin)}).
|
||||
-define(BLOOM_INSERT(Bloom, Key), {ok, hanoidb_bloom:add(Key,Bloom)}).
|
||||
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_bloom:member(Key, Bloom)). %% -> 'true' | 'false'
|
||||
|
||||
-ifdef(USE_EBLOOM).
|
||||
-define(HANOI_BLOOM_TYPE, ebloom).
|
||||
-else.
|
||||
|
||||
-define(BLOOM_NEW(Size), begin ebloom:new(Size, 0.01, Size) end).
|
||||
-define(BLOOM_TO_BIN(Bloom), begin ebloom:serialize(Bloom) end). %% -> Binary
|
||||
-define(BIN_TO_BLOOM(Bin), begin ebloom:deserialize(Bin) end).
|
||||
-define(BLOOM_INSERT(Bloom, Key), begin ok=ebloom:insert(Bloom, Key), {ok, Bloom} end).
|
||||
-define(BLOOM_CONTAINS(Bloom, Key), begin ebloom:member(Bloom, Key) end). %% -> 'true' | 'false'
|
||||
|
||||
-define(HANOI_BLOOM_TYPE, sbloom).
|
||||
-endif.
|
||||
|
||||
-define(BLOOM_NEW(Size), hanoidb_util:bloom_new(Size, ?HANOI_BLOOM_TYPE)).
|
||||
-define(BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_to_bin(Bloom)).
|
||||
-define(BIN_TO_BLOOM(Bin, Fmt), hanoidb_util:bin_to_bloom(Bin, Fmt)).
|
||||
-define(BLOOM_INSERT(Bloom, Key), hanoidb_util:bloom_insert(Bloom, Key)).
|
||||
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_util:bloom_contains(Bloom, Key)).
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ open(Name, Config) ->
|
|||
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8),
|
||||
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4),
|
||||
{ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize),
|
||||
{ok, Bloom} = ?BIN_TO_BLOOM(BloomData),
|
||||
{ok, Bloom} = hanoidb_util:bin_to_bloom(BloomData),
|
||||
|
||||
%% read in the root node
|
||||
Root =
|
||||
|
|
|
@ -38,7 +38,16 @@
|
|||
, tstamp/0
|
||||
, expiry_time/1
|
||||
, has_expired/1
|
||||
, ensure_expiry/1 ]).
|
||||
, ensure_expiry/1
|
||||
|
||||
, bloom_type/1
|
||||
, bloom_new/2
|
||||
, bloom_to_bin/1
|
||||
, bin_to_bloom/1
|
||||
, bin_to_bloom/2
|
||||
, bloom_insert/2
|
||||
, bloom_contains/2
|
||||
]).
|
||||
|
||||
-include("src/hanoidb.hrl").
|
||||
|
||||
|
@ -265,4 +274,43 @@ ensure_expiry(Opts) ->
|
|||
ok
|
||||
end.
|
||||
|
||||
%% @doc Return the implementation tag of a tagged bloom filter tuple.
%% The filter is expected to be `{ebloom, _}' or `{sbloom, _}'; the tag
%% atom is returned unchanged. Any other term fails with `function_clause'.
bloom_type({Tag, _Filter}) when Tag =:= ebloom; Tag =:= sbloom ->
    Tag.
|
||||
|
||||
%% @doc Create a new, empty bloom filter of the requested implementation.
%% Returns `{ok, {Tag, Filter}}' where `Tag' is the implementation atom
%% that was asked for, so later calls can dispatch on it.
%%
%% Both implementations are created with `Size' and the constant 0.01.
%% NOTE(review): 0.01 is presumably the target false-positive probability,
%% and the third argument to ebloom:new/3 presumably a seed -- confirm
%% against the ebloom documentation.
bloom_new(Size, ebloom) ->
    {ok, Filter} = ebloom:new(Size, 0.01, Size),
    {ok, {ebloom, Filter}};
bloom_new(Size, sbloom) ->
    Filter = hanoidb_bloom:bloom(Size, 0.01),
    {ok, {sbloom, Filter}}.
|
||||
|
||||
%% @doc Serialise a tagged bloom filter using the encoder of its own
%% implementation. The tag itself is not part of the returned data, so
%% callers that need it must store it separately.
bloom_to_bin({ebloom, Filter}) ->
    ebloom:serialize(Filter);
bloom_to_bin({sbloom, Filter}) ->
    hanoidb_bloom:encode(Filter).
|
||||
|
||||
%% @doc Decode a serialised bloom filter whose type was not recorded,
%% choosing the decoder from the leading bytes of the data:
%%
%%   * `16#1F, 16#8B' (the gzip magic number) -- decoded as `sbloom'
%%     through bin_to_bloom/2;
%%   * `131' (the version tag of the Erlang external term format) --
%%     decoded with binary_to_term/1;
%%   * anything else -- decoded as `ebloom' through bin_to_bloom/2.
%%
%% Returns `{ok, {Tag, Filter}}' in every case.
bin_to_bloom(GZiped = <<16#1F, 16#8B, _/binary>>) ->
    bin_to_bloom(GZiped, sbloom);
bin_to_bloom(TermBin = <<131, _/binary>>) ->
    %% The data is already in external term format, so it has to be
    %% decoded; term_to_binary/1 would encode it a second time and yield
    %% a bare binary instead of the {ok, _} shape callers match on.
    %% NOTE(review): tagging the decoded term as sbloom assumes this is an
    %% uncompressed hanoidb_bloom filter -- confirm against old file formats.
    {ok, {sbloom, erlang:binary_to_term(TermBin)}};
bin_to_bloom(Blob) ->
    bin_to_bloom(Blob, ebloom).
|
||||
|
||||
%% @doc Decode a serialised bloom filter of a known type.
%% `Binary' is handed to the decoder selected by the second argument
%% (`ebloom' or `sbloom') and the result is returned as
%% `{ok, {Tag, Filter}}'. An ebloom decode that does not return
%% `{ok, _}' fails with `badmatch'.
bin_to_bloom(Binary, ebloom) ->
    {ok, Filter} = ebloom:deserialize(Binary),
    {ok, {ebloom, Filter}};
bin_to_bloom(Binary, sbloom) ->
    Filter = hanoidb_bloom:decode(Binary),
    {ok, {sbloom, Filter}}.
|
||||
|
||||
%% @doc Add `Key' to a tagged bloom filter and return `{ok, TaggedFilter}'.
%%
%% For `sbloom' the filter returned by hanoidb_bloom:add/2 is re-tagged and
%% returned. For `ebloom' the very same handle that was passed in is
%% returned after ebloom:insert/2 answers `ok'.
%% NOTE(review): this presumably relies on ebloom updating the filter in
%% place -- confirm against the ebloom documentation.
bloom_insert({ebloom, Filter} = Tagged, Key) ->
    ok = ebloom:insert(Filter, Key),
    {ok, Tagged};
bloom_insert({sbloom, Filter}, Key) ->
    Updated = hanoidb_bloom:add(Key, Filter),
    {ok, {sbloom, Updated}}.
|
||||
|
||||
%% @doc Membership test on a tagged bloom filter.
%% Dispatches on the tag and returns whatever the underlying
%% implementation's lookup returns for `Key'. Note the differing argument
%% order of the two back ends.
bloom_contains({ebloom, Filter}, Key) ->
    ebloom:contains(Filter, Key);
bloom_contains({sbloom, Filter}, Key) ->
    hanoidb_bloom:member(Key, Filter).
|
||||
|
||||
|
|
|
@ -55,9 +55,9 @@
|
|||
|
||||
name :: string(),
|
||||
|
||||
bloom :: term(),
|
||||
bloom :: {ebloom, term()} | {sbloom, term()},
|
||||
block_size = ?NODE_SIZE :: integer(),
|
||||
compress = none :: none | snappy | gzip, % | lz4,
|
||||
compress = none :: none | snappy | gzip | lz4,
|
||||
opts = [] :: list(any()),
|
||||
|
||||
value_count = 0 :: integer(),
|
||||
|
@ -170,11 +170,11 @@ serialize(#state{ bloom=Bloom, index_file=File, index_file_pos=Position }=State)
|
|||
exit({bad_position, Position, WrongPosition})
|
||||
end,
|
||||
ok = file:close(File),
|
||||
erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom) } ).
|
||||
erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_type(Bloom) } ).
|
||||
|
||||
deserialize(Binary) ->
|
||||
{State, Bin} = erlang:binary_to_term(Binary),
|
||||
{ok, Bloom} = ?BIN_TO_BLOOM(Bin),
|
||||
{State, Bin, Type} = erlang:binary_to_term(Binary),
|
||||
{ok, Bloom} = ?BIN_TO_BLOOM(Bin, Type),
|
||||
{ok, IdxFile} = do_open(State#state.name, State#state.opts, []),
|
||||
State#state{ bloom=Bloom, index_file=IdxFile }.
|
||||
|
||||
|
@ -200,7 +200,7 @@ archive_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastN
|
|||
_ ->
|
||||
LastNodePos
|
||||
end,
|
||||
Trailer = << 0:32/unsigned, BloomBin/binary, BloomSize:32/unsigned, RootPos:64/unsigned >>,
|
||||
Trailer = [ << 0:32/unsigned>> , BloomBin, << BloomSize:32/unsigned, RootPos:64/unsigned >> ],
|
||||
|
||||
ok = file:write(IdxFile, Trailer),
|
||||
ok = file:datasync(IdxFile),
|
||||
|
|
Loading…
Reference in a new issue