Use ebloom by default
This commit changes the default bloom filter implementation to Basho's ebloom, which has significantly more stable performance. The code can still read the old bloom filters; new files are written with the new filter type. The default is controlled in src/hanoidb.hrl via the USE_EBLOOM macro.
This commit is contained in:
parent
a1bbadfb34
commit
f4feca27e5
5 changed files with 66 additions and 25 deletions
|
@ -4,7 +4,7 @@
|
||||||
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
|
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
|
||||||
|
|
||||||
{erl_opts, [%{d,'DEBUG',true},
|
{erl_opts, [%{d,'DEBUG',true},
|
||||||
{d,'USE_SCALABLE_BLOOM',true},
|
{d,'USE_EBLOOM',true},
|
||||||
{parse_transform, lager_transform},
|
{parse_transform, lager_transform},
|
||||||
fail_on_warning,
|
fail_on_warning,
|
||||||
warn_unused_vars,
|
warn_unused_vars,
|
||||||
|
|
|
@ -70,22 +70,15 @@
|
||||||
| value()
|
| value()
|
||||||
| filepos().
|
| filepos().
|
||||||
|
|
||||||
|
-ifdef(USE_EBLOOM).
|
||||||
-ifdef(USE_SCALABLE_BLOOM).
|
-define(HANOI_BLOOM_TYPE, ebloom).
|
||||||
|
|
||||||
-define(BLOOM_NEW(Size), {ok, hanoidb_bloom:bloom(Size, 0.01)}).
|
|
||||||
-define(BLOOM_TO_BIN(Bloom), hanoidb_bloom:encode(Bloom)). %% -> Binary
|
|
||||||
-define(BIN_TO_BLOOM(Bin), {ok, hanoidb_bloom:decode(Bin)}).
|
|
||||||
-define(BLOOM_INSERT(Bloom, Key), {ok, hanoidb_bloom:add(Key,Bloom)}).
|
|
||||||
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_bloom:member(Key, Bloom)). %% -> 'true' | 'false'
|
|
||||||
|
|
||||||
-else.
|
-else.
|
||||||
|
-define(HANOI_BLOOM_TYPE, sbloom).
|
||||||
-define(BLOOM_NEW(Size), begin ebloom:new(Size, 0.01, Size) end).
|
|
||||||
-define(BLOOM_TO_BIN(Bloom), begin ebloom:serialize(Bloom) end). %% -> Binary
|
|
||||||
-define(BIN_TO_BLOOM(Bin), begin ebloom:deserialize(Bin) end).
|
|
||||||
-define(BLOOM_INSERT(Bloom, Key), begin ok=ebloom:insert(Bloom, Key), {ok, Bloom} end).
|
|
||||||
-define(BLOOM_CONTAINS(Bloom, Key), begin ebloom:member(Bloom, Key) end). %% -> 'true' | 'false'
|
|
||||||
|
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
|
-define(BLOOM_NEW(Size), hanoidb_util:bloom_new(Size, ?HANOI_BLOOM_TYPE)).
|
||||||
|
-define(BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_to_bin(Bloom)).
|
||||||
|
-define(BIN_TO_BLOOM(Bin, Fmt), hanoidb_util:bin_to_bloom(Bin, Fmt)).
|
||||||
|
-define(BLOOM_INSERT(Bloom, Key), hanoidb_util:bloom_insert(Bloom, Key)).
|
||||||
|
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_util:bloom_contains(Bloom, Key)).
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,7 @@ open(Name, Config) ->
|
||||||
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8),
|
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8),
|
||||||
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4),
|
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4),
|
||||||
{ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize),
|
{ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize),
|
||||||
{ok, Bloom} = ?BIN_TO_BLOOM(BloomData),
|
{ok, Bloom} = hanoidb_util:bin_to_bloom(BloomData),
|
||||||
|
|
||||||
%% read in the root node
|
%% read in the root node
|
||||||
Root =
|
Root =
|
||||||
|
|
|
@ -38,7 +38,16 @@
|
||||||
, tstamp/0
|
, tstamp/0
|
||||||
, expiry_time/1
|
, expiry_time/1
|
||||||
, has_expired/1
|
, has_expired/1
|
||||||
, ensure_expiry/1 ]).
|
, ensure_expiry/1
|
||||||
|
|
||||||
|
, bloom_type/1
|
||||||
|
, bloom_new/2
|
||||||
|
, bloom_to_bin/1
|
||||||
|
, bin_to_bloom/1
|
||||||
|
, bin_to_bloom/2
|
||||||
|
, bloom_insert/2
|
||||||
|
, bloom_contains/2
|
||||||
|
]).
|
||||||
|
|
||||||
-include("src/hanoidb.hrl").
|
-include("src/hanoidb.hrl").
|
||||||
|
|
||||||
|
@ -265,4 +274,43 @@ ensure_expiry(Opts) ->
|
||||||
ok
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% @doc Return the implementation tag (`ebloom' or `sbloom') carried by a
%% tagged bloom filter tuple `{Tag, Filter}'. Any other term fails with
%% `function_clause'.
bloom_type({Tag, _Filter}) when Tag =:= ebloom; Tag =:= sbloom ->
    Tag.
|
||||||
|
|
||||||
|
%% @doc Create a new, empty bloom filter of the requested implementation
%% type and return it as `{ok, {Type, Filter}}'.
%%
%% Both implementations are created with the literal 0.01 as their second
%% argument (presumably the target false-positive rate -- confirm against
%% the hanoidb_bloom / ebloom documentation).
bloom_new(Size, ebloom) ->
    %% `Size' is passed twice; presumably the third argument of
    %% ebloom:new/3 is a seed -- TODO confirm.
    {ok, Ref} = ebloom:new(Size, 0.01, Size),
    {ok, {ebloom, Ref}};
bloom_new(Size, sbloom) ->
    Filter = hanoidb_bloom:bloom(Size, 0.01),
    {ok, {sbloom, Filter}}.
|
||||||
|
|
||||||
|
%% @doc Serialize a tagged bloom filter using the encoder that belongs to
%% its implementation: ebloom:serialize/1 for `ebloom' filters and
%% hanoidb_bloom:encode/1 for `sbloom' filters. The tag itself is not part
%% of the result.
bloom_to_bin({ebloom, Ref}) ->
    ebloom:serialize(Ref);
bloom_to_bin({sbloom, Filter}) ->
    hanoidb_bloom:encode(Filter).
|
||||||
|
|
||||||
|
%% @doc Decode a serialized bloom filter whose implementation type is not
%% recorded alongside it, by sniffing the leading bytes of the binary:
%%
%%   * 16#1F 16#8B (the gzip magic number) -- decoded as an `sbloom' filter;
%%   * 131 (the Erlang external term format version tag) -- decoded with
%%     erlang:binary_to_term/1;
%%   * anything else -- decoded as an `ebloom' filter.
%%
%% Always returns `{ok, {Type, Bloom}}', the same shape as bin_to_bloom/2,
%% so callers can match on `{ok, Bloom}' regardless of the on-disk format.
bin_to_bloom(GZiped = <<16#1F, 16#8B, _/binary>>) ->
    bin_to_bloom(GZiped, sbloom);
bin_to_bloom(TermBin = <<131, _/binary>>) ->
    %% The binary is already in external term format, so it has to be
    %% decoded (binary_to_term), not encoded again (term_to_binary).
    %% NOTE(review): tagging the decoded term as `sbloom' assumes such blobs
    %% were written by the old (pre-ebloom) filter code -- confirm.
    {ok, {sbloom, erlang:binary_to_term(TermBin)}};
bin_to_bloom(Blob) ->
    bin_to_bloom(Blob, ebloom).

%% @doc Decode a serialized bloom filter of a known implementation type and
%% return it as `{ok, {Type, Bloom}}'. `sbloom' binaries go through
%% hanoidb_bloom:decode/1, `ebloom' binaries through ebloom:deserialize/1.
bin_to_bloom(Binary, sbloom) ->
    {ok, {sbloom, hanoidb_bloom:decode(Binary)}};
bin_to_bloom(Binary, ebloom) ->
    {ok, Bloom} = ebloom:deserialize(Binary),
    {ok, {ebloom, Bloom}}.
|
||||||
|
|
||||||
|
%% @doc Add `Key' to a tagged bloom filter and return `{ok, TaggedFilter}'.
%%
%% For `ebloom' the insert call must return `ok' and the same handle is
%% handed back unchanged; for `sbloom' the value returned by
%% hanoidb_bloom:add/2 is re-wrapped with the `sbloom' tag.
bloom_insert({ebloom, Ref} = Tagged, Key) ->
    ok = ebloom:insert(Ref, Key),
    {ok, Tagged};
bloom_insert({sbloom, Filter}, Key) ->
    Updated = hanoidb_bloom:add(Key, Filter),
    {ok, {sbloom, Updated}}.
|
||||||
|
|
||||||
|
%% @doc Membership test on a tagged bloom filter. Dispatches to
%% ebloom:contains/2 for `ebloom' filters and to hanoidb_bloom:member/2 for
%% `sbloom' filters (note the swapped argument order between the two) and
%% returns whatever the underlying call returns.
bloom_contains({ebloom, Ref}, Key) ->
    ebloom:contains(Ref, Key);
bloom_contains({sbloom, Filter}, Key) ->
    hanoidb_bloom:member(Key, Filter).
|
||||||
|
|
||||||
|
|
|
@ -55,9 +55,9 @@
|
||||||
|
|
||||||
name :: string(),
|
name :: string(),
|
||||||
|
|
||||||
bloom :: term(),
|
bloom :: {ebloom, term()} | {sbloom, term()},
|
||||||
block_size = ?NODE_SIZE :: integer(),
|
block_size = ?NODE_SIZE :: integer(),
|
||||||
compress = none :: none | snappy | gzip, % | lz4,
|
compress = none :: none | snappy | gzip | lz4,
|
||||||
opts = [] :: list(any()),
|
opts = [] :: list(any()),
|
||||||
|
|
||||||
value_count = 0 :: integer(),
|
value_count = 0 :: integer(),
|
||||||
|
@ -170,11 +170,11 @@ serialize(#state{ bloom=Bloom, index_file=File, index_file_pos=Position }=State)
|
||||||
exit({bad_position, Position, WrongPosition})
|
exit({bad_position, Position, WrongPosition})
|
||||||
end,
|
end,
|
||||||
ok = file:close(File),
|
ok = file:close(File),
|
||||||
erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom) } ).
|
erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_type(Bloom) } ).
|
||||||
|
|
||||||
deserialize(Binary) ->
|
deserialize(Binary) ->
|
||||||
{State, Bin} = erlang:binary_to_term(Binary),
|
{State, Bin, Type} = erlang:binary_to_term(Binary),
|
||||||
{ok, Bloom} = ?BIN_TO_BLOOM(Bin),
|
{ok, Bloom} = ?BIN_TO_BLOOM(Bin, Type),
|
||||||
{ok, IdxFile} = do_open(State#state.name, State#state.opts, []),
|
{ok, IdxFile} = do_open(State#state.name, State#state.opts, []),
|
||||||
State#state{ bloom=Bloom, index_file=IdxFile }.
|
State#state{ bloom=Bloom, index_file=IdxFile }.
|
||||||
|
|
||||||
|
@ -200,7 +200,7 @@ archive_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastN
|
||||||
_ ->
|
_ ->
|
||||||
LastNodePos
|
LastNodePos
|
||||||
end,
|
end,
|
||||||
Trailer = << 0:32/unsigned, BloomBin/binary, BloomSize:32/unsigned, RootPos:64/unsigned >>,
|
Trailer = [ << 0:32/unsigned>> , BloomBin, << BloomSize:32/unsigned, RootPos:64/unsigned >> ],
|
||||||
|
|
||||||
ok = file:write(IdxFile, Trailer),
|
ok = file:write(IdxFile, Trailer),
|
||||||
ok = file:datasync(IdxFile),
|
ok = file:datasync(IdxFile),
|
||||||
|
|
Loading…
Reference in a new issue