Use ebloom by default

This commit changes the default bloom filter to
Basho's ebloom, which is significantly more
stable with respect to performance.  The code can
still read the old bloom filters; new files are
written with the new filters.

The default is controlled in src/hanoidb.hrl
using the USE_EBLOOM macro.
This commit is contained in:
Kresten Krab Thorup 2014-11-28 16:15:20 +01:00
parent a1bbadfb34
commit f4feca27e5
5 changed files with 66 additions and 25 deletions

View file

@ -4,7 +4,7 @@
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}. {eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
{erl_opts, [%{d,'DEBUG',true}, {erl_opts, [%{d,'DEBUG',true},
{d,'USE_SCALABLE_BLOOM',true}, {d,'USE_EBLOOM',true},
{parse_transform, lager_transform}, {parse_transform, lager_transform},
fail_on_warning, fail_on_warning,
warn_unused_vars, warn_unused_vars,

View file

@ -70,22 +70,15 @@
| value() | value()
| filepos(). | filepos().
-ifdef(USE_EBLOOM).
-ifdef(USE_SCALABLE_BLOOM). -define(HANOI_BLOOM_TYPE, ebloom).
-define(BLOOM_NEW(Size), {ok, hanoidb_bloom:bloom(Size, 0.01)}).
-define(BLOOM_TO_BIN(Bloom), hanoidb_bloom:encode(Bloom)). %% -> Binary
-define(BIN_TO_BLOOM(Bin), {ok, hanoidb_bloom:decode(Bin)}).
-define(BLOOM_INSERT(Bloom, Key), {ok, hanoidb_bloom:add(Key,Bloom)}).
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_bloom:member(Key, Bloom)). %% -> 'true' | 'false'
-else. -else.
-define(HANOI_BLOOM_TYPE, sbloom).
-define(BLOOM_NEW(Size), begin ebloom:new(Size, 0.01, Size) end).
-define(BLOOM_TO_BIN(Bloom), begin ebloom:serialize(Bloom) end). %% -> Binary
-define(BIN_TO_BLOOM(Bin), begin ebloom:deserialize(Bin) end).
-define(BLOOM_INSERT(Bloom, Key), begin ok=ebloom:insert(Bloom, Key), {ok, Bloom} end).
-define(BLOOM_CONTAINS(Bloom, Key), begin ebloom:member(Bloom, Key) end). %% -> 'true' | 'false'
-endif. -endif.
-define(BLOOM_NEW(Size), hanoidb_util:bloom_new(Size, ?HANOI_BLOOM_TYPE)).
-define(BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_to_bin(Bloom)).
-define(BIN_TO_BLOOM(Bin, Fmt), hanoidb_util:bin_to_bloom(Bin, Fmt)).
-define(BLOOM_INSERT(Bloom, Key), hanoidb_util:bloom_insert(Bloom, Key)).
-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_util:bloom_contains(Bloom, Key)).

View file

@ -85,7 +85,7 @@ open(Name, Config) ->
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8), {ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8),
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4), {ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4),
{ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize), {ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize),
{ok, Bloom} = ?BIN_TO_BLOOM(BloomData), {ok, Bloom} = hanoidb_util:bin_to_bloom(BloomData),
%% read in the root node %% read in the root node
Root = Root =

View file

@ -38,7 +38,16 @@
, tstamp/0 , tstamp/0
, expiry_time/1 , expiry_time/1
, has_expired/1 , has_expired/1
, ensure_expiry/1 ]). , ensure_expiry/1
, bloom_type/1
, bloom_new/2
, bloom_to_bin/1
, bin_to_bloom/1
, bin_to_bloom/2
, bloom_insert/2
, bloom_contains/2
]).
-include("src/hanoidb.hrl"). -include("src/hanoidb.hrl").
@ -265,4 +274,43 @@ ensure_expiry(Opts) ->
ok ok
end. end.
%% @doc Return the implementation tag (`ebloom' or `sbloom') carried by a
%% tagged bloom filter tuple. Any other term fails with function_clause.
bloom_type({Tag, _Filter}) when Tag =:= ebloom; Tag =:= sbloom ->
    Tag.
%% @doc Create an empty bloom filter of the requested implementation and
%% wrap it in a tagged tuple: {ok, {ebloom | sbloom, Filter}}.
%% Both back ends are given Size and the constant 0.01 (presumably the
%% target false-positive probability -- confirm against each library).
bloom_new(Capacity, ebloom) ->
    %% Size is also passed as the third argument of ebloom:new/3
    %% (NOTE(review): ebloom appears to treat it as a seed -- confirm).
    {ok, Handle} = ebloom:new(Capacity, 0.01, Capacity),
    {ok, {ebloom, Handle}};
bloom_new(Capacity, sbloom) ->
    Filter = hanoidb_bloom:bloom(Capacity, 0.01),
    {ok, {sbloom, Filter}}.
%% @doc Serialize a tagged bloom filter using the encoder of its own
%% implementation. The tag itself is not part of the result; only the
%% back end's output is returned.
bloom_to_bin({ebloom, Handle}) ->
    ebloom:serialize(Handle);
bloom_to_bin({sbloom, Filter}) ->
    hanoidb_bloom:encode(Filter).
%% @doc Decode a serialized bloom filter whose implementation is not
%% recorded next to it, by inspecting the leading bytes of the binary:
%%   16#1F 16#8B  -- gzip magic number: decoded as an sbloom;
%%   131          -- version tag of the Erlang external term format;
%%   anything else is handed to ebloom.
%% Returns {ok, {sbloom | ebloom, Filter}}; crashes if decoding fails.
bin_to_bloom(<<16#1F, 16#8B, _/binary>> = Gzipped) ->
    bin_to_bloom(Gzipped, sbloom);
bin_to_bloom(<<131, _/binary>> = TermBin) ->
    %% This clause previously called erlang:term_to_binary/1, which
    %% re-encodes the already serialized data and returns a bare binary,
    %% so callers matching on {ok, Bloom} failed with badmatch. Decode
    %% the term instead and return it in the same shape as the other
    %% clauses.
    %% NOTE(review): assumes the stored term is a legacy, uncompressed
    %% hanoidb_bloom structure, hence the sbloom tag -- confirm.
    {ok, {sbloom, erlang:binary_to_term(TermBin)}};
bin_to_bloom(Blob) ->
    bin_to_bloom(Blob, ebloom).

%% @doc Decode a serialized bloom filter of a known implementation and
%% return it as {ok, {Type, Filter}}.
bin_to_bloom(Binary, sbloom) ->
    {ok, {sbloom, hanoidb_bloom:decode(Binary)}};
bin_to_bloom(Binary, ebloom) ->
    {ok, Handle} = ebloom:deserialize(Binary),
    {ok, {ebloom, Handle}}.
%% @doc Add Key to a tagged bloom filter and return {ok, TaggedFilter}.
bloom_insert({ebloom, Handle} = Tagged, Key) ->
    %% The handle is returned unchanged, so ebloom:insert/2 is expected
    %% to update the filter behind it in place.
    ok = ebloom:insert(Handle, Key),
    {ok, Tagged};
bloom_insert({sbloom, Filter}, Key) ->
    %% hanoidb_bloom:add/2 takes the key first and returns the filter
    %% that is wrapped in the result.
    Updated = hanoidb_bloom:add(Key, Filter),
    {ok, {sbloom, Updated}}.
%% @doc Membership test on a tagged bloom filter; delegates to the
%% matching back end and returns its result unchanged.
%% Note the argument order differs between the two back ends.
bloom_contains({ebloom, Handle}, Key) ->
    ebloom:contains(Handle, Key);
bloom_contains({sbloom, Filter}, Key) ->
    hanoidb_bloom:member(Key, Filter).

View file

@ -55,9 +55,9 @@
name :: string(), name :: string(),
bloom :: term(), bloom :: {ebloom, term()} | {sbloom, term()},
block_size = ?NODE_SIZE :: integer(), block_size = ?NODE_SIZE :: integer(),
compress = none :: none | snappy | gzip, % | lz4, compress = none :: none | snappy | gzip | lz4,
opts = [] :: list(any()), opts = [] :: list(any()),
value_count = 0 :: integer(), value_count = 0 :: integer(),
@ -170,11 +170,11 @@ serialize(#state{ bloom=Bloom, index_file=File, index_file_pos=Position }=State)
exit({bad_position, Position, WrongPosition}) exit({bad_position, Position, WrongPosition})
end, end,
ok = file:close(File), ok = file:close(File),
erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom) } ). erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_type(Bloom) } ).
deserialize(Binary) -> deserialize(Binary) ->
{State, Bin} = erlang:binary_to_term(Binary), {State, Bin, Type} = erlang:binary_to_term(Binary),
{ok, Bloom} = ?BIN_TO_BLOOM(Bin), {ok, Bloom} = ?BIN_TO_BLOOM(Bin, Type),
{ok, IdxFile} = do_open(State#state.name, State#state.opts, []), {ok, IdxFile} = do_open(State#state.name, State#state.opts, []),
State#state{ bloom=Bloom, index_file=IdxFile }. State#state{ bloom=Bloom, index_file=IdxFile }.
@ -200,7 +200,7 @@ archive_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastN
_ -> _ ->
LastNodePos LastNodePos
end, end,
Trailer = << 0:32/unsigned, BloomBin/binary, BloomSize:32/unsigned, RootPos:64/unsigned >>, Trailer = [ << 0:32/unsigned>> , BloomBin, << BloomSize:32/unsigned, RootPos:64/unsigned >> ],
ok = file:write(IdxFile, Trailer), ok = file:write(IdxFile, Trailer),
ok = file:datasync(IdxFile), ok = file:datasync(IdxFile),