From f4feca27e579dbbbb9b8a1530d66a106a555e0cd Mon Sep 17 00:00:00 2001 From: Kresten Krab Thorup Date: Fri, 28 Nov 2014 16:15:20 +0100 Subject: [PATCH] Use ebloom by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit changes the default bloom filter to be basho’s bloom, which is significantly more stable w.r.t. performance. The code can still read the old bloom filters; new files are written with the new filters. The default is controlled in src/hanoidb.hrl using the USE_EBLOOM macro. --- rebar.config | 2 +- src/hanoidb.hrl | 25 ++++++++------------- src/hanoidb_reader.erl | 2 +- src/hanoidb_util.erl | 50 +++++++++++++++++++++++++++++++++++++++++- src/hanoidb_writer.erl | 12 +++++----- 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/rebar.config b/rebar.config index 8479f49..1997063 100644 --- a/rebar.config +++ b/rebar.config @@ -4,7 +4,7 @@ {eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}. {erl_opts, [%{d,'DEBUG',true}, - {d,'USE_SCALABLE_BLOOM',true}, + {d,'USE_EBLOOM',true}, {parse_transform, lager_transform}, fail_on_warning, warn_unused_vars, diff --git a/src/hanoidb.hrl b/src/hanoidb.hrl index 57d364e..72cf08c 100644 --- a/src/hanoidb.hrl +++ b/src/hanoidb.hrl @@ -70,22 +70,15 @@ | value() | filepos(). - --ifdef(USE_SCALABLE_BLOOM). - --define(BLOOM_NEW(Size), {ok, hanoidb_bloom:bloom(Size, 0.01)}). --define(BLOOM_TO_BIN(Bloom), hanoidb_bloom:encode(Bloom)). %% -> Binary --define(BIN_TO_BLOOM(Bin), {ok, hanoidb_bloom:decode(Bin)}). --define(BLOOM_INSERT(Bloom, Key), {ok, hanoidb_bloom:add(Key,Bloom)}). --define(BLOOM_CONTAINS(Bloom, Key), hanoidb_bloom:member(Key, Bloom)). %% -> 'true' | 'false' - +-ifdef(USE_EBLOOM). +-define(HANOI_BLOOM_TYPE, ebloom). -else. - --define(BLOOM_NEW(Size), begin ebloom:new(Size, 0.01, Size) end). --define(BLOOM_TO_BIN(Bloom), begin ebloom:serialize(Bloom) end). %% -> Binary --define(BIN_TO_BLOOM(Bin), begin ebloom:deserialize(Bin) end). 
--define(BLOOM_INSERT(Bloom, Key), begin ok=ebloom:insert(Bloom, Key), {ok, Bloom} end). --define(BLOOM_CONTAINS(Bloom, Key), begin ebloom:member(Bloom, Key) end). %% -> 'true' | 'false' - +-define(HANOI_BLOOM_TYPE, sbloom). -endif. +-define(BLOOM_NEW(Size), hanoidb_util:bloom_new(Size, ?HANOI_BLOOM_TYPE)). +-define(BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_to_bin(Bloom)). +-define(BIN_TO_BLOOM(Bin, Fmt), hanoidb_util:bin_to_bloom(Bin, Fmt)). +-define(BLOOM_INSERT(Bloom, Key), hanoidb_util:bloom_insert(Bloom, Key)). +-define(BLOOM_CONTAINS(Bloom, Key), hanoidb_util:bloom_contains(Bloom, Key)). + diff --git a/src/hanoidb_reader.erl b/src/hanoidb_reader.erl index ced9e7b..940df57 100644 --- a/src/hanoidb_reader.erl +++ b/src/hanoidb_reader.erl @@ -85,7 +85,7 @@ open(Name, Config) -> {ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size - 8, 8), {ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size - 12, 4), {ok, BloomData} = file:pread(File, (FileInfo#file_info.size - 12 - BloomSize), BloomSize), - {ok, Bloom} = ?BIN_TO_BLOOM(BloomData), + {ok, Bloom} = hanoidb_util:bin_to_bloom(BloomData), %% read in the root node Root = diff --git a/src/hanoidb_util.erl b/src/hanoidb_util.erl index 47c7a9a..f327784 100644 --- a/src/hanoidb_util.erl +++ b/src/hanoidb_util.erl @@ -38,7 +38,16 @@ , tstamp/0 , expiry_time/1 , has_expired/1 - , ensure_expiry/1 ]). + , ensure_expiry/1 + + , bloom_type/1 + , bloom_new/2 + , bloom_to_bin/1 + , bin_to_bloom/1 + , bin_to_bloom/2 + , bloom_insert/2 + , bloom_contains/2 + ]). -include("src/hanoidb.hrl"). @@ -265,4 +274,43 @@ ensure_expiry(Opts) -> ok end. +bloom_type({ebloom, _}) -> + ebloom; +bloom_type({sbloom, _}) -> + sbloom. + +bloom_new(Size, sbloom) -> + {ok, {sbloom, hanoidb_bloom:bloom(Size, 0.01)}}; +bloom_new(Size, ebloom) -> + {ok, Bloom} = ebloom:new(Size, 0.01, Size), + {ok, {ebloom, Bloom}}. + +bloom_to_bin({sbloom, Bloom}) -> + hanoidb_bloom:encode(Bloom); +bloom_to_bin({ebloom, Bloom}) -> + ebloom:serialize(Bloom).
+ +bin_to_bloom(GZiped = <<16#1F, 16#8B, _/binary>>) -> + bin_to_bloom(GZiped, sbloom); +bin_to_bloom(TermBin = <<131, _/binary>>) -> + {ok, {sbloom, erlang:binary_to_term(TermBin)}}; +bin_to_bloom(Blob) -> + bin_to_bloom(Blob, ebloom). + +bin_to_bloom(Binary, sbloom) -> + {ok, {sbloom, hanoidb_bloom:decode(Binary)}}; +bin_to_bloom(Binary, ebloom) -> + {ok, Bloom} = ebloom:deserialize(Binary), + {ok, {ebloom, Bloom}}. + +bloom_insert({sbloom, Bloom}, Key) -> + {ok, {sbloom, hanoidb_bloom:add(Key, Bloom)}}; +bloom_insert({ebloom, Bloom}, Key) -> + ok = ebloom:insert(Bloom, Key), + {ok, {ebloom, Bloom}}. + +bloom_contains({sbloom, Bloom}, Key) -> + hanoidb_bloom:member(Key, Bloom); +bloom_contains({ebloom, Bloom}, Key) -> + ebloom:contains(Bloom, Key). diff --git a/src/hanoidb_writer.erl b/src/hanoidb_writer.erl index 716718a..bfcf235 100644 --- a/src/hanoidb_writer.erl +++ b/src/hanoidb_writer.erl @@ -55,9 +55,9 @@ name :: string(), - bloom :: term(), + bloom :: {ebloom, term()} | {sbloom, term()}, block_size = ?NODE_SIZE :: integer(), - compress = none :: none | snappy | gzip, % | lz4, + compress = none :: none | snappy | gzip | lz4, opts = [] :: list(any()), value_count = 0 :: integer(), @@ -170,11 +170,11 @@ serialize(#state{ bloom=Bloom, index_file=File, index_file_pos=Position }=State) exit({bad_position, Position, WrongPosition}) end, ok = file:close(File), - erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom) } ). + erlang:term_to_binary( { State#state{ index_file=undefined, bloom=undefined }, ?BLOOM_TO_BIN(Bloom), hanoidb_util:bloom_type(Bloom) } ). deserialize(Binary) -> - {State, Bin} = erlang:binary_to_term(Binary), - {ok, Bloom} = ?BIN_TO_BLOOM(Bin), + {State, Bin, Type} = erlang:binary_to_term(Binary), + {ok, Bloom} = ?BIN_TO_BLOOM(Bin, Type), {ok, IdxFile} = do_open(State#state.name, State#state.opts, []), State#state{ bloom=Bloom, index_file=IdxFile }.
@@ -200,7 +200,7 @@ archive_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastN _ -> LastNodePos end, - Trailer = << 0:32/unsigned, BloomBin/binary, BloomSize:32/unsigned, RootPos:64/unsigned >>, + Trailer = [ << 0:32/unsigned>> , BloomBin, << BloomSize:32/unsigned, RootPos:64/unsigned >> ], ok = file:write(IdxFile, Trailer), ok = file:datasync(IdxFile),