From 682191ce069f357c081b1e753ca6a0fc3df28043 Mon Sep 17 00:00:00 2001 From: Kresten Krab Thorup Date: Sat, 28 Apr 2012 18:35:35 +0200 Subject: [PATCH] Tree writing code was broken In some cases, inner nodes were not being emitted. This would sometimes cause queries (get / range_fold) to only include results in a right-most branch. --- src/hanoi_writer.erl | 22 +++++++++++++--------- test/hanoi_writer_tests.erl | 29 ++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/hanoi_writer.erl b/src/hanoi_writer.erl index a02bea2..aa84f77 100644 --- a/src/hanoi_writer.erl +++ b/src/hanoi_writer.erl @@ -49,7 +49,7 @@ last_node_pos :: pos_integer(), last_node_size :: pos_integer(), - nodes = [] :: [ #node{} ], % B-tree stack + nodes = [] :: [ #node{} ], name :: string(), @@ -126,7 +126,7 @@ code_change(_OldVsn, State, _Extra) -> %%%%% INTERNAL FUNCTIONS - +% @doc flush pending nodes and write trailer flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastNodeSize, bloom=Ref }=State) -> @@ -144,7 +144,8 @@ flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=_LastNod {ok, State#state{ index_file=undefined }}; -flush_nodes(State=#state{ nodes=[#node{level=N, members=[_]}] }) when N>0 -> +%% stack consists of one node with one {pos,len} member. Just ignore this node. 
+flush_nodes(State=#state{ nodes=[#node{level=N, members=[{_,{Pos,_Len}}]}], last_node_pos=Pos }) when N>0 -> flush_nodes(State#state{ nodes=[] }); flush_nodes(State) -> @@ -172,16 +173,19 @@ add_record(Level, Key, Value, ok = ebloom:insert( State#state.bloom, Key ), NodeMembers = [{Key,Value} | List], + State2 = State#state{ nodes=[CurrNode#node{ members=NodeMembers, size=NewSize} | RestNodes] }, if - NewSize >= ?NODE_SIZE -> - close_node(State#state{ nodes=[CurrNode#node{ members=NodeMembers, size=NewSize} | RestNodes] }); + NewSize >= State#state.block_size -> + close_node(State2); true -> - {ok, State#state{ nodes=[ CurrNode#node{ members=NodeMembers, size=NewSize } | RestNodes ] }} + {ok, State2} end; -add_record(Level, Key, Value, #state{ nodes=Nodes }=State) -> - %% There is no top-of-stack node, or it is not at the level we wish to insert at. - add_record(Level, Key, Value, State#state{ nodes = [ #node{ level=Level, members=[] } | Nodes ] }). +add_record(Level, Key, Value, State=#state{ nodes=[] }) -> + add_record(Level, Key, Value, State#state{ nodes=[ #node{ level=Level } ] }); + +add_record(Level, Key, Value, State=#state{ nodes=[ #node{level=Level2 } |_]=Stack }) when Level < Level2 -> + add_record(Level, Key, Value, State#state{ nodes=[ #node{ level=(Level2-1) } | Stack] }). diff --git a/test/hanoi_writer_tests.erl b/test/hanoi_writer_tests.erl index 55cb0ca..d66e949 100644 --- a/test/hanoi_writer_tests.erl +++ b/test/hanoi_writer_tests.erl @@ -29,10 +29,13 @@ -include_lib("eunit/include/eunit.hrl"). -endif. +-include("include/hanoi.hrl"). + -compile(export_all). 
simple_test() -> + file:delete("testdata"), {ok, BT} = hanoi_writer:open("testdata"), ok = hanoi_writer:add(BT, <<"A">>, <<"Avalue">>), ok = hanoi_writer:add(BT, <<"B">>, <<"Bvalue">>), @@ -47,9 +50,10 @@ simple_test() -> simple1_test() -> - {ok, BT} = hanoi_writer:open("testdata"), + file:delete("testdata"), + {ok, BT} = hanoi_writer:open("testdata", [{block_size, 1024}]), - Max = 30*1024, + Max = 1024, Seq = lists:seq(0, Max), {Time1,_} = timer:tc( @@ -66,7 +70,8 @@ simple1_test() -> error_logger:info_msg("time to insert: ~p/sec~n", [1000000/(Time1/Max)]), {ok, IN} = hanoi_reader:open("testdata"), - {ok, <<"valuevalue/", 2048:128>>} = hanoi_reader:lookup(IN, <<2048:128>>), + Middle = Max div 2, + {ok, <<"valuevalue/", Middle:128>>} = hanoi_reader:lookup(IN, <<Middle:128>>), {Time2,Count} = timer:tc( @@ -82,7 +87,21 @@ simple1_test() -> Max = Count-1, + {Time3,{done,Count2}} = timer:tc( + fun() -> hanoi_reader:range_fold(fun(Key, <<"valuevalue/", Key/binary>>, N) -> + N+1 + end, + 0, + IN, + #btree_range{ from_key= <<>>, to_key=undefined }) + end, + []), - ok = hanoi_reader:close(IN), + error_logger:info_msg("time to range_fold: ~p/sec~n", [1000000/(Time3/Max)]), + + error_logger:info_msg("count2=~p~n", [Count2]), + + Max = Count2-1, + + ok = hanoi_reader:close(IN). - ok = file:delete("testdata").