Store child-refs as {Pos,Size} so we can pread

This allows us to use file:pread to read a
child-node, rather than two separate reads
(one for node block size, and then one for 
the node block itself).

Also, encode the level number in the node header, so that
scanning leaves doesn't need to decode the
node contents for inner nodes.
This commit is contained in:
Kresten Krab Thorup 2012-01-06 00:29:05 +01:00
parent 2f985d8576
commit b21e253324
3 changed files with 42 additions and 21 deletions

View file

@ -37,7 +37,7 @@ fold0(File,Fun,_InnerNode,Acc0) ->
fold1(File,Fun,Acc0). fold1(File,Fun,Acc0).
fold1(File,Fun,Acc0) -> fold1(File,Fun,Acc0) ->
case read_node(File) of case read_leaf_node(File) of
eof -> eof ->
Acc0; Acc0;
{ok, Node} -> {ok, Node} ->
@ -50,12 +50,12 @@ first_node(#index{file=File}) ->
{node, Members} {node, Members}
end. end.
next_node(#index{file=File}=Index) -> next_node(#index{file=File}=_Index) ->
case read_node(File) of case read_leaf_node(File) of
{ok, #node{level=0, members=Members}} -> {ok, #node{level=0, members=Members}} ->
{node, Members}; {node, Members};
{ok, #node{level=N}} when N>0 -> % {ok, #node{level=N}} when N>0 ->
next_node(Index); % next_node(Index);
eof -> eof ->
end_of_data end_of_data
end. end.
@ -82,8 +82,8 @@ lookup_in_node(_File,#node{level=0,members=Members},Key) ->
lookup_in_node(File,#node{members=Members},Key) -> lookup_in_node(File,#node{members=Members},Key) ->
case find(Key, Members) of case find(Key, Members) of
{ok, Pos} -> {ok, {Pos,Size}} ->
{ok, Node} = read_node(File, Pos), {ok, Node} = read_node(File, {Pos,Size}),
lookup_in_node(File, Node, Key); lookup_in_node(File, Node, Key);
notfound -> notfound ->
notfound notfound
@ -100,21 +100,37 @@ find(_, _) ->
notfound. notfound.
read_node(File,{Pos,Size}) ->
{ok, <<_:32, Level:16/unsigned, Data/binary>>} = file:pread(File, Pos, Size),
fractal_btree_util:decode_index_node(Level, Data);
read_node(File,Pos) -> read_node(File,Pos) ->
{ok, Pos} = file:position(File, Pos), {ok, Pos} = file:position(File, Pos),
Result = read_node(File), Result = read_node(File),
% error_logger:info_msg("decoded ~p ~p~n", [Pos, Result]), % error_logger:info_msg("decoded ~p ~p~n", [Pos, Result]),
Result. Result.
read_node(File) -> read_node(File) ->
{ok, <<Len:32>>} = file:read(File, 4), {ok, <<Len:32, Level:16/unsigned>>} = file:read(File, 6),
case Len of case Len of
0 -> eof; 0 -> eof;
_ -> _ ->
{ok, Data} = file:read(File, Len), {ok, Data} = file:read(File, Len-2),
{ok, Node} = fractal_btree_util:decode_index_node(Data), {ok, Node} = fractal_btree_util:decode_index_node(Level, Data),
{ok, Node} {ok, Node}
end. end.
read_leaf_node(File) ->
case file:read(File, 6) of
{ok, <<0:32, _:16>>} ->
eof;
{ok, <<Len:32, 0:16>>} ->
{ok, Data} = file:read(File, Len-2),
fractal_btree_util:decode_index_node(0, Data);
{ok, <<Len:32, _:16>>} ->
{ok, _} = file:position(File, {cur,Len-2}),
read_leaf_node(File)
end.

View file

@ -15,17 +15,19 @@ estimate_node_size_increment(_KVList,Key,Value) ->
is_binary(Value) -> is_binary(Value) ->
5 + byte_size(Value); 5 + byte_size(Value);
is_atom(Value) -> is_atom(Value) ->
8 8;
is_tuple(Value) ->
13
end. end.
encode_index_node(Level, KVList) -> encode_index_node(Level, KVList) ->
Data = %zlib:zip( Data = %zlib:zip(
erlang:term_to_binary({Level, KVList}) erlang:term_to_binary(KVList)
% ) % )
, ,
Size = byte_size(Data), Size = byte_size(Data)+2,
{ok, Size+4, [ <<Size:32>> | Data ] }. {ok, Size+4, [ <<Size:32/unsigned, Level:16/unsigned>> | Data ] }.
decode_index_node(Data) -> decode_index_node(Level, <<Data/binary>>) ->
{Level,KVList} = erlang:binary_to_term(Data), %zlib:unzip(Data)), KVList = erlang:binary_to_term(Data), %zlib:unzip(Data)),
{ok, {node, Level, KVList}}. {ok, {node, Level, KVList}}.

View file

@ -22,6 +22,7 @@
index_file_pos, index_file_pos,
last_node_pos :: pos_integer(), last_node_pos :: pos_integer(),
last_node_size :: pos_integer(),
nodes = [] :: [ #node{} ], nodes = [] :: [ #node{} ],
@ -97,12 +98,12 @@ code_change(_OldVsn, State, _Extra) ->
flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, bloom=Ref }=State) -> flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=LastNodeSize, bloom=Ref }=State) ->
Bloom = zlib:zip(ebloom:serialize(Ref)), Bloom = zlib:zip(ebloom:serialize(Ref)),
BloomSize = byte_size(Bloom), BloomSize = byte_size(Bloom),
Trailer = << 0:32, Bloom/binary, BloomSize:32/unsigned, LastNodePos:64/unsigned >>, Trailer = << 0:32, Bloom/binary, BloomSize:32/unsigned, LastNodePos:64/unsigned >>,
IdxFile = State#state.index_file, IdxFile = State#state.index_file,
ok = file:write(IdxFile, Trailer), ok = file:write(IdxFile, Trailer),
@ -155,7 +156,9 @@ close_node(#state{nodes=[#node{ level=Level, members=NodeMembers }|RestNodes]} =
ok = file:write(State#state.index_file, Data), ok = file:write(State#state.index_file, Data),
{FirstKey, _} = hd(OrderedMembers), {FirstKey, _} = hd(OrderedMembers),
add_record(Level+1, FirstKey, NodePos, add_record(Level+1, FirstKey, {NodePos, DataSize},
State#state{ nodes = RestNodes, State#state{ nodes = RestNodes,
index_file_pos = NodePos + DataSize, index_file_pos = NodePos + DataSize,
last_node_pos = NodePos}). last_node_pos = NodePos,
last_node_size = DataSize
}).