Store child refs as {Pos,Size} so we can read them with file:pread
This allows us to read a child node with a single file:pread call, rather than two separate reads (one for the node block size, and then one for the node block itself). Also, encode the level number in the node header, so that scanning leaves can skip inner nodes without decoding their contents.
This commit is contained in:
parent
2f985d8576
commit
b21e253324
3 changed files with 42 additions and 21 deletions
|
@ -37,7 +37,7 @@ fold0(File,Fun,_InnerNode,Acc0) ->
|
||||||
fold1(File,Fun,Acc0).
|
fold1(File,Fun,Acc0).
|
||||||
|
|
||||||
fold1(File,Fun,Acc0) ->
|
fold1(File,Fun,Acc0) ->
|
||||||
case read_node(File) of
|
case read_leaf_node(File) of
|
||||||
eof ->
|
eof ->
|
||||||
Acc0;
|
Acc0;
|
||||||
{ok, Node} ->
|
{ok, Node} ->
|
||||||
|
@ -50,12 +50,12 @@ first_node(#index{file=File}) ->
|
||||||
{node, Members}
|
{node, Members}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
next_node(#index{file=File}=Index) ->
|
next_node(#index{file=File}=_Index) ->
|
||||||
case read_node(File) of
|
case read_leaf_node(File) of
|
||||||
{ok, #node{level=0, members=Members}} ->
|
{ok, #node{level=0, members=Members}} ->
|
||||||
{node, Members};
|
{node, Members};
|
||||||
{ok, #node{level=N}} when N>0 ->
|
% {ok, #node{level=N}} when N>0 ->
|
||||||
next_node(Index);
|
% next_node(Index);
|
||||||
eof ->
|
eof ->
|
||||||
end_of_data
|
end_of_data
|
||||||
end.
|
end.
|
||||||
|
@ -82,8 +82,8 @@ lookup_in_node(_File,#node{level=0,members=Members},Key) ->
|
||||||
|
|
||||||
lookup_in_node(File,#node{members=Members},Key) ->
|
lookup_in_node(File,#node{members=Members},Key) ->
|
||||||
case find(Key, Members) of
|
case find(Key, Members) of
|
||||||
{ok, Pos} ->
|
{ok, {Pos,Size}} ->
|
||||||
{ok, Node} = read_node(File, Pos),
|
{ok, Node} = read_node(File, {Pos,Size}),
|
||||||
lookup_in_node(File, Node, Key);
|
lookup_in_node(File, Node, Key);
|
||||||
notfound ->
|
notfound ->
|
||||||
notfound
|
notfound
|
||||||
|
@ -100,21 +100,37 @@ find(_, _) ->
|
||||||
notfound.
|
notfound.
|
||||||
|
|
||||||
|
|
||||||
|
read_node(File,{Pos,Size}) ->
|
||||||
|
{ok, <<_:32, Level:16/unsigned, Data/binary>>} = file:pread(File, Pos, Size),
|
||||||
|
fractal_btree_util:decode_index_node(Level, Data);
|
||||||
|
|
||||||
read_node(File,Pos) ->
|
read_node(File,Pos) ->
|
||||||
|
|
||||||
{ok, Pos} = file:position(File, Pos),
|
{ok, Pos} = file:position(File, Pos),
|
||||||
Result = read_node(File),
|
Result = read_node(File),
|
||||||
% error_logger:info_msg("decoded ~p ~p~n", [Pos, Result]),
|
% error_logger:info_msg("decoded ~p ~p~n", [Pos, Result]),
|
||||||
Result.
|
Result.
|
||||||
|
|
||||||
read_node(File) ->
|
read_node(File) ->
|
||||||
{ok, <<Len:32>>} = file:read(File, 4),
|
{ok, <<Len:32, Level:16/unsigned>>} = file:read(File, 6),
|
||||||
case Len of
|
case Len of
|
||||||
0 -> eof;
|
0 -> eof;
|
||||||
_ ->
|
_ ->
|
||||||
{ok, Data} = file:read(File, Len),
|
{ok, Data} = file:read(File, Len-2),
|
||||||
{ok, Node} = fractal_btree_util:decode_index_node(Data),
|
{ok, Node} = fractal_btree_util:decode_index_node(Level, Data),
|
||||||
{ok, Node}
|
{ok, Node}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
read_leaf_node(File) ->
|
||||||
|
case file:read(File, 6) of
|
||||||
|
{ok, <<0:32, _:16>>} ->
|
||||||
|
eof;
|
||||||
|
{ok, <<Len:32, 0:16>>} ->
|
||||||
|
{ok, Data} = file:read(File, Len-2),
|
||||||
|
fractal_btree_util:decode_index_node(0, Data);
|
||||||
|
{ok, <<Len:32, _:16>>} ->
|
||||||
|
{ok, _} = file:position(File, {cur,Len-2}),
|
||||||
|
read_leaf_node(File)
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
|
@ -15,17 +15,19 @@ estimate_node_size_increment(_KVList,Key,Value) ->
|
||||||
is_binary(Value) ->
|
is_binary(Value) ->
|
||||||
5 + byte_size(Value);
|
5 + byte_size(Value);
|
||||||
is_atom(Value) ->
|
is_atom(Value) ->
|
||||||
8
|
8;
|
||||||
|
is_tuple(Value) ->
|
||||||
|
13
|
||||||
end.
|
end.
|
||||||
|
|
||||||
encode_index_node(Level, KVList) ->
|
encode_index_node(Level, KVList) ->
|
||||||
Data = %zlib:zip(
|
Data = %zlib:zip(
|
||||||
erlang:term_to_binary({Level, KVList})
|
erlang:term_to_binary(KVList)
|
||||||
% )
|
% )
|
||||||
,
|
,
|
||||||
Size = byte_size(Data),
|
Size = byte_size(Data)+2,
|
||||||
{ok, Size+4, [ <<Size:32>> | Data ] }.
|
{ok, Size+4, [ <<Size:32/unsigned, Level:16/unsigned>> | Data ] }.
|
||||||
|
|
||||||
decode_index_node(Data) ->
|
decode_index_node(Level, <<Data/binary>>) ->
|
||||||
{Level,KVList} = erlang:binary_to_term(Data), %zlib:unzip(Data)),
|
KVList = erlang:binary_to_term(Data), %zlib:unzip(Data)),
|
||||||
{ok, {node, Level, KVList}}.
|
{ok, {node, Level, KVList}}.
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
index_file_pos,
|
index_file_pos,
|
||||||
|
|
||||||
last_node_pos :: pos_integer(),
|
last_node_pos :: pos_integer(),
|
||||||
|
last_node_size :: pos_integer(),
|
||||||
|
|
||||||
nodes = [] :: [ #node{} ],
|
nodes = [] :: [ #node{} ],
|
||||||
|
|
||||||
|
@ -97,12 +98,12 @@ code_change(_OldVsn, State, _Extra) ->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, bloom=Ref }=State) ->
|
flush_nodes(#state{ nodes=[], last_node_pos=LastNodePos, last_node_size=LastNodeSize, bloom=Ref }=State) ->
|
||||||
|
|
||||||
Bloom = zlib:zip(ebloom:serialize(Ref)),
|
Bloom = zlib:zip(ebloom:serialize(Ref)),
|
||||||
BloomSize = byte_size(Bloom),
|
BloomSize = byte_size(Bloom),
|
||||||
|
|
||||||
Trailer = << 0:32, Bloom/binary, BloomSize:32/unsigned, LastNodePos:64/unsigned >>,
|
Trailer = << 0:32, Bloom/binary, BloomSize:32/unsigned, LastNodePos:64/unsigned >>,
|
||||||
IdxFile = State#state.index_file,
|
IdxFile = State#state.index_file,
|
||||||
|
|
||||||
ok = file:write(IdxFile, Trailer),
|
ok = file:write(IdxFile, Trailer),
|
||||||
|
@ -155,7 +156,9 @@ close_node(#state{nodes=[#node{ level=Level, members=NodeMembers }|RestNodes]} =
|
||||||
ok = file:write(State#state.index_file, Data),
|
ok = file:write(State#state.index_file, Data),
|
||||||
|
|
||||||
{FirstKey, _} = hd(OrderedMembers),
|
{FirstKey, _} = hd(OrderedMembers),
|
||||||
add_record(Level+1, FirstKey, NodePos,
|
add_record(Level+1, FirstKey, {NodePos, DataSize},
|
||||||
State#state{ nodes = RestNodes,
|
State#state{ nodes = RestNodes,
|
||||||
index_file_pos = NodePos + DataSize,
|
index_file_pos = NodePos + DataSize,
|
||||||
last_node_pos = NodePos}).
|
last_node_pos = NodePos,
|
||||||
|
last_node_size = DataSize
|
||||||
|
}).
|
||||||
|
|
Loading…
Reference in a new issue