Implement hibernation for readers too

This enables all open files in a merge worker
to be closed while it is waiting for work to do.
This commit is contained in:
Kresten Krab Thorup 2012-05-01 02:12:02 +02:00
parent c998e8ca31
commit 01ea88b67c
4 changed files with 90 additions and 31 deletions

View file

@ -723,7 +723,7 @@ start_range_fold(FileName, WorkerPID, Range, State) ->
proc_lib:spawn( fun() ->
?log("start_range_fold ~p on ~p -> ~p", [self, FileName, WorkerPID]),
erlang:link(WorkerPID),
{ok, File} = hanoi_reader:open(FileName, [sequential|State#state.opts]),
{ok, File} = hanoi_reader:open(FileName, [folding|State#state.opts]),
do_range_fold(File, WorkerPID, self(), Range),
erlang:unlink(WorkerPID),
hanoi_reader:close(File),

View file

@ -33,6 +33,13 @@
-include("hanoi.hrl").
%% A merger which is inactive for this long will sleep
%% which means that it will close open files, and compress
%% current ebloom.
%%
-define(HIBERNATE_TIMEOUT, 5000).
%%
%% Most likely, there will be plenty of I/O being generated by
%% concurrent merges, so we default to running the entire merge
@ -77,7 +84,10 @@ hibernate_scan(Keep) ->
receive
{step, From, HowMany} ->
{BT1, BT2, OutBin, IsLastLevel, AKVs, BKVs, Count, N} = erlang:binary_to_term( zlib:gunzip( Keep ) ),
scan(BT1, BT2, hanoi_writer:deserialize(OutBin), IsLastLevel, AKVs, BKVs, Count, {N+HowMany, From})
scan(hanoi_reader:deserialize(BT1),
hanoi_reader:deserialize(BT2),
hanoi_writer:deserialize(OutBin),
IsLastLevel, AKVs, BKVs, Count, {N+HowMany, From})
end.
scan(BT1, BT2, Out, IsLastLevel, AKVs, BKVs, Count, {N, FromPID}) when N < 1, AKVs =/= [], BKVs =/= [] ->
@ -91,10 +101,12 @@ scan(BT1, BT2, Out, IsLastLevel, AKVs, BKVs, Count, {N, FromPID}) when N < 1, AK
receive
{step, From, HowMany} ->
scan(BT1, BT2, Out, IsLastLevel, AKVs, BKVs, Count, {N+HowMany, From})
after 5000 ->
after ?HIBERNATE_TIMEOUT ->
case ?LOCAL_WRITER of
true ->
Args = {BT1, BT2, hanoi_writer:serialize(Out), IsLastLevel, AKVs, BKVs, Count, N},
Args = {hanoi_reader:serialize(BT1),
hanoi_reader:serialize(BT2),
hanoi_writer:serialize(Out), IsLastLevel, AKVs, BKVs, Count, N},
Keep = zlib:gzip ( erlang:term_to_binary( Args ) ),
hibernate_scan(Keep);
false ->
@ -161,7 +173,9 @@ hibernate_scan_only(Keep) ->
receive
{step, From, HowMany} ->
{BT, OutBin, IsLastLevel, KVs, Count, N} = erlang:binary_to_term( zlib:gunzip( Keep ) ),
scan_only(BT, hanoi_writer:deserialize(OutBin), IsLastLevel, KVs, Count, {N+HowMany, From})
scan_only(hanoi_reader:deserialize(BT),
hanoi_writer:deserialize(OutBin),
IsLastLevel, KVs, Count, {N+HowMany, From})
end.
@ -176,8 +190,9 @@ scan_only(BT, Out, IsLastLevel, KVs, Count, {N, FromPID}) when N < 1, KVs =/= []
receive
{step, From, HowMany} ->
scan_only(BT, Out, IsLastLevel, KVs, Count, {N+HowMany, From})
after 5000 ->
Args = {BT, hanoi_writer:serialize(Out), IsLastLevel, KVs, Count, N},
after ?HIBERNATE_TIMEOUT ->
Args = {hanoi_reader:serialize(BT),
hanoi_writer:serialize(Out), IsLastLevel, KVs, Count, N},
Keep = zlib:gzip ( erlang:term_to_binary( Args ) ),
hibernate_scan_only(Keep)
end;

View file

@ -32,9 +32,10 @@
-export([open/1, open/2,close/1,lookup/2,fold/3,range_fold/4]).
-export([first_node/1,next_node/1]).
-export([serialize/1, deserialize/1]).
-record(node, { level, members=[] }).
-record(index, {file, root, bloom}).
-record(index, {file, root, bloom, name, config=[]}).
-type read_file() :: #index{}.
@ -42,7 +43,7 @@
open(Name) ->
open(Name, [random]).
-type config() :: [sequential | random | {atom(), term()}].
-type config() :: [sequential | folding | random | {atom(), term()}].
-spec open(Name::string(), config()) -> read_file().
@ -50,24 +51,43 @@ open(Name, Config) ->
case proplists:get_bool(sequential, Config) of
true ->
ReadBufferSize = hanoi:get_opt(read_buffer_size, Config, 512 * 1024),
{ok, File} = file:open(Name, [raw,read,{read_ahead, ReadBufferSize},binary]);
{ok, File} = file:open(Name, [raw,read,{read_ahead, ReadBufferSize},binary]),
{ok, #index{file=File, name=Name, config=Config}};
false ->
{ok, File} = file:open(Name, [read,binary])
end,
case proplists:get_bool(folding, Config) of
true ->
ReadBufferSize = hanoi:get_opt(read_buffer_size, Config, 512 * 1024),
{ok, File} = file:open(Name, [read,{read_ahead, ReadBufferSize},binary]);
false ->
{ok, File} = file:open(Name, [read,binary])
end,
{ok, FileInfo} = file:read_file_info(Name),
{ok, FileInfo} = file:read_file_info(Name),
%% read root position
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size-8, 8),
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size-12, 4),
{ok, BloomData} = file:pread(File, FileInfo#file_info.size-12-BloomSize ,BloomSize),
%% read root position
{ok, <<RootPos:64/unsigned>>} = file:pread(File, FileInfo#file_info.size-8, 8),
{ok, <<BloomSize:32/unsigned>>} = file:pread(File, FileInfo#file_info.size-12, 4),
{ok, BloomData} = file:pread(File, FileInfo#file_info.size-12-BloomSize ,BloomSize),
{ok, Bloom} = ebloom:deserialize(zlib:unzip(BloomData)),
{ok, Bloom} = ebloom:deserialize(zlib:unzip(BloomData)),
%% suck in the root
{ok, Root} = read_node(File, RootPos),
%% suck in the root
{ok, Root} = read_node(File, RootPos),
{ok, #index{file=File, root=Root, bloom=Bloom, name=Name, config=Config}}
end.
serialize(#index{file=File, bloom=undefined }=Index) ->
{ok, Position} = file:position(File, cur),
ok = file:close(File),
{seq_read_file, Index, Position}.
deserialize({seq_read_file, Index, Position}) ->
{ok, #index{file=File}=Index2} = open(Index#index.name, Index#index.config),
{ok, Position} = file:position(File, {bof, Position}),
Index2.
{ok, #index{file=File, root=Root, bloom=Bloom}}.
fold(Fun, Acc0, #index{file=File}) ->
@ -222,7 +242,13 @@ lookup_in_node(File,#node{members=Members},Key) ->
plain_rpc:send_reply(From, Result)
end
end),
plain_rpc:call(PID, read);
try plain_rpc:call(PID, read)
catch
Class:Ex ->
error_logger:error_msg("crashX: ~p:~p ~p~n", [Class,Ex,erlang:get_stacktrace()]),
not_found
end;
not_found ->
not_found
end.

View file

@ -55,7 +55,9 @@
bloom,
block_size = ?NODE_SIZE,
compress = none :: none | snappy | gzip
compress = none :: none | snappy | gzip,
opts = []
}).
@ -80,19 +82,18 @@ close(Ref) ->
init([Name,Options]) ->
Size = proplists:get_value(size, Options, 2048),
WriteBufferSize = hanoi:get_opt(write_buffer_size, Options, 512 * 1024),
% io:format("got name: ~p~n", [Name]),
BlockSize = hanoi:get_opt(block_size, Options, ?NODE_SIZE),
case file:open( hanoi_util:index_file_name(Name),
[raw, exclusive, write, {delayed_write, WriteBufferSize, 2000}]) of
case do_open(Name, Options, [exclusive]) of
{ok, IdxFile} ->
{ok, BloomFilter} = ebloom:new(erlang:min(Size,16#ffffffff), 0.01, 123),
BlockSize = hanoi:get_opt(block_size, Options, ?NODE_SIZE),
{ok, #state{ name=Name,
index_file_pos=0, index_file=IdxFile,
bloom = BloomFilter,
block_size = BlockSize,
compress = hanoi:get_opt(compress, Options, none)
compress = hanoi:get_opt(compress, Options, none),
opts = Options
}};
{error, _}=Error ->
error_logger:error_msg("hanoi_writer cannot open ~p: ~p~n", [Name, Error]),
@ -127,13 +128,30 @@ code_change(_OldVsn, State, _Extra) ->
%%%%% INTERNAL FUNCTIONS
serialize(#state{ bloom=Bloom }=State) ->
erlang:term_to_binary( { State, ebloom:serialize(Bloom) } ).
serialize(#state{ bloom=Bloom, index_file=File }=State) ->
% io:format("serializing ~p @ ~p~n", [State#state.name,
% State#state.index_file_pos]),
%% assert that we're on track
Position = State#state.index_file_pos,
{ok, Position} = file:position(File, cur),
ok = file:close(File),
erlang:term_to_binary( { State#state{ index_file=closed }, ebloom:serialize(Bloom) } ).
deserialize(Binary) ->
{ State, BinBloom } = erlang:binary_to_term( Binary ),
% io:format("deserializing ~p @ ~p~n", [State#state.name,
% State#state.index_file_pos]),
{ok, Bloom } = ebloom:deserialize(BinBloom),
State#state{ bloom = Bloom }.
{ok, IdxFile} = do_open(State#state.name, State#state.opts, []),
State#state{ bloom = Bloom, index_file=IdxFile }.
do_open(Name, Options, OpenOpts) ->
WriteBufferSize = hanoi:get_opt(write_buffer_size, Options, 512 * 1024),
file:open( hanoi_util:index_file_name(Name),
[raw, append, {delayed_write, WriteBufferSize, 2000} | OpenOpts]).
% @doc flush pending nodes and write trailer