Improve incremental merge

This change makes incremental merge be concurrent 
with filling up the nursery. So in stead of waiting
for an incremental merge to complete before returning
from insert, it

- blocks waiting for a possible previous incremental merge to complete
- issues a new incremental merge.

This improves put latencies, but not throughput.
This commit is contained in:
Kresten Krab Thorup 2012-04-19 22:33:27 +02:00
parent ee90944c62
commit 6289602045
3 changed files with 42 additions and 24 deletions

View file

@ -25,8 +25,8 @@
-author('Kresten Krab Thorup <krab@trifork.com>'). -author('Kresten Krab Thorup <krab@trifork.com>').
%% smallest levels are 8192 entries %% smallest levels are 128 entries
-define(TOP_LEVEL, 5). -define(TOP_LEVEL, 7).
-define(BTREE_SIZE(Level), (1 bsl (Level))). -define(BTREE_SIZE(Level), (1 bsl (Level))).
-define(TOMBSTONE, 'deleted'). -define(TOMBSTONE, 'deleted').

View file

@ -42,7 +42,7 @@
-behavior(plain_fsm). -behavior(plain_fsm).
-export([data_vsn/0, code_change/3]). -export([data_vsn/0, code_change/3]).
-export([open/3, lookup/2, inject/2, close/1, async_range/3, sync_range/3, step/2]). -export([open/3, lookup/2, inject/2, close/1, async_range/3, sync_range/3, incremental_merge/2]).
-include_lib("kernel/include/file.hrl"). -include_lib("kernel/include/file.hrl").
@ -59,7 +59,7 @@ open(Dir,Level,Next) when Level>0 ->
process_flag(trap_exit,true), process_flag(trap_exit,true),
initialize(#state{dir=Dir,level=Level,next=Next}) initialize(#state{dir=Dir,level=Level,next=Next})
end), end),
step(PID, 2*?BTREE_SIZE(?TOP_LEVEL)), incremental_merge(PID, 2*?BTREE_SIZE(?TOP_LEVEL)),
{ok, PID}. {ok, PID}.
lookup(Ref, Key) -> lookup(Ref, Key) ->
@ -69,8 +69,8 @@ inject(Ref, FileName) ->
Result = call(Ref, {inject, FileName}), Result = call(Ref, {inject, FileName}),
Result. Result.
step(Ref,Howmuch) -> incremental_merge(Ref,HowMuch) ->
call(Ref,{step,Howmuch}). call(Ref, {incremental_merge, HowMuch}).
close(Ref) -> close(Ref) ->
try try
@ -241,11 +241,20 @@ main_loop(State = #state{ next=Next }) ->
reply(From, ok), reply(From, ok),
check_begin_merge_then_loop(setelement(SetPos, State, BT)); check_begin_merge_then_loop(setelement(SetPos, State, BT));
%% replies OK when there is no current step in progress
?REQ(From, {incremental_merge, HowMuch})
when State#state.step_merge_ref == undefined,
State#state.step_next_ref == undefined ->
reply(From, ok),
self() ! ?REQ(undefined, {step, HowMuch}),
main_loop(State);
%% accept step any time there is not an outstanding step %% accept step any time there is not an outstanding step
?REQ(StepFrom, {step, HowMuch}) ?REQ(StepFrom, {step, HowMuch})
when State#state.step_merge_ref == undefined, when State#state.step_merge_ref == undefined,
State#state.step_caller == undefined, State#state.step_caller == undefined,
State#state.step_next_ref == undefined State#state.step_next_ref == undefined
-> ->
%% delegate the step request to the next level %% delegate the step request to the next level
@ -266,8 +275,8 @@ main_loop(State = #state{ next=Next }) ->
if (Next =:= undefined) andalso (MergeRef =:= undefined) -> if (Next =:= undefined) andalso (MergeRef =:= undefined) ->
%% nothing to do ... just return OK %% nothing to do ... just return OK
reply(StepFrom, ok), State2 = reply_step_ok(State#state { step_caller = StepFrom }),
main_loop(State); main_loop(State2);
true -> true ->
main_loop(State#state{ step_next_ref=DelegateRef, main_loop(State#state{ step_next_ref=DelegateRef,
step_caller=StepFrom, step_caller=StepFrom,
@ -280,8 +289,7 @@ main_loop(State = #state{ next=Next }) ->
case State#state.step_next_ref of case State#state.step_next_ref of
undefined -> undefined ->
reply(State#state.step_caller, ok), State2 = reply_step_ok(State);
State2 = State#state{ step_caller=undefined };
_ -> _ ->
State2 = State State2 = State
end, end,
@ -290,10 +298,11 @@ main_loop(State = #state{ next=Next }) ->
{'DOWN', MRef, _, _, _} when MRef == State#state.step_merge_ref -> {'DOWN', MRef, _, _, _} when MRef == State#state.step_merge_ref ->
%% current merge worker died (or just finished)
case State#state.step_next_ref of case State#state.step_next_ref of
undefined -> undefined ->
reply(State#state.step_caller, ok), State2 = reply_step_ok(State);
State2 = State#state{ step_caller=undefined };
_ -> _ ->
State2 = State State2 = State
end, end,
@ -301,11 +310,15 @@ main_loop(State = #state{ next=Next }) ->
main_loop(State2#state{ step_merge_ref=undefined }); main_loop(State2#state{ step_merge_ref=undefined });
?REPLY(MRef, ok) when (MRef =:= State#state.step_next_ref) and ?REPLY(MRef, ok)
(State#state.step_merge_ref =:= undefined) -> when MRef =:= State#state.step_next_ref,
reply(State#state.step_caller, ok), State#state.step_merge_ref =:= undefined ->
main_loop(State#state{ step_next_ref=undefined,
step_caller=undefined }); %% this applies when we receive an OK from the next level,
%% and we have finished the incremental merge at this level
State2 = reply_step_ok(State),
main_loop(State2#state{ step_next_ref=undefined });
?REQ(From, close) -> ?REQ(From, close) ->
close_if_defined(State#state.a), close_if_defined(State#state.a),
@ -508,6 +521,13 @@ main_loop(State = #state{ next=Next }) ->
end. end.
reply_step_ok(State) ->
case State#state.step_caller of
undefined -> ok;
_ -> reply(State#state.step_caller, ok)
end,
State#state{ step_caller=undefined }.
do_lookup(_Key, []) -> do_lookup(_Key, []) ->
not_found; not_found;
do_lookup(_Key, [Pid]) when is_pid(Pid) -> do_lookup(_Key, [Pid]) when is_pid(Pid) ->

View file

@ -164,11 +164,9 @@ finish(#nursery{ dir=Dir, cache=Cache, log_file=LogFile, total_size=_TotalSize,
%% inject the B-Tree (blocking RPC) %% inject the B-Tree (blocking RPC)
ok = lsm_btree_level:inject(TopLevel, BTreeFileName), ok = lsm_btree_level:inject(TopLevel, BTreeFileName),
%% synchroneously do some work if this is a top-level inject %% issue some work if this is a top-level inject (blocks until previous such
%% incremental merge is finished).
% error_logger:info_msg("doing step ~p~n", [?BTREE_SIZE(?TOP_LEVEL)]), lsm_btree_level:incremental_merge(TopLevel, ?BTREE_SIZE(?TOP_LEVEL)),
lsm_btree_level:step(TopLevel, ?BTREE_SIZE(?TOP_LEVEL)),
ok; ok;
_ -> _ ->