Implement merge hibernation for tail scan

When scanning just one file (because all its keys
come after the ones in the other file), we may also
need hibernation to save memory.  In particular,
the bloom filters being built take up a lot of memory.
This commit is contained in:
Kresten Krab Thorup 2012-04-30 21:28:33 +02:00
parent 6ce7101506
commit 74686b1380

View file

@ -155,6 +155,16 @@ scan(BT1, BT2, Out, IsLastLevel, [{Key1,Value1}|AT]=AKVs, [{Key2,Value2}|BT]=BKV
scan(BT1, BT2, Out2, IsLastLevel, AT, BT, Count+1, step(Step, 2)) scan(BT1, BT2, Out2, IsLastLevel, AT, BT, Count+1, step(Step, 2))
end. end.
%% Parks a single-file scan until the next step request arrives.
%%
%% Keep is a gzip-compressed external-term binary holding the tuple
%% {BT, OutBin, IsLastLevel, KVs, Count, N}; OutBin is turned back into a
%% writer via hanoi_writer:deserialize/1 before the scan resumes.
%%
%% The process forces a garbage collection and then blocks (no timeout)
%% until it receives {step, From, HowMany}. Only then is the state unpacked,
%% and scan_only/6 is resumed with the step budget raised to N + HowMany and
%% From recorded as the requester.
hibernate_scan_only(Keep) ->
    erlang:garbage_collect(),
    receive
        {step, From, HowMany} ->
            %% Unpack lazily: the state stays compressed while we wait.
            Packed = zlib:gunzip(Keep),
            {BT, OutBin, IsLastLevel, KVs, Count, N} = erlang:binary_to_term(Packed),
            Out = hanoi_writer:deserialize(OutBin),
            scan_only(BT, Out, IsLastLevel, KVs, Count, {N + HowMany, From})
    end.
scan_only(BT, Out, IsLastLevel, KVs, Count, {N, FromPID}) when N < 1, KVs =/= [] -> scan_only(BT, Out, IsLastLevel, KVs, Count, {N, FromPID}) when N < 1, KVs =/= [] ->
case FromPID of case FromPID of
none -> none ->
@ -166,6 +176,10 @@ scan_only(BT, Out, IsLastLevel, KVs, Count, {N, FromPID}) when N < 1, KVs =/= []
receive receive
{step, From, HowMany} -> {step, From, HowMany} ->
scan_only(BT, Out, IsLastLevel, KVs, Count, {N+HowMany, From}) scan_only(BT, Out, IsLastLevel, KVs, Count, {N+HowMany, From})
after 10000 ->
Args = {BT, hanoi_writer:serialize(Out), IsLastLevel, KVs, Count, N},
Keep = zlib:gzip ( erlang:term_to_binary( Args ) ),
hibernate_scan_only(Keep);
end; end;
scan_only(BT, Out, IsLastLevel, [], Count, {_, FromPID}=Step) -> scan_only(BT, Out, IsLastLevel, [], Count, {_, FromPID}=Step) ->