Fix file rollover problems
This commit is contained in:
parent
549963545f
commit
1e0bb4c404
3 changed files with 106 additions and 51 deletions
|
@ -71,7 +71,7 @@
|
||||||
code_change/3
|
code_change/3
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-define(TICK, 30*1000). %% XXX FIXME Should be something like 5 seconds
|
-define(TICK, 5*1000).
|
||||||
-define(TICK_THRESHOLD, 5). %% After this + 1 more quiescent ticks, shutdown
|
-define(TICK_THRESHOLD, 5). %% After this + 1 more quiescent ticks, shutdown
|
||||||
-define(TIMEOUT, 10*1000).
|
-define(TIMEOUT, 10*1000).
|
||||||
-define(TOO_MANY_ERRORS_RATIO, 50).
|
-define(TOO_MANY_ERRORS_RATIO, 50).
|
||||||
|
@ -91,6 +91,7 @@
|
||||||
csum_table :: machi_csum_table:table(),
|
csum_table :: machi_csum_table:table(),
|
||||||
eof_position = 0 :: non_neg_integer(),
|
eof_position = 0 :: non_neg_integer(),
|
||||||
max_file_size = ?DEFAULT_MAX_FILE_SIZE :: pos_integer(),
|
max_file_size = ?DEFAULT_MAX_FILE_SIZE :: pos_integer(),
|
||||||
|
rollover = false :: boolean(),
|
||||||
tref :: reference(), %% timer ref
|
tref :: reference(), %% timer ref
|
||||||
ticks = 0 :: non_neg_integer(), %% ticks elapsed with no new operations
|
ticks = 0 :: non_neg_integer(), %% ticks elapsed with no new operations
|
||||||
ops = 0 :: non_neg_integer(), %% sum of all ops
|
ops = 0 :: non_neg_integer(), %% sum of all ops
|
||||||
|
@ -449,11 +450,23 @@ handle_cast(Cast, State) ->
|
||||||
{noreply, State}.
|
{noreply, State}.
|
||||||
|
|
||||||
% @private
|
% @private
|
||||||
handle_info(tick, State = #state{eof_position = Eof,
|
handle_info(tick, State = #state{fluname = FluName,
|
||||||
|
filename = F,
|
||||||
|
eof_position = Eof,
|
||||||
max_file_size = MaxFileSize}) when Eof >= MaxFileSize ->
|
max_file_size = MaxFileSize}) when Eof >= MaxFileSize ->
|
||||||
lager:notice("Eof position ~p >= max file size ~p. Shutting down.",
|
%% Older code halted here with {stop, file_rollover, State}.
|
||||||
[Eof, MaxFileSize]),
|
%% However, there may be other requests in our mailbox already
|
||||||
{stop, file_rollover, State};
|
%% and/or not yet delivered but in a race with the
|
||||||
|
%% machi_flu_metadata_mgr. So we close our eleveldb instance (to
|
||||||
|
%% avoid double-open attempt by a new file proxy proc), tell
|
||||||
|
%% machi_flu_metadata_mgr that we request a rollover, then stop.
|
||||||
|
%% terminate() will take care of forwarding messages that are
|
||||||
|
%% caught in the race.
|
||||||
|
lager:notice("Eof ~s position ~p >= max file size ~p. Shutting down.",
|
||||||
|
[F, Eof, MaxFileSize]),
|
||||||
|
State2 = close_files(State),
|
||||||
|
machi_flu_metadata_mgr:stop_proxy_pid_rollover(FluName, {file, F}),
|
||||||
|
{stop, normal, State2#state{rollover = true}};
|
||||||
|
|
||||||
%% XXX Is this a good idea? Need to think this through a bit.
|
%% XXX Is this a good idea? Need to think this through a bit.
|
||||||
handle_info(tick, State = #state{wedged = true}) ->
|
handle_info(tick, State = #state{wedged = true}) ->
|
||||||
|
@ -526,9 +539,9 @@ handle_info(Req, State) ->
|
||||||
{noreply, State}.
|
{noreply, State}.
|
||||||
|
|
||||||
% @private
|
% @private
|
||||||
terminate(Reason, #state{filename = F,
|
terminate(Reason, State = #state{fluname = FluName,
|
||||||
data_filehandle = FHd,
|
filename = F,
|
||||||
csum_table = T,
|
rollover = Rollover_p,
|
||||||
reads = {RT, RE},
|
reads = {RT, RE},
|
||||||
writes = {WT, WE},
|
writes = {WT, WE},
|
||||||
appends = {AT, AE}
|
appends = {AT, AE}
|
||||||
|
@ -538,18 +551,11 @@ terminate(Reason, #state{filename = F,
|
||||||
lager:info(" Reads: ~p/~p", [RT, RE]),
|
lager:info(" Reads: ~p/~p", [RT, RE]),
|
||||||
lager:info(" Writes: ~p/~p", [WT, WE]),
|
lager:info(" Writes: ~p/~p", [WT, WE]),
|
||||||
lager:info("Appends: ~p/~p", [AT, AE]),
|
lager:info("Appends: ~p/~p", [AT, AE]),
|
||||||
case FHd of
|
close_files(State),
|
||||||
undefined ->
|
if Rollover_p ->
|
||||||
noop; %% file deleted
|
forward_late_messages(FluName, F, 500);
|
||||||
_ ->
|
true ->
|
||||||
ok = file:sync(FHd),
|
ok
|
||||||
ok = file:close(FHd)
|
|
||||||
end,
|
|
||||||
case T of
|
|
||||||
undefined ->
|
|
||||||
noop; %% file deleted
|
|
||||||
_ ->
|
|
||||||
ok = machi_csum_table:close(T)
|
|
||||||
end,
|
end,
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
@ -867,3 +873,36 @@ maybe_gc(Reply, S = #state{fluname=FluName,
|
||||||
false ->
|
false ->
|
||||||
{reply, Reply, S}
|
{reply, Reply, S}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
close_files(State = #state{data_filehandle = FHd,
|
||||||
|
csum_table = T}) ->
|
||||||
|
case FHd of
|
||||||
|
undefined ->
|
||||||
|
noop; %% file deleted
|
||||||
|
_ ->
|
||||||
|
ok = file:sync(FHd),
|
||||||
|
ok = file:close(FHd)
|
||||||
|
end,
|
||||||
|
case T of
|
||||||
|
undefined ->
|
||||||
|
noop; %% file deleted
|
||||||
|
_ ->
|
||||||
|
ok = machi_csum_table:close(T)
|
||||||
|
end,
|
||||||
|
State#state{data_filehandle = undefined, csum_table = undefined}.
|
||||||
|
|
||||||
|
forward_late_messages(FluName, F, Timeout) ->
|
||||||
|
receive
|
||||||
|
M ->
|
||||||
|
case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, F}) of
|
||||||
|
{ok, Pid} ->
|
||||||
|
Pid ! M;
|
||||||
|
{error, trimmed} ->
|
||||||
|
lager:error("TODO: FLU ~p file ~p reports trimmed status "
|
||||||
|
"when forwarding ~P\n",
|
||||||
|
[FluName, F, M, 20])
|
||||||
|
end,
|
||||||
|
forward_late_messages(FluName, F, Timeout)
|
||||||
|
after Timeout ->
|
||||||
|
ok
|
||||||
|
end.
|
||||||
|
|
|
@ -157,8 +157,9 @@ handle_call({find_filename, _FluName, EpochId, NSInfo, Prefix}, _From, S = #stat
|
||||||
File = increment_and_cache_filename(Tid, DataDir, NSInfo, Prefix),
|
File = increment_and_cache_filename(Tid, DataDir, NSInfo, Prefix),
|
||||||
{reply, {file, File}, S#state{epoch = EpochId}};
|
{reply, {file, File}, S#state{epoch = EpochId}};
|
||||||
|
|
||||||
handle_call({increment_sequence, #ns_info{name=NS, locator=NSLocator}, Prefix}, _From, S = #state{ datadir = DataDir }) ->
|
handle_call({increment_sequence, #ns_info{name=NS, locator=NSLocator}, Prefix}, _From, S = #state{ datadir = DataDir, tid=Tid }) ->
|
||||||
ok = machi_util:increment_max_filenum(DataDir, NS, NSLocator, Prefix),
|
NSInfo = #ns_info{name=NS, locator=NSLocator},
|
||||||
|
_File = increment_and_cache_filename(Tid, DataDir, NSInfo, Prefix),
|
||||||
{reply, ok, S};
|
{reply, ok, S};
|
||||||
handle_call({list_files, Prefix}, From, S = #state{ datadir = DataDir }) ->
|
handle_call({list_files, Prefix}, From, S = #state{ datadir = DataDir }) ->
|
||||||
spawn(fun() ->
|
spawn(fun() ->
|
||||||
|
|
|
@ -63,6 +63,7 @@
|
||||||
lookup_proxy_pid/2,
|
lookup_proxy_pid/2,
|
||||||
start_proxy_pid/2,
|
start_proxy_pid/2,
|
||||||
stop_proxy_pid/2,
|
stop_proxy_pid/2,
|
||||||
|
stop_proxy_pid_rollover/2,
|
||||||
build_metadata_mgr_name/2,
|
build_metadata_mgr_name/2,
|
||||||
trim_file/2
|
trim_file/2
|
||||||
]).
|
]).
|
||||||
|
@ -100,7 +101,10 @@ start_proxy_pid(FluName, {file, Filename}) ->
|
||||||
gen_server:call(get_manager_atom(FluName, Filename), {start_proxy_pid, Filename}, ?TIMEOUT).
|
gen_server:call(get_manager_atom(FluName, Filename), {start_proxy_pid, Filename}, ?TIMEOUT).
|
||||||
|
|
||||||
stop_proxy_pid(FluName, {file, Filename}) ->
|
stop_proxy_pid(FluName, {file, Filename}) ->
|
||||||
gen_server:call(get_manager_atom(FluName, Filename), {stop_proxy_pid, Filename}, ?TIMEOUT).
|
gen_server:call(get_manager_atom(FluName, Filename), {stop_proxy_pid, false, Filename}, ?TIMEOUT).
|
||||||
|
|
||||||
|
stop_proxy_pid_rollover(FluName, {file, Filename}) ->
|
||||||
|
gen_server:call(get_manager_atom(FluName, Filename), {stop_proxy_pid, true, Filename}, ?TIMEOUT).
|
||||||
|
|
||||||
trim_file(FluName, {file, Filename}) ->
|
trim_file(FluName, {file, Filename}) ->
|
||||||
gen_server:call(get_manager_atom(FluName, Filename), {trim_file, Filename}, ?TIMEOUT).
|
gen_server:call(get_manager_atom(FluName, Filename), {trim_file, Filename}, ?TIMEOUT).
|
||||||
|
@ -151,7 +155,7 @@ handle_call({start_proxy_pid, Filename}, _From,
|
||||||
{reply, {error, trimmed}, State}
|
{reply, {error, trimmed}, State}
|
||||||
end;
|
end;
|
||||||
|
|
||||||
handle_call({stop_proxy_pid, Filename}, _From, State = #state{ tid = Tid }) ->
|
handle_call({stop_proxy_pid, Rollover_p, Filename}, _From, State = #state{ tid = Tid }) ->
|
||||||
case lookup_md(Tid, Filename) of
|
case lookup_md(Tid, Filename) of
|
||||||
not_found ->
|
not_found ->
|
||||||
ok;
|
ok;
|
||||||
|
@ -159,8 +163,13 @@ handle_call({stop_proxy_pid, Filename}, _From, State = #state{ tid = Tid }) ->
|
||||||
ok;
|
ok;
|
||||||
#md{ proxy_pid = Pid, mref = M } = R ->
|
#md{ proxy_pid = Pid, mref = M } = R ->
|
||||||
demonitor(M, [flush]),
|
demonitor(M, [flush]),
|
||||||
|
if Rollover_p ->
|
||||||
|
do_rollover(Filename, State);
|
||||||
|
true ->
|
||||||
machi_file_proxy:stop(Pid),
|
machi_file_proxy:stop(Pid),
|
||||||
update_ets(Tid, R#md{ proxy_pid = undefined, mref = undefined })
|
update_ets(Tid, R#md{ proxy_pid = undefined,
|
||||||
|
mref = undefined })
|
||||||
|
end
|
||||||
end,
|
end,
|
||||||
{reply, ok, State};
|
{reply, ok, State};
|
||||||
|
|
||||||
|
@ -182,27 +191,6 @@ handle_info({'DOWN', Mref, process, Pid, normal}, State = #state{ tid = Tid }) -
|
||||||
clear_ets(Tid, Mref),
|
clear_ets(Tid, Mref),
|
||||||
{noreply, State};
|
{noreply, State};
|
||||||
|
|
||||||
handle_info({'DOWN', Mref, process, Pid, file_rollover}, State = #state{ fluname = FluName,
|
|
||||||
tid = Tid }) ->
|
|
||||||
lager:info("file proxy ~p shutdown because of file rollover", [Pid]),
|
|
||||||
R = get_md_record_by_mref(Tid, Mref),
|
|
||||||
{Prefix, NS, NSLocator, _, _} =
|
|
||||||
machi_util:parse_filename(R#md.filename),
|
|
||||||
|
|
||||||
%% We only increment the counter here. The filename will be generated on the
|
|
||||||
%% next append request to that prefix and since the filename will have a new
|
|
||||||
%% sequence number it probably will be associated with a different metadata
|
|
||||||
%% manager. That's why we don't want to generate a new file name immediately
|
|
||||||
%% and use it to start a new file proxy.
|
|
||||||
NSInfo = #ns_info{name=NS, locator=NSLocator},
|
|
||||||
ok = machi_flu_filename_mgr:increment_prefix_sequence(FluName, NSInfo, {prefix, Prefix}),
|
|
||||||
|
|
||||||
%% purge our ets table of this entry completely since it is likely the
|
|
||||||
%% new filename (whenever it comes) will be in a different manager than
|
|
||||||
%% us.
|
|
||||||
purge_ets(Tid, R),
|
|
||||||
{noreply, State};
|
|
||||||
|
|
||||||
handle_info({'DOWN', Mref, process, Pid, wedged}, State = #state{ tid = Tid }) ->
|
handle_info({'DOWN', Mref, process, Pid, wedged}, State = #state{ tid = Tid }) ->
|
||||||
lager:error("file proxy ~p shutdown because it's wedged", [Pid]),
|
lager:error("file proxy ~p shutdown because it's wedged", [Pid]),
|
||||||
clear_ets(Tid, Mref),
|
clear_ets(Tid, Mref),
|
||||||
|
@ -275,8 +263,35 @@ get_md_record_by_mref(Tid, Mref) ->
|
||||||
[R] = ets:match_object(Tid, {md, '_', '_', Mref}),
|
[R] = ets:match_object(Tid, {md, '_', '_', Mref}),
|
||||||
R.
|
R.
|
||||||
|
|
||||||
|
get_md_record_by_filename(Tid, Filename) ->
|
||||||
|
[R] = ets:lookup(Tid, Filename),
|
||||||
|
R.
|
||||||
|
|
||||||
get_env(Setting, Default) ->
|
get_env(Setting, Default) ->
|
||||||
case application:get_env(machi, Setting) of
|
case application:get_env(machi, Setting) of
|
||||||
undefined -> Default;
|
undefined -> Default;
|
||||||
{ok, V} -> V
|
{ok, V} -> V
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
do_rollover(Filename, _State = #state{ fluname = FluName,
|
||||||
|
tid = Tid }) ->
|
||||||
|
R = get_md_record_by_filename(Tid, Filename),
|
||||||
|
lager:info("file ~p proxy ~p shutdown because of file rollover",
|
||||||
|
[Filename, R#md.proxy_pid]),
|
||||||
|
{Prefix, NS, NSLocator, _, _} =
|
||||||
|
machi_util:parse_filename(R#md.filename),
|
||||||
|
|
||||||
|
%% We only increment the counter here. The filename will be generated on the
|
||||||
|
%% next append request to that prefix and since the filename will have a new
|
||||||
|
%% sequence number it probably will be associated with a different metadata
|
||||||
|
%% manager. That's why we don't want to generate a new file name immediately
|
||||||
|
%% and use it to start a new file proxy.
|
||||||
|
NSInfo = #ns_info{name=NS, locator=NSLocator},
|
||||||
|
lager:warning("INCR: ~p ~p\n", [FluName, Prefix]),
|
||||||
|
ok = machi_flu_filename_mgr:increment_prefix_sequence(FluName, NSInfo, {prefix, Prefix}),
|
||||||
|
|
||||||
|
%% purge our ets table of this entry completely since it is likely the
|
||||||
|
%% new filename (whenever it comes) will be in a different manager than
|
||||||
|
%% us.
|
||||||
|
purge_ets(Tid, R),
|
||||||
|
ok.
|
||||||
|
|
Loading…
Reference in a new issue