WIP: bugfix in transition, just-in-case commit
This commit is contained in:
parent
9d4b4b1df6
commit
54b5014446
1 changed file with 61 additions and 13 deletions
|
@ -748,9 +748,9 @@ rank_projections(Projs, CurrentProj) ->
|
||||||
|
|
||||||
rank_projection(#projection_v1{upi=[]}, _MemberRank, _N) ->
|
rank_projection(#projection_v1{upi=[]}, _MemberRank, _N) ->
|
||||||
-100;
|
-100;
|
||||||
rank_projection(#projection_v1{author_server=Author,
|
rank_projection(#projection_v1{author_server=_Author,
|
||||||
upi=UPI_list,
|
upi=UPI_list,
|
||||||
repairing=Repairing_list}, MemberRank, N) ->
|
repairing=Repairing_list}, _MemberRank, N) ->
|
||||||
%% It's possible that there's "cross-talk" across projection
|
%% It's possible that there's "cross-talk" across projection
|
||||||
%% stores. For example, we were a chain of [a,b], then the
|
%% stores. For example, we were a chain of [a,b], then the
|
||||||
%% administrator sets a's members_dict to include only a.
|
%% administrator sets a's members_dict to include only a.
|
||||||
|
@ -1054,6 +1054,34 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
|
||||||
%% guarantee (yet?) that the [c,d] chain will be the UPI basis
|
%% guarantee (yet?) that the [c,d] chain will be the UPI basis
|
||||||
%% for repairs when the partition is healed: the quickest author
|
%% for repairs when the partition is healed: the quickest author
|
||||||
%% after the healing will make that choice for everyone.
|
%% after the healing will make that choice for everyone.
|
||||||
|
%%
|
||||||
|
%% 2015-07-06: Ha! This works, almost all of the time. But there
|
||||||
|
%% is a bug.
|
||||||
|
%%
|
||||||
|
%% The bug: if a repair has finished near the time that we fall
|
||||||
|
%% out of flapping mode and back to normal (one of the reasons
|
||||||
|
%% that we are here), then it's possible to have a situation like
|
||||||
|
%% this:
|
||||||
|
%% outer: {epoch,4638},{author,c},{upi,[e,c]},{repair,[d,a,b]}
|
||||||
|
%% inner: {epoch,4539},{author,e},{upi,[e,c,d]},{repair,[]}
|
||||||
|
%%
|
||||||
|
%% Code prior to today would simply use the inner projection and
|
||||||
|
%% only keep the outer's epoch number. However, if we do that,
|
||||||
|
%% then C100 will fail a sanity check: author e cannot add d to
|
||||||
|
%% the end of the UPI; only c is allowed to do that.
|
||||||
|
%%
|
||||||
|
%% After checking all 5 participants, they all agree with the
|
||||||
|
%% outer and inner shown above. But all 5 fail their C100
|
||||||
|
%% transition safety check, and so all 5 spin in an infinite loop,
|
||||||
|
%% cool!
|
||||||
|
%%
|
||||||
|
%% Fix for today: Send a signal (through a new func arg) to C100
|
||||||
|
%% that we're moving from inner to outer. If the
|
||||||
|
%% 'expected_author2' error is the only sanity check that fails at
|
||||||
|
%% C100, then that's OK, because: 1. We've lost track of the
|
||||||
|
%% author, so we can't satisfy the check 100% of the time. (We
|
||||||
|
%% have the option of picking the ... TODO: sentence left unfinished.)
|
||||||
|
|
||||||
%%
|
%%
|
||||||
%% TODO: Perhaps that quickest author should consult all of the
|
%% TODO: Perhaps that quickest author should consult all of the
|
||||||
%% other private stores, check their inner, and if there is a
|
%% other private stores, check their inner, and if there is a
|
||||||
|
@ -1063,12 +1091,19 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
|
||||||
%% in the down list, quite odd! Go investigate that.
|
%% in the down list, quite odd! Go investigate that.
|
||||||
|
|
||||||
P_inner2A = inner_projection_or_self(P_current),
|
P_inner2A = inner_projection_or_self(P_current),
|
||||||
|
ResetEpoch = P_newprop10#projection_v1.epoch_number,
|
||||||
|
ResetAuthor = P_current#projection_v1.author_server,
|
||||||
|
ClauseInfo2 = [{old_author, P_inner2A#projection_v1.author_server},
|
||||||
|
{reset_author, ResetAuthor},
|
||||||
|
{reset_epoch, ResetEpoch}],
|
||||||
P_inner2B =
|
P_inner2B =
|
||||||
P_inner2A#projection_v1{epoch_number=
|
P_inner2A#projection_v1{epoch_number=ResetEpoch,
|
||||||
P_newprop10#projection_v1.epoch_number,
|
author_server=ResetAuthor,
|
||||||
dbg=ClauseInfo},
|
dbg=ClauseInfo++ClauseInfo2},
|
||||||
react_to_env_C100(P_inner2B, P_latest, S);
|
ReactI = [{inner2b,machi_projection:make_summary(P_inner2B)}],
|
||||||
|
?REACT({a30, ?LINE, ReactI}),
|
||||||
|
io:format(user, "HEE30 ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse(get(react))]), timer:sleep(100),
|
||||||
|
react_to_env_C100(P_inner2B, P_latest, reset_loop_maybe, S);
|
||||||
true ->
|
true ->
|
||||||
?REACT({a30, ?LINE, []}),
|
?REACT({a30, ?LINE, []}),
|
||||||
react_to_env_A40(Retries, P_newprop10, P_latest,
|
react_to_env_A40(Retries, P_newprop10, P_latest,
|
||||||
|
@ -1246,7 +1281,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
]}),
|
]}),
|
||||||
put(b10_hack, false),
|
put(b10_hack, false),
|
||||||
|
|
||||||
react_to_env_C100(P_newprop, P_latest, S);
|
react_to_env_C100(P_newprop, P_latest, undefined, S);
|
||||||
|
|
||||||
P_newprop_flap_count >= FlapLimit ->
|
P_newprop_flap_count >= FlapLimit ->
|
||||||
%% I am flapping ... what else do I do?
|
%% I am flapping ... what else do I do?
|
||||||
|
@ -1339,13 +1374,10 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
react_to_env_C300(P_newprop, P_latest, S)
|
react_to_env_C300(P_newprop, P_latest, S)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
react_to_env_C100(P_newprop, P_latest,
|
react_to_env_C100(P_newprop, P_latest, PerhapsReset,
|
||||||
#ch_mgr{name=MyName, proj=P_current}=S) ->
|
#ch_mgr{name=MyName, proj=P_current}=S) ->
|
||||||
?REACT(c100),
|
?REACT(c100),
|
||||||
|
|
||||||
I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi),
|
|
||||||
I_am_Repairing_in_latest_p = lists:member(MyName,
|
|
||||||
P_latest#projection_v1.repairing),
|
|
||||||
Sane = projection_transition_is_sane(P_current, P_latest, MyName),
|
Sane = projection_transition_is_sane(P_current, P_latest, MyName),
|
||||||
%% put(xxx_hack, [{p_current, machi_projection:make_summary(P_current)},
|
%% put(xxx_hack, [{p_current, machi_projection:make_summary(P_current)},
|
||||||
%% {epoch_compare, P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number},
|
%% {epoch_compare, P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number},
|
||||||
|
@ -1356,11 +1388,28 @@ react_to_env_C100(P_newprop, P_latest,
|
||||||
_ when P_current#projection_v1.epoch_number == 0 ->
|
_ when P_current#projection_v1.epoch_number == 0 ->
|
||||||
%% Epoch == 0 is reserved for first-time, just booting conditions.
|
%% Epoch == 0 is reserved for first-time, just booting conditions.
|
||||||
?REACT({c100, ?LINE, [first_write]}),
|
?REACT({c100, ?LINE, [first_write]}),
|
||||||
|
erase(perhaps_reset_loop),
|
||||||
react_to_env_C110(P_latest, S);
|
react_to_env_C110(P_latest, S);
|
||||||
true ->
|
true ->
|
||||||
?REACT({c100, ?LINE, [sane]}),
|
?REACT({c100, ?LINE, [sane]}),
|
||||||
|
erase(perhaps_reset_loop),
|
||||||
react_to_env_C110(P_latest, S);
|
react_to_env_C110(P_latest, S);
|
||||||
_AnyOtherReturnValue ->
|
_AnyOtherReturnValue ->
|
||||||
|
io:format(user, "RESETLOOP: ~p ~w ~P", [MyName, get(perhaps_reset_loop), get(react), 70]),
|
||||||
|
if PerhapsReset == reset_loop_maybe ->
|
||||||
|
case get(perhaps_reset_loop) of
|
||||||
|
undefined ->
|
||||||
|
put(perhaps_reset_loop, 1);
|
||||||
|
X when X > 10 ->
|
||||||
|
Msg = lists:flatten(
|
||||||
|
io_lib:format("~P", [get(react), 200])),
|
||||||
|
exit({not_supposed_to_happen, ?MODULE, ?LINE, Msg});
|
||||||
|
X ->
|
||||||
|
put(perhaps_reset_loop, X+1)
|
||||||
|
end;
|
||||||
|
PerhapsReset == undefined ->
|
||||||
|
ok
|
||||||
|
end,
|
||||||
%% P_latest is not sane.
|
%% P_latest is not sane.
|
||||||
%% By process of elimination, P_newprop is best,
|
%% By process of elimination, P_newprop is best,
|
||||||
%% so let's write it.
|
%% so let's write it.
|
||||||
|
@ -1389,7 +1438,6 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) ->
|
||||||
{_,_,C} = os:timestamp(),
|
{_,_,C} = os:timestamp(),
|
||||||
MSec = trunc(C / 1000),
|
MSec = trunc(C / 1000),
|
||||||
{HH,MM,SS} = time(),
|
{HH,MM,SS} = time(),
|
||||||
io:format(user, "HEE120 ~w ~w ~P\n", [S#ch_mgr.name, self(), get(react), 150]),
|
|
||||||
case inner_projection_exists(P_latest2) of
|
case inner_projection_exists(P_latest2) of
|
||||||
false ->
|
false ->
|
||||||
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
||||||
|
|
Loading…
Reference in a new issue