Fix a state transition bug (chain manager infinite loop, oops)
%% We have a small problem for state transition sanity checking in the %% case where we are flapping *and* a repair has finished. One of the %% sanity checks in simple_chain_state_transition_is_sane(() is that %% the author of P2 in this case must be the tail of P1's UPI: i.e., %% it's the tail's responsibility to perform repair, therefore the tail %% must damn well be the author of any transition that says a repair %% finished successfully. %% %% The problem is that author_server of the inner projection does not %% reflect the actual author! See the comment with the text %% "The inner projection will have a fake author" in %react_to_env_A30(). %% %% So, there's a special return value that tells us to try to check for %% the correct authorship here.
This commit is contained in:
parent
920c0fc610
commit
8ee3377fa7
1 changed files with 61 additions and 20 deletions
|
@ -91,7 +91,7 @@
|
||||||
-define(REPAIR_START_STABILITY_TIME, 10).
|
-define(REPAIR_START_STABILITY_TIME, 10).
|
||||||
-endif. % TEST
|
-endif. % TEST
|
||||||
|
|
||||||
-define(RETURN2(X), begin put(why2, [?LINE|get(why2)]), X end).
|
-define(RETURN2(X), begin (catch put(why2, [?LINE|get(why2)])), X end).
|
||||||
|
|
||||||
%% API
|
%% API
|
||||||
-export([start_link/2, start_link/3, stop/1, ping/1,
|
-export([start_link/2, start_link/3, stop/1, ping/1,
|
||||||
|
@ -878,6 +878,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
|
||||||
{P_newprop1, S2} = calc_projection(S, MyName),
|
{P_newprop1, S2} = calc_projection(S, MyName),
|
||||||
?REACT({a30, ?LINE, [{current, machi_projection:make_summary(S#ch_mgr.proj)}]}),
|
?REACT({a30, ?LINE, [{current, machi_projection:make_summary(S#ch_mgr.proj)}]}),
|
||||||
?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}),
|
?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}),
|
||||||
|
?REACT({a30, ?LINE, [{latest, machi_projection:make_summary(P_latest)}]}),
|
||||||
|
|
||||||
%% Are we flapping yet?
|
%% Are we flapping yet?
|
||||||
{P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2),
|
{P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2),
|
||||||
|
@ -958,7 +959,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
|
||||||
InnerInfo = [{inner_summary,
|
InnerInfo = [{inner_summary,
|
||||||
machi_projection:make_summary(P_inner2)}],
|
machi_projection:make_summary(P_inner2)}],
|
||||||
DbgX = replace(P_newprop3#projection_v1.dbg, InnerInfo),
|
DbgX = replace(P_newprop3#projection_v1.dbg, InnerInfo),
|
||||||
?REACT({a30, ?LINE, [qqqwww|DbgX]}),
|
?REACT({a30, ?LINE, DbgX}),
|
||||||
{P_newprop3#projection_v1{dbg=DbgX,
|
{P_newprop3#projection_v1{dbg=DbgX,
|
||||||
inner=P_inner2}, S_i};
|
inner=P_inner2}, S_i};
|
||||||
_ ->
|
_ ->
|
||||||
|
@ -1359,7 +1360,7 @@ react_to_env_C100(P_newprop, P_latest,
|
||||||
%% P_latest is not sane.
|
%% P_latest is not sane.
|
||||||
%% By process of elimination, P_newprop is best,
|
%% By process of elimination, P_newprop is best,
|
||||||
%% so let's write it.
|
%% so let's write it.
|
||||||
?REACT({c100, ?LINE, [not_sane]}),
|
?REACT({c100, ?LINE, [not_sane, get(why2), _AnyOtherReturnValue]}),
|
||||||
react_to_env_C300(P_newprop, P_latest, S)
|
react_to_env_C300(P_newprop, P_latest, S)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -1632,6 +1633,7 @@ projection_transition_is_sane(P1, P2, RelativeToServer) ->
|
||||||
projection_transition_is_sane(P1, P2, RelativeToServer, false).
|
projection_transition_is_sane(P1, P2, RelativeToServer, false).
|
||||||
|
|
||||||
projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
|
projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
|
||||||
|
put(why2, []),
|
||||||
case projection_transition_is_sane_with_si_epoch(
|
case projection_transition_is_sane_with_si_epoch(
|
||||||
P1, P2, RelativeToServer, RetrospectiveP) of
|
P1, P2, RelativeToServer, RetrospectiveP) of
|
||||||
true ->
|
true ->
|
||||||
|
@ -1641,19 +1643,55 @@ projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
|
||||||
Inner1 = inner_projection_or_self(P1),
|
Inner1 = inner_projection_or_self(P1),
|
||||||
Inner2 = inner_projection_or_self(P2),
|
Inner2 = inner_projection_or_self(P2),
|
||||||
if HasInner1 andalso HasInner2 ->
|
if HasInner1 andalso HasInner2 ->
|
||||||
|
%% In case of inner->inner transition, we must allow
|
||||||
|
%% the epoch number to remain constant. Thus, we
|
||||||
|
%% call the function that does not check for a
|
||||||
|
%% strictly-increasing epoch.
|
||||||
|
?RETURN2(
|
||||||
|
projection_transition_is_sane_final_review(P1, P2,
|
||||||
projection_transition_is_sane_except_si_epoch(
|
projection_transition_is_sane_except_si_epoch(
|
||||||
Inner1, Inner2, RelativeToServer, RetrospectiveP);
|
Inner1, Inner2, RelativeToServer, RetrospectiveP)));
|
||||||
true ->
|
true ->
|
||||||
|
?RETURN2(
|
||||||
|
projection_transition_is_sane_final_review(P1, P2,
|
||||||
projection_transition_is_sane_with_si_epoch(
|
projection_transition_is_sane_with_si_epoch(
|
||||||
Inner1, Inner2, RelativeToServer, RetrospectiveP)
|
Inner1, Inner2, RelativeToServer, RetrospectiveP)))
|
||||||
end;
|
end;
|
||||||
true ->
|
true ->
|
||||||
true
|
?RETURN2(true)
|
||||||
end;
|
end;
|
||||||
Else ->
|
Else ->
|
||||||
Else
|
?RETURN2(Else)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
projection_transition_is_sane_final_review(_P1, P2,
|
||||||
|
{expected_author2,UPI1_tail}=Else) ->
|
||||||
|
%% Reminder: P1 & P2 are outer projections
|
||||||
|
%%
|
||||||
|
%% We have a small problem for state transition sanity checking in the
|
||||||
|
%% case where we are flapping *and* a repair has finished. One of the
|
||||||
|
%% sanity checks in simple_chain_state_transition_is_sane(() is that
|
||||||
|
%% the author of P2 in this case must be the tail of P1's UPI: i.e.,
|
||||||
|
%% it's the tail's responsibility to perform repair, therefore the tail
|
||||||
|
%% must damn well be the author of any transition that says a repair
|
||||||
|
%% finished successfully.
|
||||||
|
%%
|
||||||
|
%% The problem is that author_server of the inner projection does not
|
||||||
|
%% reflect the actual author! See the comment with the text
|
||||||
|
%% "The inner projection will have a fake author" in react_to_env_A30().
|
||||||
|
%%
|
||||||
|
%% So, there's a special return value that tells us to try to check for
|
||||||
|
%% the correct authorship here.
|
||||||
|
|
||||||
|
if UPI1_tail == P2#projection_v1.author_server ->
|
||||||
|
io:format(user, "\nset new projection by author ~w is OK!!!!\n", [UPI1_tail]),
|
||||||
|
?RETURN2(true);
|
||||||
|
true ->
|
||||||
|
?RETURN2(Else)
|
||||||
|
end;
|
||||||
|
projection_transition_is_sane_final_review(_P1, _P2, Else) ->
|
||||||
|
?RETURN2(Else).
|
||||||
|
|
||||||
projection_transition_is_sane_with_si_epoch(
|
projection_transition_is_sane_with_si_epoch(
|
||||||
#projection_v1{epoch_number=Epoch1} = P1,
|
#projection_v1{epoch_number=Epoch1} = P1,
|
||||||
#projection_v1{epoch_number=Epoch2} = P2,
|
#projection_v1{epoch_number=Epoch2} = P2,
|
||||||
|
@ -1662,9 +1700,9 @@ projection_transition_is_sane_with_si_epoch(
|
||||||
P1, P2, RelativeToServer, RetrospectiveP) of
|
P1, P2, RelativeToServer, RetrospectiveP) of
|
||||||
true ->
|
true ->
|
||||||
%% Must be a strictly increasing epoch.
|
%% Must be a strictly increasing epoch.
|
||||||
Epoch2 > Epoch1;
|
?RETURN2(Epoch2 > Epoch1);
|
||||||
Else ->
|
Else ->
|
||||||
Else
|
?RETURN2(Else)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
projection_transition_is_sane_except_si_epoch(
|
projection_transition_is_sane_except_si_epoch(
|
||||||
|
@ -1687,8 +1725,8 @@ projection_transition_is_sane_except_si_epoch(
|
||||||
repairing=Repairing_list2,
|
repairing=Repairing_list2,
|
||||||
dbg=Dbg2} = P2,
|
dbg=Dbg2} = P2,
|
||||||
RelativeToServer, __TODO_RetrospectiveP) ->
|
RelativeToServer, __TODO_RetrospectiveP) ->
|
||||||
|
?RETURN2(undefined),
|
||||||
try
|
try
|
||||||
put(why2, []),
|
|
||||||
%% General notes:
|
%% General notes:
|
||||||
%%
|
%%
|
||||||
%% I'm making no attempt to be "efficient" here. All of these data
|
%% I'm making no attempt to be "efficient" here. All of these data
|
||||||
|
@ -1729,6 +1767,9 @@ projection_transition_is_sane_except_si_epoch(
|
||||||
true = sets:is_disjoint(DownS2, RepairingS2),
|
true = sets:is_disjoint(DownS2, RepairingS2),
|
||||||
true = sets:is_disjoint(UPIS2, RepairingS2),
|
true = sets:is_disjoint(UPIS2, RepairingS2),
|
||||||
|
|
||||||
|
%% Hooray, all basic properties of the projection's elements are
|
||||||
|
%% not obviously bad. Now let's check if the UPI+Repairing->UPI
|
||||||
|
%% transition is good.
|
||||||
?RETURN2(
|
?RETURN2(
|
||||||
chain_state_transition_is_sane(AuthorServer1, UPI_list1, Repairing_list1,
|
chain_state_transition_is_sane(AuthorServer1, UPI_list1, Repairing_list1,
|
||||||
AuthorServer2, UPI_list2))
|
AuthorServer2, UPI_list2))
|
||||||
|
@ -1974,11 +2015,10 @@ remember_partition_hack(FLU) ->
|
||||||
%% {[keep,del],[error]} %% bad transition: 'bogus' not in Repair1'''
|
%% {[keep,del],[error]} %% bad transition: 'bogus' not in Repair1'''
|
||||||
|
|
||||||
simple_chain_state_transition_is_sane(UPI1, Repair1, UPI2) ->
|
simple_chain_state_transition_is_sane(UPI1, Repair1, UPI2) ->
|
||||||
simple_chain_state_transition_is_sane(undefined, UPI1, Repair1,
|
?RETURN2(simple_chain_state_transition_is_sane(undefined, UPI1, Repair1,
|
||||||
undefined, UPI2).
|
undefined, UPI2)).
|
||||||
|
|
||||||
simple_chain_state_transition_is_sane(_Author1, UPI1, Repair1, Author2, UPI2) ->
|
simple_chain_state_transition_is_sane(_Author1, UPI1, Repair1, Author2, UPI2) ->
|
||||||
put(why2, []),
|
|
||||||
{KeepsDels, Orders} = mk(UPI1, Repair1, UPI2),
|
{KeepsDels, Orders} = mk(UPI1, Repair1, UPI2),
|
||||||
NumKeeps = length([x || keep <- KeepsDels]),
|
NumKeeps = length([x || keep <- KeepsDels]),
|
||||||
NumOrders = length(Orders),
|
NumOrders = length(Orders),
|
||||||
|
@ -2003,8 +2043,8 @@ simple_chain_state_transition_is_sane(_Author1, UPI1, Repair1, Author2, UPI2) ->
|
||||||
case catch(lists:last(UPI1)) of
|
case catch(lists:last(UPI1)) of
|
||||||
UPI1_tail when UPI1_tail == Author2 ->
|
UPI1_tail when UPI1_tail == Author2 ->
|
||||||
?RETURN2(true);
|
?RETURN2(true);
|
||||||
_ ->
|
UPI1_tail ->
|
||||||
?RETURN2(false)
|
?RETURN2({expected_author2,UPI1_tail})
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
@ -2029,8 +2069,9 @@ chain_state_transition_is_sane(Author1, UPI1, Repair1, Author2, UPI2) ->
|
||||||
%% always optimal for highest availability).
|
%% always optimal for highest availability).
|
||||||
?RETURN2(true);
|
?RETURN2(true);
|
||||||
true ->
|
true ->
|
||||||
|
?RETURN2(
|
||||||
simple_chain_state_transition_is_sane(Author1, UPI1, Repair1,
|
simple_chain_state_transition_is_sane(Author1, UPI1, Repair1,
|
||||||
Author2, UPI2)
|
Author2, UPI2))
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Create a 2-tuple that describes how `UPI1' + `Repair1' are
|
%% @doc Create a 2-tuple that describes how `UPI1' + `Repair1' are
|
||||||
|
|
Loading…
Reference in a new issue