Fix repairs when everyone is in stable flapping state
This commit is contained in:
parent
2b2facaba2
commit
0414da783a
1 changed file with 94 additions and 30 deletions
|
@ -690,15 +690,18 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg,
|
||||||
NewUPI_list =
|
NewUPI_list =
|
||||||
[X || X <- OldUPI_list, lists:member(X, Up) andalso
|
[X || X <- OldUPI_list, lists:member(X, Up) andalso
|
||||||
not lists:member(X, OldWitness_list)],
|
not lists:member(X, OldWitness_list)],
|
||||||
#projection_v1{upi=CurrentUPI_list} = CurrentProj,
|
%% If we are flapping (AllHosed /= [], which is a good enough proxy),
|
||||||
LastInCurrentUPI = case CurrentUPI_list of
|
%% then we do our repair checks based on the inner projection only. There
|
||||||
[] -> does_not_exist_because_upi_is_empty;
|
%% is no value in doing repairs during flapping.
|
||||||
[_|_] -> lists:last(CurrentUPI_list)
|
RepChk_Proj = if AllHosed == [] ->
|
||||||
end,
|
CurrentProj;
|
||||||
LastInNewUPI = case NewUPI_list of
|
true ->
|
||||||
[] -> does_not_exist_because_upi_is_empty;
|
inner_projection_or_self(CurrentProj)
|
||||||
[_|_] -> lists:last(NewUPI_list)
|
|
||||||
end,
|
end,
|
||||||
|
RepChk_LastInUPI = case RepChk_Proj#projection_v1.upi of
|
||||||
|
[] -> does_not_exist_because_upi_is_empty;
|
||||||
|
[_|_] -> lists:last(RepChk_Proj#projection_v1.upi)
|
||||||
|
end,
|
||||||
Repairing_list2 = [X || X <- OldRepairing_list,
|
Repairing_list2 = [X || X <- OldRepairing_list,
|
||||||
lists:member(X, Up),
|
lists:member(X, Up),
|
||||||
not lists:member(X, OldWitness_list)],
|
not lists:member(X, OldWitness_list)],
|
||||||
|
@ -709,7 +712,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg,
|
||||||
{[], []} ->
|
{[], []} ->
|
||||||
D_foo=[d_foo1],
|
D_foo=[d_foo1],
|
||||||
{NewUPI_list, [], RunEnv2};
|
{NewUPI_list, [], RunEnv2};
|
||||||
{[], [H|T]} when RelativeToServer == LastInNewUPI ->
|
{[], [H|T]} when RelativeToServer == RepChk_LastInUPI ->
|
||||||
%% The author is tail of the UPI list. Let's see if
|
%% The author is tail of the UPI list. Let's see if
|
||||||
%% *everyone* in the UPI+repairing lists are using our
|
%% *everyone* in the UPI+repairing lists are using our
|
||||||
%% projection. This is to simulate a requirement that repair
|
%% projection. This is to simulate a requirement that repair
|
||||||
|
@ -719,9 +722,9 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg,
|
||||||
%% TODO create a real API call for fetching this info?
|
%% TODO create a real API call for fetching this info?
|
||||||
SameEpoch_p = check_latest_private_projections_same_epoch(
|
SameEpoch_p = check_latest_private_projections_same_epoch(
|
||||||
NewUPI_list ++ Repairing_list2,
|
NewUPI_list ++ Repairing_list2,
|
||||||
S#ch_mgr.proj, Partitions, S),
|
RepChk_Proj, Partitions, S),
|
||||||
if Simulator_p andalso SimRepair_p andalso
|
if Simulator_p andalso SimRepair_p andalso
|
||||||
SameEpoch_p andalso RelativeToServer == LastInCurrentUPI ->
|
SameEpoch_p andalso RelativeToServer == RepChk_LastInUPI ->
|
||||||
D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}],
|
D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}],
|
||||||
{NewUPI_list ++ [H], T, RunEnv2};
|
{NewUPI_list ++ [H], T, RunEnv2};
|
||||||
not (Simulator_p andalso SimRepair_p)
|
not (Simulator_p andalso SimRepair_p)
|
||||||
|
@ -817,6 +820,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg,
|
||||||
{P3, S#ch_mgr{runenv=RunEnv3}, Up}.
|
{P3, S#ch_mgr{runenv=RunEnv3}, Up}.
|
||||||
|
|
||||||
check_latest_private_projections_same_epoch(FLUs, MyProj, Partitions, S) ->
|
check_latest_private_projections_same_epoch(FLUs, MyProj, Partitions, S) ->
|
||||||
|
#projection_v1{epoch_number=MyEpoch, epoch_csum=MyCSum} = MyProj,
|
||||||
%% NOTE: The caller must provide us with the FLUs list for all
|
%% NOTE: The caller must provide us with the FLUs list for all
|
||||||
%% FLUs that must be up & available right now. So any
|
%% FLUs that must be up & available right now. So any
|
||||||
%% failure of perhaps_call_t() means that we must return
|
%% failure of perhaps_call_t() means that we must return
|
||||||
|
@ -828,17 +832,17 @@ check_latest_private_projections_same_epoch(FLUs, MyProj, Partitions, S) ->
|
||||||
?FLU_PC:read_latest_projection(Pid, private, ?TO)
|
?FLU_PC:read_latest_projection(Pid, private, ?TO)
|
||||||
end,
|
end,
|
||||||
case perhaps_call_t(S, Partitions, FLU, F) of
|
case perhaps_call_t(S, Partitions, FLU, F) of
|
||||||
{ok, RemotePrivateProj} ->
|
{ok, RPJ} ->
|
||||||
if MyProj#projection_v1.epoch_number ==
|
#projection_v1{epoch_number=RemoteEpoch,
|
||||||
RemotePrivateProj#projection_v1.epoch_number
|
epoch_csum=RemoteCSum} =
|
||||||
andalso
|
inner_projection_or_self(RPJ),
|
||||||
MyProj#projection_v1.epoch_csum ==
|
if MyEpoch == RemoteEpoch,
|
||||||
RemotePrivateProj#projection_v1.epoch_csum ->
|
MyCSum == RemoteCSum ->
|
||||||
true;
|
true;
|
||||||
true ->
|
true ->
|
||||||
false
|
false
|
||||||
end;
|
end;
|
||||||
_ ->
|
_Else ->
|
||||||
false
|
false
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
|
@ -1344,7 +1348,19 @@ a30_make_inner_projection(P_current, P_newprop3, P_latest, Up,
|
||||||
ordsets:from_list(UPI_latest_i ++ Repairing_latest_i)),
|
ordsets:from_list(UPI_latest_i ++ Repairing_latest_i)),
|
||||||
if SameEnough_p ->
|
if SameEnough_p ->
|
||||||
?REACT({a30, ?LINE, []}),
|
?REACT({a30, ?LINE, []}),
|
||||||
P_latest_i;
|
case P_current_has_inner_p andalso
|
||||||
|
(UPI_current_x /= P_i3#projection_v1.upi orelse
|
||||||
|
Repairing_current_x /= P_i3#projection_v1.repairing)
|
||||||
|
of
|
||||||
|
true ->
|
||||||
|
%% Current proj is inner *and* our new
|
||||||
|
%% proposed inner proj differs substantially
|
||||||
|
%% from the current. Don't use latest or
|
||||||
|
%% current.
|
||||||
|
false;
|
||||||
|
false ->
|
||||||
|
P_latest_i
|
||||||
|
end;
|
||||||
CurrentHasInner_and_LatestIsDisjoint_p ->
|
CurrentHasInner_and_LatestIsDisjoint_p ->
|
||||||
?REACT({a30, ?LINE, []}),
|
?REACT({a30, ?LINE, []}),
|
||||||
P_current_ios;
|
P_current_ios;
|
||||||
|
@ -1541,9 +1557,40 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
|
|
||||||
true ->
|
true ->
|
||||||
?REACT({a40, ?LINE, [true]}),
|
?REACT({a40, ?LINE, [true]}),
|
||||||
|
GoTo50_p =
|
||||||
FinalProps = [{throttle_seconds, 0}],
|
case inner_projection_exists(P_current) andalso
|
||||||
react_to_env_A50(P_latest, FinalProps, S)
|
inner_projection_exists(P_newprop) andalso
|
||||||
|
inner_projection_exists(P_latest) of
|
||||||
|
true ->
|
||||||
|
%% All three projections are flapping ... do we have a
|
||||||
|
%% new projection (probably due to repair) that is
|
||||||
|
%% worth suggesting via C300?
|
||||||
|
#projection_v1{epoch_number=Epoch_currenti} =
|
||||||
|
inner_projection_or_self(P_current),
|
||||||
|
#projection_v1{epoch_number=Epoch_newpropi} =
|
||||||
|
inner_projection_or_self(P_newprop),
|
||||||
|
?REACT({a30, ?LINE, [{epoch_currenti,Epoch_currenti},
|
||||||
|
{epoch_newpropi,Epoch_newpropi}]}),
|
||||||
|
if Epoch_currenti > Epoch_newpropi ->
|
||||||
|
%% Inner has a newer epoch, don't go to A50.
|
||||||
|
?REACT({a30, ?LINE, []}),
|
||||||
|
false;
|
||||||
|
true ->
|
||||||
|
?REACT({a30, ?LINE, []}),
|
||||||
|
true
|
||||||
|
end;
|
||||||
|
false ->
|
||||||
|
?REACT({a30, ?LINE, []}),
|
||||||
|
true
|
||||||
|
end,
|
||||||
|
if GoTo50_p ->
|
||||||
|
?REACT({a30, ?LINE, []}),
|
||||||
|
FinalProps = [{throttle_seconds, 0}],
|
||||||
|
react_to_env_A50(P_latest, FinalProps, S);
|
||||||
|
true ->
|
||||||
|
?REACT({a30, ?LINE, []}),
|
||||||
|
react_to_env_C300(P_newprop, P_latest, S)
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
react_to_env_A50(P_latest, FinalProps, #ch_mgr{proj=P_current}=S) ->
|
react_to_env_A50(P_latest, FinalProps, #ch_mgr{proj=P_current}=S) ->
|
||||||
|
@ -1781,7 +1828,7 @@ react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName,
|
||||||
S2 = S#ch_mgr{not_sanes=NotSanesDict, sane_transitions=0},
|
S2 = S#ch_mgr{not_sanes=NotSanesDict, sane_transitions=0},
|
||||||
case orddict:fetch(Author_latest, NotSanesDict) of
|
case orddict:fetch(Author_latest, NotSanesDict) of
|
||||||
N when N > ?TOO_FREQUENT_BREAKER ->
|
N when N > ?TOO_FREQUENT_BREAKER ->
|
||||||
?V("\n\nYOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]),
|
?V("\n\nYOYO ~w breaking the cycle of:\n current: ~w\n new : ~w\n", [MyName, machi_projection:make_summary(S#ch_mgr.proj), machi_projection:make_summary(P_latest)]),
|
||||||
?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}),
|
?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}),
|
||||||
react_to_env_C103(P_latest, S2);
|
react_to_env_C103(P_latest, S2);
|
||||||
N ->
|
N ->
|
||||||
|
@ -1811,6 +1858,7 @@ react_to_env_C103(#projection_v1{epoch_number=Epoch_latest,
|
||||||
?REACT({c103, ?LINE,
|
?REACT({c103, ?LINE,
|
||||||
[{current_epoch, P_current#projection_v1.epoch_number},
|
[{current_epoch, P_current#projection_v1.epoch_number},
|
||||||
{none_projection_epoch, Epoch_latest}]}),
|
{none_projection_epoch, Epoch_latest}]}),
|
||||||
|
timer:sleep(5000), % Let someone else clean up
|
||||||
%% Reset the not_sanes count dictionary here, or else an already
|
%% Reset the not_sanes count dictionary here, or else an already
|
||||||
%% ?TOO_FREQUENT_BREAKER count for an author might prevent a
|
%% ?TOO_FREQUENT_BREAKER count for an author might prevent a
|
||||||
%% transition from C100_inner()->C300, which can lead to infinite
|
%% transition from C100_inner()->C300, which can lead to infinite
|
||||||
|
@ -2154,7 +2202,7 @@ if LeaveFlapping_p -> io:format(user, "CALC_FLAP: ~w: flapping_now ~w start ~w l
|
||||||
if AmFlapping_p ->
|
if AmFlapping_p ->
|
||||||
S2;
|
S2;
|
||||||
true ->
|
true ->
|
||||||
clear_flapping_state(S2)
|
clear_most_flapping_state(S2)
|
||||||
end}.
|
end}.
|
||||||
|
|
||||||
make_flapping_i() ->
|
make_flapping_i() ->
|
||||||
|
@ -2234,7 +2282,7 @@ projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
|
||||||
?RETURN2(Else)
|
?RETURN2(Else)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
projection_transition_is_sane_final_review(_P1, P2,
|
projection_transition_is_sane_final_review(P1, P2,
|
||||||
{expected_author2,UPI1_tail}=Else) ->
|
{expected_author2,UPI1_tail}=Else) ->
|
||||||
%% Reminder: P1 & P2 are outer projections
|
%% Reminder: P1 & P2 are outer projections
|
||||||
%%
|
%%
|
||||||
|
@ -2252,11 +2300,24 @@ projection_transition_is_sane_final_review(_P1, P2,
|
||||||
%%
|
%%
|
||||||
%% So, there's a special return value that tells us to try to check for
|
%% So, there's a special return value that tells us to try to check for
|
||||||
%% the correct authorship here.
|
%% the correct authorship here.
|
||||||
|
P1HasInner_p = inner_projection_exists(P1),
|
||||||
if UPI1_tail == P2#projection_v1.author_server ->
|
P2HasInner_p = inner_projection_exists(P2),
|
||||||
|
P1_LastInnerUPI = case (inner_projection_or_self(P1))#projection_v1.upi of
|
||||||
|
P1InnerUPI=[_|_] when P1HasInner_p ->
|
||||||
|
lists:last(P1InnerUPI);
|
||||||
|
_ ->
|
||||||
|
no_such_author
|
||||||
|
end,
|
||||||
|
if P1HasInner_p, P2HasInner_p ->
|
||||||
|
if UPI1_tail == P1_LastInnerUPI ->
|
||||||
|
?RETURN2(true);
|
||||||
|
true ->
|
||||||
|
?RETURN2(Else)
|
||||||
|
end;
|
||||||
|
UPI1_tail == P2#projection_v1.author_server ->
|
||||||
?RETURN2(true);
|
?RETURN2(true);
|
||||||
true ->
|
true ->
|
||||||
?RETURN2(Else)
|
?RETURN2({gazzuknkgazzuknk, Else, gazzuknk})
|
||||||
end;
|
end;
|
||||||
projection_transition_is_sane_final_review(
|
projection_transition_is_sane_final_review(
|
||||||
#projection_v1{mode=CMode1}=_P1,
|
#projection_v1{mode=CMode1}=_P1,
|
||||||
|
@ -2819,11 +2880,14 @@ all_hosed_history(#projection_v1{epoch_number=_Epoch, flap=Flap},
|
||||||
end.
|
end.
|
||||||
|
|
||||||
clear_flapping_state(S) ->
|
%% Reset the flapping-related fields of the chain manager state record S.
%%
%% Delegates to clear_most_flapping_state/1 for the flap counters, then
%% additionally replaces the not_sanes dictionary with an empty orddict.
%% Returns the updated #ch_mgr{} record.
clear_flapping_state(S) ->
    S2 = clear_most_flapping_state(S),
    %% Update S2, not S: building on the original record would silently
    %% discard every reset performed by clear_most_flapping_state/1.
    S2#ch_mgr{not_sanes=orddict:new()}.
|
||||||
|
|
||||||
|
%% Return S with flap_count set to 0, flap_start set to
%% ?NOT_FLAPPING_START, and flap_counts_last set to []. The flap_last_up
%% and not_sanes fields are not touched by this function.
clear_most_flapping_state(S) ->
|
||||||
S#ch_mgr{flap_count=0,
|
S#ch_mgr{flap_count=0,
|
||||||
flap_start=?NOT_FLAPPING_START,
|
flap_start=?NOT_FLAPPING_START,
|
||||||
%% Do not clear flap_last_up.
|
%% Do not clear flap_last_up.
|
||||||
flap_counts_last=[],
|
flap_counts_last=[]}.
|
||||||
not_sanes=orddict:new()}.
|
|
||||||
|
|
||||||
full_majority_size(N) when is_integer(N) ->
|
full_majority_size(N) when is_integer(N) ->
|
||||||
(N div 2) + 1;
|
(N div 2) + 1;
|
||||||
|
|
Loading…
Reference in a new issue