WIP: chain mgr clutter, trying to debug infinite loop
This commit is contained in:
parent
fd1b4363b9
commit
9404e954e7
1 changed file with 79 additions and 27 deletions
|
@ -135,8 +135,9 @@ init({MyName, All_list, Seed, OldThreshold, NoPartitionThreshold, MyFLUPid}) ->
|
||||||
|
|
||||||
handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
|
handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
|
||||||
{reply, not_initialized, S};
|
{reply, not_initialized, S};
|
||||||
handle_call({calculate_projection_internal_old}, _From, S) ->
|
handle_call({calculate_projection_internal_old}, _From, #ch_mgr{myflu=MyFLU}=S) ->
|
||||||
{Reply, S2} = calc_projection(S, [{author_proc, call}]),
|
RelativeToServer = MyFLU,
|
||||||
|
{Reply, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
|
||||||
{reply, Reply, S2};
|
{reply, Reply, S2};
|
||||||
handle_call({test_write_proposed_projection}, _From, S) ->
|
handle_call({test_write_proposed_projection}, _From, S) ->
|
||||||
if S#ch_mgr.proj_proposed == none ->
|
if S#ch_mgr.proj_proposed == none ->
|
||||||
|
@ -149,8 +150,9 @@ handle_call({ping}, _From, S) ->
|
||||||
{reply, pong, S};
|
{reply, pong, S};
|
||||||
handle_call({stop}, _From, S) ->
|
handle_call({stop}, _From, S) ->
|
||||||
{stop, normal, ok, S};
|
{stop, normal, ok, S};
|
||||||
handle_call({test_calc_projection, KeepRunenvP}, _From, S) ->
|
handle_call({test_calc_projection, KeepRunenvP}, _From, #ch_mgr{myflu=MyFLU}=S) ->
|
||||||
{P, S2} = calc_projection(S, [{author_proc, call}]),
|
RelativeToServer = MyFLU,
|
||||||
|
{P, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
|
||||||
{reply, {ok, P}, if KeepRunenvP -> S2;
|
{reply, {ok, P}, if KeepRunenvP -> S2;
|
||||||
true -> S
|
true -> S
|
||||||
end};
|
end};
|
||||||
|
@ -174,8 +176,9 @@ handle_call(_Call, _From, S) ->
|
||||||
|
|
||||||
handle_cast(_Cast, #ch_mgr{init_finished=false} = S) ->
|
handle_cast(_Cast, #ch_mgr{init_finished=false} = S) ->
|
||||||
{noreply, S};
|
{noreply, S};
|
||||||
handle_cast({test_calc_proposed_projection}, S) ->
|
handle_cast({test_calc_proposed_projection}, #ch_mgr{myflu=MyFLU}=S) ->
|
||||||
{Proj, S2} = calc_projection(S, [{author_proc, cast}]),
|
RelativeToServer = MyFLU,
|
||||||
|
{Proj, S2} = calc_projection(S, RelativeToServer, [{author_proc, cast}]),
|
||||||
%% ?Dw({?LINE,make_projection_summary(Proj)}),
|
%% ?Dw({?LINE,make_projection_summary(Proj)}),
|
||||||
{noreply, S2#ch_mgr{proj_proposed=Proj}};
|
{noreply, S2#ch_mgr{proj_proposed=Proj}};
|
||||||
handle_cast(_Cast, S) ->
|
handle_cast(_Cast, S) ->
|
||||||
|
@ -376,17 +379,19 @@ update_projection_checksum(P) ->
|
||||||
update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
|
update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
|
||||||
P#projection{dbg2=Dbg2}.
|
P#projection{dbg2=Dbg2}.
|
||||||
|
|
||||||
calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, Dbg) ->
|
calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, RelativeToServer,
|
||||||
|
Dbg) ->
|
||||||
OldThreshold = proplists:get_value(old_threshold, RunEnv),
|
OldThreshold = proplists:get_value(old_threshold, RunEnv),
|
||||||
NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv),
|
NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv),
|
||||||
calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg, S).
|
calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
|
||||||
|
RelativeToServer, Dbg, S).
|
||||||
|
|
||||||
%% OldThreshold: Percent chance of using the old/previous network partition list
|
%% OldThreshold: Percent chance of using the old/previous network partition list
|
||||||
%% NoPartitionThreshold: If the network partition changes, what are the odds
|
%% NoPartitionThreshold: If the network partition changes, what are the odds
|
||||||
%% that there are no partitions at all?
|
%% that there are no partitions at all?
|
||||||
|
|
||||||
calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
|
calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
|
||||||
#ch_mgr{name=MyName, runenv=RunEnv1} = S) ->
|
RelativeToServer, Dbg, #ch_mgr{name=MyName,runenv=RunEnv1}=S) ->
|
||||||
#projection{epoch_number=OldEpochNum,
|
#projection{epoch_number=OldEpochNum,
|
||||||
all_members=All_list,
|
all_members=All_list,
|
||||||
upi=OldUPI_list,
|
upi=OldUPI_list,
|
||||||
|
@ -397,9 +402,15 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
|
||||||
{Up, _, RunEnv2} = calc_up_nodes(MyName, OldThreshold, NoPartitionThreshold,
|
{Up, _, RunEnv2} = calc_up_nodes(MyName, OldThreshold, NoPartitionThreshold,
|
||||||
AllMembers, RunEnv1),
|
AllMembers, RunEnv1),
|
||||||
|
|
||||||
%% Assumption: MyName is a local FLU and is always up & available.
|
NewUp = Up -- LastUp,
|
||||||
NewUp = lists:usort([MyName] ++ (Up -- LastUp)),
|
Down = AllMembers -- Up,
|
||||||
Down = lists:usort((AllMembers -- Up) -- [MyName]),
|
|
||||||
|
%% HRRRRRRRmmm buggy icky ... causes things like:
|
||||||
|
%% ...,to,[{epoch,8},{author,a},{upi,[a,b]},{repair,[a]}, oops!
|
||||||
|
|
||||||
|
%% %% Assumption: MyName is a local FLU and is always up & available.
|
||||||
|
%% NewUp = lists:usort([MyName] ++ (Up -- LastUp)),
|
||||||
|
%% Down = lists:usort((AllMembers -- Up) -- [MyName]),
|
||||||
|
|
||||||
NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)],
|
NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)],
|
||||||
Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)],
|
Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)],
|
||||||
|
@ -409,7 +420,9 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
|
||||||
{NewUPI_list, [], RunEnv2};
|
{NewUPI_list, [], RunEnv2};
|
||||||
{[], [H|T]} ->
|
{[], [H|T]} ->
|
||||||
{Prob, RunEnvX} = roll_dice(100, RunEnv2),
|
{Prob, RunEnvX} = roll_dice(100, RunEnv2),
|
||||||
if Prob =< 50 ->
|
if Prob =< 50 andalso (NewUPI_list == []
|
||||||
|
orelse
|
||||||
|
(RelativeToServer == hd(NewUPI_list))) ->
|
||||||
{NewUPI_list ++ [H], T, RunEnvX};
|
{NewUPI_list ++ [H], T, RunEnvX};
|
||||||
true ->
|
true ->
|
||||||
{NewUPI_list, OldRepairing_list, RunEnvX}
|
{NewUPI_list, OldRepairing_list, RunEnvX}
|
||||||
|
@ -422,9 +435,22 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
|
||||||
NewUp -> Repairing_list3 ++ NewUp
|
NewUp -> Repairing_list3 ++ NewUp
|
||||||
end,
|
end,
|
||||||
Repairing_list5 = Repairing_list4 -- Down,
|
Repairing_list5 = Repairing_list4 -- Down,
|
||||||
|
|
||||||
|
TentativeUPI = NewUPI_list3,
|
||||||
|
TentativeRepairing = Repairing_list5,
|
||||||
|
|
||||||
|
{NewUPI, NewRepairing} =
|
||||||
|
if TentativeUPI == [] andalso TentativeRepairing /= [] ->
|
||||||
|
[FirstRepairing|TailRepairing] = TentativeRepairing,
|
||||||
|
{[FirstRepairing], TailRepairing};
|
||||||
|
true ->
|
||||||
|
{TentativeUPI, TentativeRepairing}
|
||||||
|
end,
|
||||||
|
|
||||||
P = make_projection(OldEpochNum + 1,
|
P = make_projection(OldEpochNum + 1,
|
||||||
MyName, All_list, Down, NewUPI_list3, Repairing_list5,
|
MyName, All_list, Down, NewUPI, NewRepairing,
|
||||||
Dbg ++ [{nodes_up, Up}]),
|
Dbg ++ [{nodes_up, Up}]),
|
||||||
|
if P#projection.upi == [] -> io:format(user, "old: ~w\n", [make_projection_summary(LastProj)]), io:format(user, "new: ~w\n", [make_projection_summary(P)]), exit(zoozoo); true -> ok end,
|
||||||
{P, S#ch_mgr{runenv=RunEnv3}}.
|
{P, S#ch_mgr{runenv=RunEnv3}}.
|
||||||
|
|
||||||
calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
|
calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
|
||||||
|
@ -526,9 +552,11 @@ do_react_to_env(S) ->
|
||||||
react_to_env_A10(S) ->
|
react_to_env_A10(S) ->
|
||||||
react_to_env_A20(0, S).
|
react_to_env_A20(0, S).
|
||||||
|
|
||||||
react_to_env_A20(Retries, S) ->
|
react_to_env_A20(Retries, #ch_mgr{myflu=MyFLU} = S) ->
|
||||||
%% io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),
|
%% io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),
|
||||||
{P_newprop, S2} = calc_projection(S, [{author_proc, react}]),
|
RelativeToServer = MyFLU,
|
||||||
|
{P_newprop, S2} = calc_projection(S, RelativeToServer,
|
||||||
|
[{author_proc, react}]),
|
||||||
%% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end,
|
%% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end,
|
||||||
react_to_env_A30(Retries, P_newprop, S2).
|
react_to_env_A30(Retries, P_newprop, S2).
|
||||||
|
|
||||||
|
@ -555,6 +583,10 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
P_latest#projection.epoch_number > P_current#projection.epoch_number
|
P_latest#projection.epoch_number > P_current#projection.epoch_number
|
||||||
orelse
|
orelse
|
||||||
not LatestUnanimousP ->
|
not LatestUnanimousP ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
|
?D(P_latest#projection.epoch_number),
|
||||||
|
?D(P_current#projection.epoch_number),
|
||||||
|
?D(LatestUnanimousP),
|
||||||
|
|
||||||
%% 1st clause: someone else has written a newer projection
|
%% 1st clause: someone else has written a newer projection
|
||||||
%% 2nd clause: a network partition has healed, revealing a
|
%% 2nd clause: a network partition has healed, revealing a
|
||||||
|
@ -565,6 +597,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
P_latest#projection.epoch_number < P_current#projection.epoch_number
|
P_latest#projection.epoch_number < P_current#projection.epoch_number
|
||||||
orelse
|
orelse
|
||||||
P_latest /= P_current ->
|
P_latest /= P_current ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
|
|
||||||
%% Both of these cases are rare. Elsewhere, the code
|
%% Both of these cases are rare. Elsewhere, the code
|
||||||
%% assumes that the local FLU's projection store is always
|
%% assumes that the local FLU's projection store is always
|
||||||
|
@ -589,6 +622,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
|
|
||||||
%% A40a (see flowchart)
|
%% A40a (see flowchart)
|
||||||
Rank_newprop > Rank_latest ->
|
Rank_newprop > Rank_latest ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
react_to_env_C300(P_newprop, P_latest, S);
|
react_to_env_C300(P_newprop, P_latest, S);
|
||||||
|
|
||||||
%% A40b (see flowchart)
|
%% A40b (see flowchart)
|
||||||
|
@ -597,10 +631,12 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
(P_newprop#projection.upi /= P_latest#projection.upi
|
(P_newprop#projection.upi /= P_latest#projection.upi
|
||||||
orelse
|
orelse
|
||||||
P_newprop#projection.repairing /= P_latest#projection.repairing) ->
|
P_newprop#projection.repairing /= P_latest#projection.repairing) ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
react_to_env_C300(P_newprop, P_latest, S);
|
react_to_env_C300(P_newprop, P_latest, S);
|
||||||
|
|
||||||
%% A40c (see flowchart)
|
%% A40c (see flowchart)
|
||||||
LatestAuthorDownP ->
|
LatestAuthorDownP ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
|
|
||||||
%% TODO: I believe that membership in the
|
%% TODO: I believe that membership in the
|
||||||
%% P_newprop#projection.down is not sufficient for long
|
%% P_newprop#projection.down is not sufficient for long
|
||||||
|
@ -624,6 +660,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
react_to_env_C300(P_newprop, P_latest, S);
|
react_to_env_C300(P_newprop, P_latest, S);
|
||||||
|
|
||||||
true ->
|
true ->
|
||||||
|
?D({a40,?LINE}),
|
||||||
{{no_change, P_latest#projection.epoch_number}, S}
|
{{no_change, P_latest#projection.epoch_number}, S}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -631,9 +668,11 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) ->
|
Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) ->
|
||||||
if
|
if
|
||||||
LatestUnanimousP ->
|
LatestUnanimousP ->
|
||||||
|
?D({b10, ?LINE}),
|
||||||
react_to_env_C100(P_newprop, P_latest, S);
|
react_to_env_C100(P_newprop, P_latest, S);
|
||||||
|
|
||||||
Retries > 2 ->
|
Retries > 2 ->
|
||||||
|
?D({b10, ?LINE}),
|
||||||
|
|
||||||
%% The author of P_latest is too slow or crashed.
|
%% The author of P_latest is too slow or crashed.
|
||||||
%% Let's try to write P_newprop and see what happens!
|
%% Let's try to write P_newprop and see what happens!
|
||||||
|
@ -642,6 +681,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
Rank_latest >= Rank_newprop
|
Rank_latest >= Rank_newprop
|
||||||
andalso
|
andalso
|
||||||
P_latest#projection.author_server /= MyName ->
|
P_latest#projection.author_server /= MyName ->
|
||||||
|
?D({b10, ?LINE}),
|
||||||
|
|
||||||
%% Give the author of P_latest an opportunity to write a
|
%% Give the author of P_latest an opportunity to write a
|
||||||
%% new projection in a new epoch to resolve this mixed
|
%% new projection in a new epoch to resolve this mixed
|
||||||
|
@ -649,6 +689,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||||
react_to_env_C200(Retries, P_latest, S);
|
react_to_env_C200(Retries, P_latest, S);
|
||||||
|
|
||||||
true ->
|
true ->
|
||||||
|
?D({b10, ?LINE}),
|
||||||
|
|
||||||
%% P_newprop is best, so let's write it.
|
%% P_newprop is best, so let's write it.
|
||||||
react_to_env_C300(P_newprop, P_latest, S)
|
react_to_env_C300(P_newprop, P_latest, S)
|
||||||
|
@ -658,11 +699,18 @@ react_to_env_C100(P_newprop, P_latest,
|
||||||
#ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
|
#ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
|
||||||
case projection_transition_is_sane(P_current, P_latest, MyFLU) of
|
case projection_transition_is_sane(P_current, P_latest, MyFLU) of
|
||||||
true ->
|
true ->
|
||||||
|
?D({c100, ?LINE}),
|
||||||
react_to_env_C110(P_latest, S);
|
react_to_env_C110(P_latest, S);
|
||||||
_AnyOtherReturnValue ->
|
_AnyOtherReturnValue ->
|
||||||
%% P_latest is known to be crap.
|
?D({c100, ?LINE, _AnyOtherReturnValue}), timer:sleep(50),
|
||||||
%% By process of elimination, P_newprop is best,
|
%% %% P_latest is known to be crap.
|
||||||
%% so let's write it.
|
%% %% By process of elimination, P_newprop is best,
|
||||||
|
%% %% so let's write it.
|
||||||
|
%% MaxEpoch = erlang:max(P_newprop#projection.epoch_number,
|
||||||
|
%% P_latest#projection.epoch_number) + 1,
|
||||||
|
%% P_newprop2 = update_projection_checksum(
|
||||||
|
%% P_newprop#projection{epoch_number=MaxEpoch}),
|
||||||
|
%% react_to_env_C300(P_newprop2, P_latest, S)
|
||||||
react_to_env_C300(P_newprop, P_latest, S)
|
react_to_env_C300(P_newprop, P_latest, S)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -701,16 +749,19 @@ react_to_env_C220(Retries, S) ->
|
||||||
react_to_env_A20(Retries + 1, S).
|
react_to_env_A20(Retries + 1, S).
|
||||||
|
|
||||||
react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop,
|
react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop,
|
||||||
#projection{epoch_number=Epoch_latest}=_P_latest, S) ->
|
#projection{epoch_number=_Epoch_latest}=_P_latest, S) ->
|
||||||
NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
|
%% NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
|
||||||
P_newprop2 = P_newprop#projection{epoch_number=NewEpoch},
|
%% P_newprop2 = P_newprop#projection{epoch_number=NewEpoch},
|
||||||
|
%% Let's return to the old epoch thingie and see what happens.........
|
||||||
|
Epoch = Epoch_newprop,
|
||||||
|
P_newprop2 = P_newprop#projection{epoch_number=Epoch + 1},
|
||||||
react_to_env_C310(update_projection_checksum(P_newprop2), S).
|
react_to_env_C310(update_projection_checksum(P_newprop2), S).
|
||||||
|
|
||||||
react_to_env_C310(P_newprop, S) ->
|
react_to_env_C310(P_newprop, S) ->
|
||||||
Epoch = P_newprop#projection.epoch_number,
|
Epoch = P_newprop#projection.epoch_number,
|
||||||
{_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
|
{_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
|
||||||
MyFLU=S#ch_mgr.myflu, ?D({c310, MyFLU, Epoch, _Res}), timer:sleep(200),
|
%% MyFLU=S#ch_mgr.myflu, ?D({c310, MyFLU, Epoch, _Res}), timer:sleep(200),
|
||||||
MPS = mps(P_newprop), ?D(MPS),
|
%% MPS = mps(P_newprop), ?D(MPS),
|
||||||
|
|
||||||
react_to_env_A10(S2).
|
react_to_env_A10(S2).
|
||||||
|
|
||||||
|
@ -811,7 +862,7 @@ projection_transition_is_sane(
|
||||||
%% both, then those authors would not have allowed
|
%% both, then those authors would not have allowed
|
||||||
%% a bad transition, so we will assume this
|
%% a bad transition, so we will assume this
|
||||||
%% transition is OK.
|
%% transition is OK.
|
||||||
?D(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa),
|
%% ?D(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa), timer:sleep(200),
|
||||||
lists:member(AuthorServer1, UPI_list1)
|
lists:member(AuthorServer1, UPI_list1)
|
||||||
andalso
|
andalso
|
||||||
lists:member(AuthorServer2, UPI_list2)
|
lists:member(AuthorServer2, UPI_list2)
|
||||||
|
@ -1054,7 +1105,7 @@ zoof_test() ->
|
||||||
erlang:yield(),
|
erlang:yield(),
|
||||||
Res = test_react_to_env(MMM),
|
Res = test_react_to_env(MMM),
|
||||||
Res=Res %% ?D({self(), Res})
|
Res=Res %% ?D({self(), Res})
|
||||||
end || _ <- lists:seq(1,20)],
|
end || _ <- lists:seq(1,10)],
|
||||||
Parent ! done
|
Parent ! done
|
||||||
end) || MMM <- [Ma, Mb, Mc] ],
|
end) || MMM <- [Ma, Mb, Mc] ],
|
||||||
[receive
|
[receive
|
||||||
|
@ -1068,6 +1119,7 @@ zoof_test() ->
|
||||||
DoIt(),
|
DoIt(),
|
||||||
[test_reset_thresholds(M, 0, 100) || M <- [Ma, Mb, Mc]],
|
[test_reset_thresholds(M, 0, 100) || M <- [Ma, Mb, Mc]],
|
||||||
DoIt(),
|
DoIt(),
|
||||||
|
DoIt(),
|
||||||
|
|
||||||
%% [begin
|
%% [begin
|
||||||
%% La = machi_flu0:proj_list_all(FLU, Type),
|
%% La = machi_flu0:proj_list_all(FLU, Type),
|
||||||
|
|
Loading…
Reference in a new issue