WIP: chain mgr clutter, trying to debug infinite loop

This commit is contained in:
Scott Lystig Fritchie 2014-11-03 12:06:04 +09:00
parent fd1b4363b9
commit 9404e954e7

View file

@ -135,8 +135,9 @@ init({MyName, All_list, Seed, OldThreshold, NoPartitionThreshold, MyFLUPid}) ->
handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) -> handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
{reply, not_initialized, S}; {reply, not_initialized, S};
handle_call({calculate_projection_internal_old}, _From, S) -> handle_call({calculate_projection_internal_old}, _From, #ch_mgr{myflu=MyFLU}=S) ->
{Reply, S2} = calc_projection(S, [{author_proc, call}]), RelativeToServer = MyFLU,
{Reply, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
{reply, Reply, S2}; {reply, Reply, S2};
handle_call({test_write_proposed_projection}, _From, S) -> handle_call({test_write_proposed_projection}, _From, S) ->
if S#ch_mgr.proj_proposed == none -> if S#ch_mgr.proj_proposed == none ->
@ -149,8 +150,9 @@ handle_call({ping}, _From, S) ->
{reply, pong, S}; {reply, pong, S};
handle_call({stop}, _From, S) -> handle_call({stop}, _From, S) ->
{stop, normal, ok, S}; {stop, normal, ok, S};
handle_call({test_calc_projection, KeepRunenvP}, _From, S) -> handle_call({test_calc_projection, KeepRunenvP}, _From, #ch_mgr{myflu=MyFLU}=S) ->
{P, S2} = calc_projection(S, [{author_proc, call}]), RelativeToServer = MyFLU,
{P, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
{reply, {ok, P}, if KeepRunenvP -> S2; {reply, {ok, P}, if KeepRunenvP -> S2;
true -> S true -> S
end}; end};
@ -174,8 +176,9 @@ handle_call(_Call, _From, S) ->
handle_cast(_Cast, #ch_mgr{init_finished=false} = S) -> handle_cast(_Cast, #ch_mgr{init_finished=false} = S) ->
{noreply, S}; {noreply, S};
handle_cast({test_calc_proposed_projection}, S) -> handle_cast({test_calc_proposed_projection}, #ch_mgr{myflu=MyFLU}=S) ->
{Proj, S2} = calc_projection(S, [{author_proc, cast}]), RelativeToServer = MyFLU,
{Proj, S2} = calc_projection(S, RelativeToServer, [{author_proc, cast}]),
%% ?Dw({?LINE,make_projection_summary(Proj)}), %% ?Dw({?LINE,make_projection_summary(Proj)}),
{noreply, S2#ch_mgr{proj_proposed=Proj}}; {noreply, S2#ch_mgr{proj_proposed=Proj}};
handle_cast(_Cast, S) -> handle_cast(_Cast, S) ->
@ -376,17 +379,19 @@ update_projection_checksum(P) ->
update_projection_dbg2(P, Dbg2) when is_list(Dbg2) -> update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
P#projection{dbg2=Dbg2}. P#projection{dbg2=Dbg2}.
calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, Dbg) -> calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, RelativeToServer,
Dbg) ->
OldThreshold = proplists:get_value(old_threshold, RunEnv), OldThreshold = proplists:get_value(old_threshold, RunEnv),
NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv), NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv),
calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg, S). calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
RelativeToServer, Dbg, S).
%% OldThreshold: Percent chance of using the old/previous network partition list %% OldThreshold: Percent chance of using the old/previous network partition list
%% NoPartitionThreshold: If the network partition changes, what are the odds %% NoPartitionThreshold: If the network partition changes, what are the odds
%% that there are no partitions at all? %% that there are no partitions at all?
calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg, calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
#ch_mgr{name=MyName, runenv=RunEnv1} = S) -> RelativeToServer, Dbg, #ch_mgr{name=MyName,runenv=RunEnv1}=S) ->
#projection{epoch_number=OldEpochNum, #projection{epoch_number=OldEpochNum,
all_members=All_list, all_members=All_list,
upi=OldUPI_list, upi=OldUPI_list,
@ -397,9 +402,15 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
{Up, _, RunEnv2} = calc_up_nodes(MyName, OldThreshold, NoPartitionThreshold, {Up, _, RunEnv2} = calc_up_nodes(MyName, OldThreshold, NoPartitionThreshold,
AllMembers, RunEnv1), AllMembers, RunEnv1),
%% Assumption: MyName is a local FLU and is always up & available. NewUp = Up -- LastUp,
NewUp = lists:usort([MyName] ++ (Up -- LastUp)), Down = AllMembers -- Up,
Down = lists:usort((AllMembers -- Up) -- [MyName]),
%% HRRRRRRRmmm buggy icky ... causes things like:
%% ...,to,[{epoch,8},{author,a},{upi,[a,b]},{repair,[a]}, oops!
%% %% Assumption: MyName is a local FLU and is always up & available.
%% NewUp = lists:usort([MyName] ++ (Up -- LastUp)),
%% Down = lists:usort((AllMembers -- Up) -- [MyName]),
NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)], NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)],
Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)], Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)],
@ -409,7 +420,9 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
{NewUPI_list, [], RunEnv2}; {NewUPI_list, [], RunEnv2};
{[], [H|T]} -> {[], [H|T]} ->
{Prob, RunEnvX} = roll_dice(100, RunEnv2), {Prob, RunEnvX} = roll_dice(100, RunEnv2),
if Prob =< 50 -> if Prob =< 50 andalso (NewUPI_list == []
orelse
(RelativeToServer == hd(NewUPI_list))) ->
{NewUPI_list ++ [H], T, RunEnvX}; {NewUPI_list ++ [H], T, RunEnvX};
true -> true ->
{NewUPI_list, OldRepairing_list, RunEnvX} {NewUPI_list, OldRepairing_list, RunEnvX}
@ -422,9 +435,22 @@ calc_projection(OldThreshold, NoPartitionThreshold, LastProj, Dbg,
NewUp -> Repairing_list3 ++ NewUp NewUp -> Repairing_list3 ++ NewUp
end, end,
Repairing_list5 = Repairing_list4 -- Down, Repairing_list5 = Repairing_list4 -- Down,
TentativeUPI = NewUPI_list3,
TentativeRepairing = Repairing_list5,
{NewUPI, NewRepairing} =
if TentativeUPI == [] andalso TentativeRepairing /= [] ->
[FirstRepairing|TailRepairing] = TentativeRepairing,
{[FirstRepairing], TailRepairing};
true ->
{TentativeUPI, TentativeRepairing}
end,
P = make_projection(OldEpochNum + 1, P = make_projection(OldEpochNum + 1,
MyName, All_list, Down, NewUPI_list3, Repairing_list5, MyName, All_list, Down, NewUPI, NewRepairing,
Dbg ++ [{nodes_up, Up}]), Dbg ++ [{nodes_up, Up}]),
if P#projection.upi == [] -> io:format(user, "old: ~w\n", [make_projection_summary(LastProj)]), io:format(user, "new: ~w\n", [make_projection_summary(P)]), exit(zoozoo); true -> ok end,
{P, S#ch_mgr{runenv=RunEnv3}}. {P, S#ch_mgr{runenv=RunEnv3}}.
calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) -> calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
@ -526,9 +552,11 @@ do_react_to_env(S) ->
react_to_env_A10(S) -> react_to_env_A10(S) ->
react_to_env_A20(0, S). react_to_env_A20(0, S).
react_to_env_A20(Retries, S) -> react_to_env_A20(Retries, #ch_mgr{myflu=MyFLU} = S) ->
%% io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]), %% io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),
{P_newprop, S2} = calc_projection(S, [{author_proc, react}]), RelativeToServer = MyFLU,
{P_newprop, S2} = calc_projection(S, RelativeToServer,
[{author_proc, react}]),
%% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end, %% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end,
react_to_env_A30(Retries, P_newprop, S2). react_to_env_A30(Retries, P_newprop, S2).
@ -555,6 +583,10 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
P_latest#projection.epoch_number > P_current#projection.epoch_number P_latest#projection.epoch_number > P_current#projection.epoch_number
orelse orelse
not LatestUnanimousP -> not LatestUnanimousP ->
?D({a40,?LINE}),
?D(P_latest#projection.epoch_number),
?D(P_current#projection.epoch_number),
?D(LatestUnanimousP),
%% 1st clause: someone else has written a newer projection %% 1st clause: someone else has written a newer projection
%% 2nd clause: a network partition has healed, revealing a %% 2nd clause: a network partition has healed, revealing a
@ -565,6 +597,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
P_latest#projection.epoch_number < P_current#projection.epoch_number P_latest#projection.epoch_number < P_current#projection.epoch_number
orelse orelse
P_latest /= P_current -> P_latest /= P_current ->
?D({a40,?LINE}),
%% Both of these cases are rare. Elsewhere, the code %% Both of these cases are rare. Elsewhere, the code
%% assumes that the local FLU's projection store is always %% assumes that the local FLU's projection store is always
@ -589,6 +622,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
%% A40a (see flowchart) %% A40a (see flowchart)
Rank_newprop > Rank_latest -> Rank_newprop > Rank_latest ->
?D({a40,?LINE}),
react_to_env_C300(P_newprop, P_latest, S); react_to_env_C300(P_newprop, P_latest, S);
%% A40b (see flowchart) %% A40b (see flowchart)
@ -597,10 +631,12 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
(P_newprop#projection.upi /= P_latest#projection.upi (P_newprop#projection.upi /= P_latest#projection.upi
orelse orelse
P_newprop#projection.repairing /= P_latest#projection.repairing) -> P_newprop#projection.repairing /= P_latest#projection.repairing) ->
?D({a40,?LINE}),
react_to_env_C300(P_newprop, P_latest, S); react_to_env_C300(P_newprop, P_latest, S);
%% A40c (see flowchart) %% A40c (see flowchart)
LatestAuthorDownP -> LatestAuthorDownP ->
?D({a40,?LINE}),
%% TODO: I believe that membership in the %% TODO: I believe that membership in the
%% P_newprop#projection.down is not sufficient for long %% P_newprop#projection.down is not sufficient for long
@ -624,6 +660,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
react_to_env_C300(P_newprop, P_latest, S); react_to_env_C300(P_newprop, P_latest, S);
true -> true ->
?D({a40,?LINE}),
{{no_change, P_latest#projection.epoch_number}, S} {{no_change, P_latest#projection.epoch_number}, S}
end. end.
@ -631,9 +668,11 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) -> Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) ->
if if
LatestUnanimousP -> LatestUnanimousP ->
?D({b10, ?LINE}),
react_to_env_C100(P_newprop, P_latest, S); react_to_env_C100(P_newprop, P_latest, S);
Retries > 2 -> Retries > 2 ->
?D({b10, ?LINE}),
%% The author of P_latest is too slow or crashed. %% The author of P_latest is too slow or crashed.
%% Let's try to write P_newprop and see what happens! %% Let's try to write P_newprop and see what happens!
@ -642,6 +681,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
Rank_latest >= Rank_newprop Rank_latest >= Rank_newprop
andalso andalso
P_latest#projection.author_server /= MyName -> P_latest#projection.author_server /= MyName ->
?D({b10, ?LINE}),
%% Give the author of P_latest an opportunite to write a %% Give the author of P_latest an opportunite to write a
%% new projection in a new epoch to resolve this mixed %% new projection in a new epoch to resolve this mixed
@ -649,6 +689,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
react_to_env_C200(Retries, P_latest, S); react_to_env_C200(Retries, P_latest, S);
true -> true ->
?D({b10, ?LINE}),
%% P_newprop is best, so let's write it. %% P_newprop is best, so let's write it.
react_to_env_C300(P_newprop, P_latest, S) react_to_env_C300(P_newprop, P_latest, S)
@ -658,11 +699,18 @@ react_to_env_C100(P_newprop, P_latest,
#ch_mgr{myflu=MyFLU, proj=P_current}=S) -> #ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
case projection_transition_is_sane(P_current, P_latest, MyFLU) of case projection_transition_is_sane(P_current, P_latest, MyFLU) of
true -> true ->
?D({c100, ?LINE}),
react_to_env_C110(P_latest, S); react_to_env_C110(P_latest, S);
_AnyOtherReturnValue -> _AnyOtherReturnValue ->
%% P_latest is known to be crap. ?D({c100, ?LINE, _AnyOtherReturnValue}), timer:sleep(50),
%% By process of elimination, P_newprop is best, %% %% P_latest is known to be crap.
%% so let's write it. %% %% By process of elimination, P_newprop is best,
%% %% so let's write it.
%% MaxEpoch = erlang:max(P_newprop#projection.epoch_number,
%% P_latest#projection.epoch_number) + 1,
%% P_newprop2 = update_projection_checksum(
%% P_newprop#projection{epoch_number=MaxEpoch}),
%% react_to_env_C300(P_newprop2, P_latest, S)
react_to_env_C300(P_newprop, P_latest, S) react_to_env_C300(P_newprop, P_latest, S)
end. end.
@ -701,16 +749,19 @@ react_to_env_C220(Retries, S) ->
react_to_env_A20(Retries + 1, S). react_to_env_A20(Retries + 1, S).
react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop, react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop,
#projection{epoch_number=Epoch_latest}=_P_latest, S) -> #projection{epoch_number=_Epoch_latest}=_P_latest, S) ->
NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1, %% NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
P_newprop2 = P_newprop#projection{epoch_number=NewEpoch}, %% P_newprop2 = P_newprop#projection{epoch_number=NewEpoch},
%% Let's return to the old epoch thingie and see what happens.........
Epoch = Epoch_newprop,
P_newprop2 = P_newprop#projection{epoch_number=Epoch + 1},
react_to_env_C310(update_projection_checksum(P_newprop2), S). react_to_env_C310(update_projection_checksum(P_newprop2), S).
react_to_env_C310(P_newprop, S) -> react_to_env_C310(P_newprop, S) ->
Epoch = P_newprop#projection.epoch_number, Epoch = P_newprop#projection.epoch_number,
{_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S), {_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
MyFLU=S#ch_mgr.myflu, ?D({c310, MyFLU, Epoch, _Res}), timer:sleep(200), %% MyFLU=S#ch_mgr.myflu, ?D({c310, MyFLU, Epoch, _Res}), timer:sleep(200),
MPS = mps(P_newprop), ?D(MPS), %% MPS = mps(P_newprop), ?D(MPS),
react_to_env_A10(S2). react_to_env_A10(S2).
@ -811,7 +862,7 @@ projection_transition_is_sane(
%% both, then those authors would not have allowed %% both, then those authors would not have allowed
%% a bad transition, so we will assume this %% a bad transition, so we will assume this
%% transition is OK. %% transition is OK.
?D(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa), %% ?D(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa), timer:sleep(200),
lists:member(AuthorServer1, UPI_list1) lists:member(AuthorServer1, UPI_list1)
andalso andalso
lists:member(AuthorServer2, UPI_list2) lists:member(AuthorServer2, UPI_list2)
@ -1054,7 +1105,7 @@ zoof_test() ->
erlang:yield(), erlang:yield(),
Res = test_react_to_env(MMM), Res = test_react_to_env(MMM),
Res=Res %% ?D({self(), Res}) Res=Res %% ?D({self(), Res})
end || _ <- lists:seq(1,20)], end || _ <- lists:seq(1,10)],
Parent ! done Parent ! done
end) || MMM <- [Ma, Mb, Mc] ], end) || MMM <- [Ma, Mb, Mc] ],
[receive [receive
@ -1068,6 +1119,7 @@ zoof_test() ->
DoIt(), DoIt(),
[test_reset_thresholds(M, 0, 100) || M <- [Ma, Mb, Mc]], [test_reset_thresholds(M, 0, 100) || M <- [Ma, Mb, Mc]],
DoIt(), DoIt(),
DoIt(),
%% [begin %% [begin
%% La = machi_flu0:proj_list_all(FLU, Type), %% La = machi_flu0:proj_list_all(FLU, Type),