WIP: cp_mode improvements

This commit is contained in:
Scott Lystig Fritchie 2015-08-24 19:04:26 +09:00
parent 66cafe066e
commit 2f82fe0487
3 changed files with 461 additions and 96 deletions

View file

@ -39,7 +39,8 @@
-record(flap_i, {
flap_count :: {term(), term()},
all_hosed :: list(),
all_flap_counts :: list()
all_flap_counts :: list(),
my_unique_prop_count :: non_neg_integer()
}).
-type p_srvr() :: #p_srvr{}.

View file

@ -773,7 +773,6 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg,
D_foo ++
Dbg ++ [{ps, Partitions},{nodes_up, Up}]),
P2 = if CMode == cp_mode ->
%% TODO incomplete logic!
UpWitnesses = [W || W <- Up, lists:member(W, OldWitness_list)],
Majority = full_majority_size(AllMembers),
SoFar = length(NewUPI),
@ -1231,7 +1230,14 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
Latest_authors_flap_count_latest}},
{move_from_inner, MoveFromInnerToNorm_p}],
?REACT({a30, ?LINE, ClauseInfo}),
if MoveFromInnerToNorm_p orelse Kicker_p ->
MoveToNorm_p = MoveFromInnerToNorm_p orelse Kicker_p,
if MoveToNorm_p, CMode == cp_mode ->
%% Too much weird stuff may have happened while we were suffering
%% the flapping/asymmetric partition. Fall back to the none
%% projection as if we're restarting.
?REACT({a30, ?LINE, [{move_to_norm, MoveToNorm_p}]}),
react_to_env_A49(P_latest, [], S10);
MoveToNorm_p, CMode == ap_mode ->
%% Move from inner projection to outer.
P_inner2A = inner_projection_or_self(P_current),
ResetEpoch = P_newprop10#projection_v1.epoch_number,
@ -1588,6 +1594,16 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
end
end.
%% A49: give up on the current projection and restart from the "none"
%% projection.
%%
%% Builds a none projection with make_none_projection/4 from this manager's
%% name plus the all_members, witnesses and members_dict fields of the
%% manager's current projection (S#ch_mgr.proj), installs it as the new
%% S#ch_mgr.proj, and continues at react_to_env_A50/3 with that none
%% projection passed in the "latest projection" position.
%%
%% Arguments:
%%   _P_latest  - the caller's latest projection.  It is deliberately not
%%                used here (the none projection replaces it); the argument
%%                is kept so that the existing 3-arity call sites keep
%%                working.
%%   FinalProps - passed through unchanged to react_to_env_A50/3.
%%   S          - #ch_mgr{} state; its name and proj fields are read.
%%
%% Returns whatever react_to_env_A50/3 returns.
react_to_env_A49(_P_latest, FinalProps, #ch_mgr{name=MyName,
                                                 proj=P_current} = S) ->
    ?REACT(a49),
    #projection_v1{all_members=All_list,
                   witnesses=Witness_list,
                   members_dict=MembersDict} = P_current,
    P_none = make_none_projection(MyName, All_list, Witness_list,
                                  MembersDict),
    react_to_env_A50(P_none, FinalProps, S#ch_mgr{proj=P_none}).
react_to_env_A50(P_latest, FinalProps, #ch_mgr{proj=P_current}=S) ->
?REACT(a50),
?REACT({a50, ?LINE, [{current_epoch, P_current#projection_v1.epoch_number},
@ -1635,16 +1651,33 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
end,
P_newprop_AllHosedPlus =
lists:flatten([[X,Y] || {X,problem_with,Y} <- P_newprop_AllHosed]),
AllAreFlapping_and_IamBad_and_NotRelevant_p =
inner_projection_exists(P_current) andalso
inner_projection_exists(P_latest) andalso
inner_projection_exists(P_newprop) andalso
MyUniquePropCount == 1 andalso
%% Commit 66cafe06 added UnanimousLatestInnerNotRelevant_p to the
%% compound predicate below. I'm yanking it out now. TODO re-study?
#projection_v1{upi=P_newprop_upi_ooi, repairing=P_newprop_repairing_ooi} =
inner_projection_or_self(P_newprop),
EnoughAreFlapping_and_IamBad_p =
%% Ignore inner_projection_exists(P_current): We might need to
%% shut up quickly (adopting a new P_current can take a long
%% time).
(inner_projection_exists(P_latest) orelse
inner_projection_exists(P_newprop)) andalso
%% I'm suspected of being bad
lists:member(MyName, P_newprop_AllHosedPlus) andalso
UnanimousLatestInnerNotRelevant_p,
%% I'm not in the critical UPI or repairing lists
(not lists:member(MyName, P_newprop_upi_ooi++P_newprop_repairing_ooi))
andalso
%% My down lists are the same, i.e., no state change to announce
P_current#projection_v1.down == P_newprop#projection_v1.down,
if EnoughAreFlapping_and_IamBad_p -> io:format(user, "B10 ~w ~w current_down ~w newprop_down ~w, ", [MyName, EnoughAreFlapping_and_IamBad_p, P_current#projection_v1.down, P_newprop#projection_v1.down]); true -> ok end,
?REACT({b10, ?LINE, [{0,EnoughAreFlapping_and_IamBad_p},
{1,inner_projection_exists(P_current)},
{2,inner_projection_exists(P_latest)},
{3,inner_projection_exists(P_newprop)},
{4,MyUniquePropCount},
{5,{MyName, P_newprop_AllHosedPlus}},
{6,UnanimousLatestInnerNotRelevant_p}]}),
if
AllAreFlapping_and_IamBad_and_NotRelevant_p ->
EnoughAreFlapping_and_IamBad_p ->
?REACT({b10, ?LINE, []}),
%% There's outer flapping happening *and* we ourselves are
@ -1764,8 +1797,7 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest,
?REACT(c100),
Sane = projection_transition_is_sane(P_current, P_latest, MyName),
%% if Sane == true -> ok; true -> ?V("insane-~w-~w,", [MyName, P_newprop#projection_v1.epoch_number]) end, %%% DELME!!!
if Sane == true -> ok; true -> ?V("insane-~w-~w-~p,", [MyName, P_newprop#projection_v1.epoch_number, Sane]) end, %%% DELME!!!
if Sane == true -> ok; true -> ?V("~w-insane-~w-auth=~w:~w:~w:~w:~w:~w:~w-~p,", [?LINE, MyName, P_newprop#projection_v1.author_server, P_newprop#projection_v1.epoch_number, P_newprop#projection_v1.upi, P_newprop#projection_v1.repairing, (inner_projection_or_self(P_newprop))#projection_v1.epoch_number, (inner_projection_or_self(P_newprop))#projection_v1.upi, (inner_projection_or_self(P_newprop))#projection_v1.repairing, Sane]) end, %%% DELME!!!
Flap_latest = if is_record(Flap_latest0, flap_i) ->
Flap_latest0;
true ->
@ -1782,14 +1814,12 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest,
_ when P_current#projection_v1.epoch_number == 0 ->
%% Epoch == 0 is reserved for first-time, just booting conditions.
?REACT({c100, ?LINE, [first_write]}),
if Sane == true -> ok; true -> ?V("insane-~w-~w@~w,", [MyName, P_newprop#projection_v1.epoch_number, ?LINE]) end, %%% DELME!!!
if Sane == true -> ok; true -> ?V("~w-insane-~w-~w:~w:~w,", [?LINE, MyName, P_newprop#projection_v1.epoch_number, P_newprop#projection_v1.upi, P_newprop#projection_v1.repairing]) end, %%% DELME!!!
react_to_env_C110(P_latest, S);
true ->
?REACT({c100, ?LINE, [sane]}),
if Sane == true -> ok; true -> ?V("insane-~w-~w@~w,", [MyName, P_newprop#projection_v1.epoch_number, ?LINE]) end, %%% DELME!!!
if Sane == true -> ok; true -> ?V("~w-insane-~w-~w:~w:~w@~w,", [?LINE, MyName, P_newprop#projection_v1.epoch_number, P_newprop#projection_v1.upi, P_newprop#projection_v1.repairing, ?LINE]) end, %%% DELME!!!
react_to_env_C110(P_latest, S);
%% 20150715: I've seen this loop happen with {expected_author2,X}
%% where nobody agrees, weird.
DoctorSays ->
?REACT({c100, ?LINE, [{not_sane, DoctorSays}]}),
%% This is a fun case. We had just enough asymmetric partition
@ -1843,10 +1873,16 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest,
end.
react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName,
P_newprop, P_latest, S) ->
P_newprop, P_latest,
#ch_mgr{consistency_mode=CMode} = S) ->
NotSanesDict = orddict:update_counter(Author_latest, 1, NotSanesDict0),
S2 = S#ch_mgr{not_sanes=NotSanesDict, sane_transitions=0},
case orddict:fetch(Author_latest, NotSanesDict) of
N when CMode == cp_mode ->
?V("YOYO-cp-mode,~w,~w,~w,",[MyName, P_latest#projection_v1.epoch_number,N]),
?REACT({c100, ?LINE, [{cmode,CMode},
{not_sanes_author_count, N}]}),
react_to_env_A49(P_latest, [], S2);
N when N > ?TOO_FREQUENT_BREAKER ->
?V("\n\nYOYO ~w breaking the cycle of:\n current: ~w\n new : ~w\n", [MyName, machi_projection:make_summary(S#ch_mgr.proj), machi_projection:make_summary(P_latest)]),
?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}),
@ -1863,20 +1899,21 @@ react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName,
react_to_env_C103(#projection_v1{epoch_number=Epoch_newprop} = P_newprop,
#projection_v1{epoch_number=Epoch_latest,
all_members=All_list,
flap=Flap,
members_dict=MembersDict} = P_latest,
flap=Flap} = P_latest,
#ch_mgr{name=MyName, proj=P_current}=S) ->
#projection_v1{witnesses=Witness_list} = P_current,
#projection_v1{witnesses=Witness_list,
members_dict=MembersDict} = P_current,
P_none0 = make_none_projection(MyName, All_list, Witness_list, MembersDict),
P_none1 = P_none0#projection_v1{epoch_number=erlang:max(Epoch_newprop,
Epoch_latest),
%% P_none1 = P_none0#projection_v1{epoch_number=erlang:max(Epoch_newprop,
%% Epoch_latest),
P_none1 = P_none0#projection_v1{epoch_number=Epoch_latest,
flap=Flap,
dbg=[{none_projection,true}]},
P_none = machi_projection:update_checksum(P_none1),
%% Use it, darn it, because it's 100% safe. And exit flapping state.
?REACT({c103, ?LINE,
[{current_epoch, P_current#projection_v1.epoch_number},
{none_projection_epoch, Epoch_latest}]}),
{none_projection_epoch, P_none#projection_v1.epoch_number}]}),
%% Reset the not_sanes count dictionary here, or else an already
%% ?TOO_FREQUENT_BREAKER count for an author might prevent a
%% transition from C100_inner()->C300, which can lead to infinite
@ -1885,7 +1922,7 @@ react_to_env_C103(#projection_v1{epoch_number=Epoch_newprop} = P_newprop,
react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) ->
?REACT(c110),
?REACT({c110, [{latest_epoch, P_latest#projection_v1.epoch_number}]}),
?REACT({c110, ?LINE, [{latest_epoch,P_latest#projection_v1.epoch_number}]}),
Extra_todo = [{react,get(react)}],
P_latest2 = machi_projection:update_dbg2(P_latest, Extra_todo),
@ -2098,27 +2135,30 @@ calculate_flaps(P_newprop, P_latest, _P_current, CurrentUp, _FlapLimit,
{uniques, UniqueProposalSummaries}]}),
P_latest_Flap = get_raw_flapping_i(P_latest),
AmFlappingNow_p = not (FlapStart == ?NOT_FLAPPING_START orelse
FlapStart == undefined),
FlapStart == undefined)
andalso
length(UniqueProposalSummaries) == 1,
P_latest_flap_start = case P_latest_Flap of
undefined ->
?NOT_FLAPPING_START;
_ ->
element(1, P_latest_Flap#flap_i.flap_count)
end,
MinQueueLen = 7,
StartFlapping_p =
case {queue:len(H), UniqueProposalSummaries} of
_ when AmFlappingNow_p ->
?REACT({calculate_flaps,?LINE,[{flap_start,FlapStart}]}),
%% I'm already flapping, therefore don't start again.
false;
{N, _} when N >= 3,
{N, _} when N >= MinQueueLen,
P_latest_flap_start /= ?NOT_FLAPPING_START ->
?REACT({calculate_flaps,?LINE,
[{manifesto_clause,2},
{latest_epoch, P_latest#projection_v1.epoch_number},
{latest_flap_count,P_latest_Flap#flap_i.flap_count}]}),
true;
{N, [_]} when N >= 3 ->
{N, [_]} when N >= MinQueueLen ->
?REACT({calculate_flaps,?LINE,[{manifesto_clause,1}]}),
true;
{_N, _} ->
@ -2314,8 +2354,8 @@ projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
?RETURN2(Else)
end.
projection_transition_is_sane_final_review(P1, P2,
{expected_author2,UPI1_tail}=Else) ->
projection_transition_is_sane_final_review(
P1, P2, {expected_author2,UPI1_tail,_}=Else) ->
%% Reminder: P1 & P2 are outer projections
%%
%% We have a small problem for state transition sanity checking in the
@ -2815,7 +2855,11 @@ simple_chain_state_transition_is_sane(_Author1, UPI1, Repair1, Author2, UPI2) ->
UPI1_tail when UPI1_tail == Author2 ->
?RETURN2(true);
UPI1_tail ->
?RETURN2({expected_author2,UPI1_tail})
?RETURN2({expected_author2,UPI1_tail,
[{upi1,UPI1},
{repair1,Repair1},
{author2,Author2},
{upi2,UPI2}]})
end
end
end.
@ -2962,7 +3006,8 @@ make_zerf2(OldEpochNum, Up, MajoritySize, MyName, AllMembers, OldWitness_list, M
put(epochs, Epochs),
Relation = [],
put(xxx_epoch, OldEpochNum),
zerf_find_last_common(Epochs, Relation, MajoritySize, Up, S)
Proj = zerf_find_last_common(Epochs, Relation, MajoritySize, Up, S),
Proj#projection_v1{flap=make_flapping_i()}
catch
throw:{zerf,no_common} ->
FirstEpoch_p = case get(epochs) of
@ -2994,7 +3039,7 @@ make_zerf2(OldEpochNum, Up, MajoritySize, MyName, AllMembers, OldWitness_list, M
P = make_none_projection(MyName, AllMembers,OldWitness_list,
MembersDict),
machi_projection:update_checksum(
P#projection_v1{epoch_number=OldEpochNum,dbg2=[zerf_none]})
P#projection_v1{epoch_number=OldEpochNum,dbg2=[zerf_none, {es, get(epochs)},{up,Up},{maj,MajoritySize}]})
end;
_X:_Y ->
throw({zerf, {damn_exception, Up, _X, _Y, erlang:get_stacktrace()}})
@ -3012,11 +3057,12 @@ zerf_find_last_common(UnsearchedEpochs, Relation, MajoritySize, Up, S) ->
case ?FLU_PC:read_projection(Proxy, private, E, ?TO) of
{ok, Proj} ->
%% Sort order: we want inner = bigger.
CSum = Proj#projection_v1.epoch_csum,
OorI = case inner_projection_exists(Proj) of
true -> z_inner;
false -> a_outer
end,
K = {E, OorI, Proj#projection_v1{dbg2=[]}},
K = {E, CSum, OorI, Proj#projection_v1{dbg2=[]}},
Rel2 = case lists:keyfind(K, 1, Rel) of
false ->
[{K, [FLU]}|Rel];
@ -3032,13 +3078,14 @@ zerf_find_last_common(UnsearchedEpochs, Relation, MajoritySize, Up, S) ->
end
end, Relation, lists:reverse([{E, FLU} || E <- NowEpochs, FLU <- Up])),
SortedRel = lists:reverse(lists:sort(Rel2)),
case [T || T={{E, OorI, Proj}, WrittenFLUs} <- SortedRel,
lists:sort(Proj#projection_v1.upi) == lists:sort(WrittenFLUs)
case [T || T={{E, _CSum, _OorI, Proj}, WrittenFLUs} <- SortedRel,
ordsets:is_subset(ordsets:from_list(Proj#projection_v1.upi),
ordsets:from_list(WrittenFLUs))
andalso
length(Proj#projection_v1.upi) >= MajoritySize] of
[] ->
zerf_find_last_common(NextEpochs, Rel2, MajoritySize, Up, S);
[{{_E, _OorI, Proj}, _WrittenFLUs}|_] ->
[{{_E, _CSum, _OorI, Proj}, _WrittenFLUs}|_] ->
Proj
end.

View file

@ -192,6 +192,10 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) ->
{ok, MPid} = ?MGR:start_link(P#p_srvr.name, MembersDict, MgrOpts),
{P#p_srvr.name, MPid}
end || P <- Ps],
CpApMode = case is_list(proplists:get_value(witnesses, MgrOpts)) of
true -> cp_mode;
false -> ap_mode
end,
try
[{_, Ma}|_] = MgrNamez,
@ -233,11 +237,12 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) ->
end || _ <- Pids]
end,
machi_partition_simulator:reset_thresholds(10, 50),
io:format(user, "\nLet loose the dogs of war!\n", []),
%% machi_partition_simulator:always_these_partitions([]),
%% io:format(user, "\nPuppies for everyone!\n", []),
[DoIt(30, 0, 0) || _ <- lists:seq(1,2)],
%% machi_partition_simulator:reset_thresholds(10, 50),
%% io:format(user, "\nLet loose the dogs of war!\n", []),
machi_partition_simulator:always_these_partitions([]),
io:format(user, "\nPuppies for everyone!\n", []),
[DoIt(30, 0, 0) || _ <- lists:seq(1,5)],
AllPs = make_partition_list(All_list),
PartitionCounts = lists:zip(AllPs, lists:seq(1, length(AllPs))),
MaxIters = NumFLUs * (NumFLUs + 1) * 6,
@ -272,7 +277,10 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) ->
try
[{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} ||
{FLU, Psx} <- PrivProjs]
catch _Err:_What ->
catch
_Err:_What when CpApMode == cp_mode ->
io:format(user, "none proj skip detected, TODO? ", []);
_Err:_What when CpApMode == ap_mode ->
io:format(user, "PrivProjs ~p\n", [PrivProjs]),
exit({line, ?LINE, _Err, _What})
end,
@ -333,7 +341,10 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) ->
[{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} ||
{FLU, Psx} <- PrivProjs],
io:format(user, "\nAll sanity checks pass, hooray!\n", [])
catch _Err:_What ->
catch
_Err:_What when CpApMode == cp_mode ->
io:format(user, "none proj skip detected, TODO? ", []);
_Err:_What when CpApMode == ap_mode ->
io:format(user, "Report ~p\n", [Report]),
io:format(user, "PrivProjs ~p\n", [PrivProjs]),
exit({line, ?LINE, _Err, _What})
@ -359,29 +370,73 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) ->
%% Uncomment *one* of the following make_partition_list() bodies.
make_partition_list(All_list) ->
_X_Ys1 = [[{X,Y}] || X <- All_list, Y <- All_list, X /= Y],
_X_Ys2 = [[{X,Y}, {A,B}] || X <- All_list, Y <- All_list, X /= Y,
A <- All_list, B <- All_list, A /= B,
X /= A],
_X_Ys3 = [[{X,Y}, {A,B}, {C,D}] || X <- All_list, Y <- All_list, X /= Y,
A <- All_list, B <- All_list, A /= B,
C <- All_list, D <- All_list, C /= D,
X /= A, X /= C, A /= C],
[
[{b,c}],
[],
[{c,d}],
[],
[{d,e}],
[],
[{c,e}]
].
%% _X_Ys1 = [[{X,Y}] || X <- All_list, Y <- All_list, X /= Y],
%% _X_Ys2 = [[{X,Y}, {A,B}] || X <- All_list, Y <- All_list, X /= Y,
%% A <- All_list, B <- All_list, A /= B,
%% X /= A],
%% _X_Ys3 = [[{X,Y}, {A,B}, {C,D}] || X <- All_list, Y <- All_list, X /= Y,
%% A <- All_list, B <- All_list, A /= B,
%% C <- All_list, D <- All_list, C /= D,
%% X /= A, X /= C, A /= C],
%% Concat = _X_Ys1,
%% Concat = _X_Ys2,
%% Concat = _X_Ys1 ++ _X_Ys2,
%% %% Concat = _X_Ys3,
%% Concat = _X_Ys1 ++ _X_Ys2 ++ _X_Ys3,
%% %% Concat = _X_Ys2,
%% %% Concat = _X_Ys1 ++ _X_Ys2,
%% %% %% Concat = _X_Ys3,
%% %% Concat = _X_Ys1 ++ _X_Ys2 ++ _X_Ys3,
%% random_sort(lists:usort([lists:sort(L) || L <- Concat])).
%% [
%% [{b,c}],
%% [{b,c},{c,d},{e,a}],
%% [{a,c},{a,d},{a,e},{c,a},{d,a},{e,a},{b,c},{b,d},{b,e},{b,c},{b,d},{b,e}, % iof2
%% {c,a},{c,b},{c,d},{c,e},{a,c},{b,c},{d,c},{e,c}, % island of 1
%% {d,a},{d,b},{d,c},{d,e},{a,d},{b,d},{c,d},{e,d}, % island of 1
%% {e,a},{e,b},{e,c},{e,d},{a,e},{b,e},{c,e},{d,e}],% island of 1
%% [{a,e},{b,c},{d,e}] % the stinker?
%% ,
%% [],
%% [{b,a},{d,e},{e,a}],
%% [{b,c},{c,d}],
%% [{a,c},{c,a},{d,b}],
%% [{a,e},{c,e},{e,d}],
%% [{a,e},{c,d},{d,b}],
%% [{b,e},{c,a},{e,d}],
%% [{b,c},{c,d},{e,a}],
%% [{d,e},{e,c}],
%% [{a,e},{b,c},{d,e}] % the stinker?
%% ,
%% [],
%% [{e,a},{g,d}],
%% [{b,f},{f,b}],
%% [{a,g},{c,d}]
%% ]. % for 5 in AP, yay, working now.
%% [ [{a,b},{b,d},{c,b}],
%% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
%% %% [{a,b},{b,d},{c,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
%% [{a,b},{b,d},{c,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
%% [{a,b},{b,d},{c,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ].
%% [ [{a,b}, {b,c}],
%% [{a,b}, {c,b}] ].
%% [ [{a,b}, {b,c}],
%% [{a,b}, {a,c}] ].
%% Q = [ {X,Y} || X <- [a], Y <- [b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] ],
%% %% [ [{d,e}], Q]. %% len=7 problem: bad inner flip when ps=[] at end!
%% [ Q, [{a,b},{c,d},{e,f}] ]. %% len=7 problem: WTF, double-check please!
%% len=7 problem: insane evil-near-infinite-loop sometimes
%% [ [{a,b}], Q, [{c,d}], Q, [{d,e}], Q].
%% [ [{a,b}, {b,c}] ]. %% hosed-not-equal @ 3 FLUs
@ -399,12 +454,26 @@ make_partition_list(All_list) ->
%% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], []
%% ].
%% [ [{a,b}, {b,a}] ].
%% [
%% [{a,b}],
%% [],
%% [{a,b}, {b,a}],
%% [],
%% [{b,c}]
%% ].
[
[{c,b}, {c,a}],
[{b,c}, {b,a}]
].
%% [
%% [{a,c},{b,a},{c,b}],
%% [{b,a}]
%% ].
%% [
%% [{c,b}, {c,a}],
%% [{b,c}, {b,a}],
%% [],
%% [{c,b}, {c,a}],
%% [{b,c}, {b,a}]
%% ].
%% [
%% [{a,b}], [],
@ -474,7 +543,7 @@ todo_why_does_this_crash_sometimes(FLUName, FLU, PPPepoch) ->
private_projections_are_stable(Namez, PollFunc) ->
Private1 = [{Name, get_latest_inner_proj_summ(FLU)} || {Name,FLU} <- Namez],
[PollFunc(5, 1, 10) || _ <- lists:seq(1,2)],
[PollFunc(15, 1, 10) || _ <- lists:seq(1,6)],
Private2 = [{Name, get_latest_inner_proj_summ(FLU)} || {Name,FLU} <- Namez],
%% Is = [Inner_p || {_,_,_,_,Inner_p} <- Private1],
put(stable, lists:sort(Private1)),
@ -495,54 +564,90 @@ private_projections_are_stable(Namez, PollFunc) ->
%%
FLUs = [FLU || {FLU,_Pid} <- Namez],
U_UPI_Rs = lists:usort([UPI++Rep ||
{_Nm,{_Epoch,UPI,Rep,_Dn,_W,InnerP}} <- Private2]),
FLU_uses = [{Name, Epoch} ||
{Name,{Epoch,_UPI,Rep,_Dn,_W,InnerP}} <- Private2],
{_Nm,{_EpochID,UPI,Rep,_Dn,_W,InnerP}} <- Private2]),
FLU_uses = [{Name, EpochID} ||
{Name,{EpochID,_UPI,Rep,_Dn,_W,InnerP}} <- Private2],
Witnesses = hd([Ws ||
{_Name,{_Epoch,_UPI,Rep,_Dn,Ws,InnerP}} <- Private2]),
{_Name,{_EpochID,_UPI,Rep,_Dn,Ws,InnerP}} <- Private2]),
HaveWitnesses_p = Witnesses /= [],
CMode = if HaveWitnesses_p -> cp_mode;
true -> ap_mode
end,
Unanimous_with_all_peers_p =
lists:all(fun({FLU, UsesEpoch}) ->
WhoInEpoch = [Name ||
{Name,{Epoch,_UPI,_Rep,_Dn,_W,I_}}<-Private2,
Epoch == UsesEpoch],
WhoInEpoch_s = ordsets:from_list(WhoInEpoch),
lists:all(fun({FLU, UsesEpochID}) ->
WhoInEpochID = [Name ||
{Name,{EpochID,_UPI,_Rep,_Dn,_W,I_}}<-Private2,
EpochID == UsesEpochID],
WhoInEpochID_s = ordsets:from_list(WhoInEpochID),
UPI_R_versions = [UPI++Rep ||
{_Name,{Epoch,UPI,Rep,_Dn,_W,I_}}<-Private2,
Epoch == UsesEpoch],
{_Name,{EpochID,UPI,Rep,_Dn,_W,I_}}<-Private2,
EpochID == UsesEpochID],
UPI_R_vers_s = ordsets:from_list(hd(UPI_R_versions)),
UPI_R_versions == [ [] ] % This FLU in minority partition
orelse
(length(lists:usort(UPI_R_versions)) == 1
andalso
(ordsets:is_subset(UPI_R_vers_s, WhoInEpoch_s) orelse
(ordsets:is_subset(UPI_R_vers_s, WhoInEpochID_s) orelse
(CMode == cp_mode andalso
ordsets:is_disjoint(UPI_R_vers_s, WhoInEpoch_s))))
ordsets:is_disjoint(UPI_R_vers_s, WhoInEpochID_s))))
end, FLU_uses),
Pubs = [begin
{ok, P} = ?FLU_PC:read_latest_projection(FLU, public),
{Name, P#projection_v1.epoch_number}
end || {Name, FLU} <- Namez],
Flat_U_UPI_Rs = lists:flatten(U_UPI_Rs),
%% Pubs = [begin
%% {ok, P} = ?FLU_PC:read_latest_projection(FLU, public),
%% {Name, P#projection_v1.epoch_number}
%% end || {Name, FLU} <- Namez],
%% In AP mode, if not disjoint, then a FLU will appear twice in
%% flattened U_UPIs.
AP_mode_disjoint_test_p =
if CMode == cp_mode ->
true;
CMode == ap_mode ->
lists:sort(Flat_U_UPI_Rs) == lists:usort(Flat_U_UPI_Rs)
end,
CP_mode_agree_test_p =
if CMode == cp_mode ->
FullMajority = (length(Namez) div 2) + 1,
EpochIDs = lists:sort(
[EpochID || {_Name,{EpochID,_UPI,_Rep,_Dn,_W,I_}}<-Private2]),
case lists:reverse(lists:sort(uniq_c(EpochIDs))) of
[{Count,EpochID}|_] when Count >= FullMajority ->
[{UPI, Rep}] = lists:usort(
[{_UPI,_Rep} || {_Name,{EpochIDx,_UPI,_Rep,_Dn,_W,I_}}<-Private2,
EpochIDx == EpochID]),
ExpectedFLUs = lists:sort(UPI ++ Rep),
UsingFLUs = lists:sort(
[Name || {Name,{EpochIDx,_UPI,_Rep,_Dn,_W,I_}}<-Private2,
EpochIDx == EpochID]),
io:format(user, "Priv2: EID ~W e ~w u ~w\n", [EpochID, 7, ExpectedFLUs, UsingFLUs]),
ordsets:is_subset(ordsets:from_list(ExpectedFLUs),
ordsets:from_list(UsingFLUs));
_Else ->
io:format(user, "Priv2: Else ~p\n", [_Else]),
false
end;
CMode == ap_mode ->
true
end,
io:format(user, "\nPriv1 ~P agree ~p\n", [lists:sort(Private1), 20, Unanimous_with_all_peers_p]),
Private1 == Private2 andalso
(CMode == cp_mode orelse % CP mode = skip this criterion
%% In AP mode,
%% if not disjoint, then a flu will appear twice in flattened U_UPIs.
lists:sort(Flat_U_UPI_Rs) == lists:usort(Flat_U_UPI_Rs)) andalso
%% Another property that we want is that for each participant
%% X mentioned in a UPI or Repairing list of some epoch E that
%% X is using the same epoch E.
%%
%% It's possible (in theory) for humming consensus to agree on
%% the membership of UPI+Repairing but arrive at those lists at
%% different epoch numbers. Machi chain replication won't
%% work in that case: all participants need to be using the
%% same epoch (and csum)! (NOTE: We ignore epoch_csum here.)
Unanimous_with_all_peers_p.
AP_mode_disjoint_test_p andalso
(
%% Another property that we want is that for each participant
%% X mentioned in a UPI or Repairing list of some epoch E that
%% X is using the same epoch E.
%%
%% It's possible (in theory) for humming consensus to agree on
%% the membership of UPI+Repairing but arrive at those lists at
%% different epoch numbers. Machi chain replication won't
%% work in that case: all participants need to be using the
%% same epoch (and csum)!
(CMode == ap_mode andalso Unanimous_with_all_peers_p)
orelse
(CMode == cp_mode andalso CP_mode_agree_test_p)
).
get_latest_inner_proj_summ(FLU) ->
{ok, Proj} = ?FLU_PC:read_latest_projection(FLU, private),
@ -559,5 +664,217 @@ random_sort(L) ->
L1 = [{random:uniform(99999), X} || X <- L],
[X || {_, X} <- lists:sort(L1)].
%% foo(NumFLUs, MgrOpts0): interactive/debugging helper that boots a small
%% chain and hands back the "DoIt" driver fun.
%%
%% Steps, in order:
%%   1. Start the partition simulator and the first NumFLUs entries of the
%%      FluInfo table (FLU server, FLU proxy client, chain manager for each).
%%   2. Ask the first manager for a projection and write it to the public
%%      projection store via the FLU proxies.
%%   3. Define DoIt/3, which makes every manager react to its environment
%%      Iters times, one spawned worker per manager.
%%   4. Set the simulator to "no partitions" and run DoIt(30, 0, 0) five times.
%%
%% Arguments:
%%   NumFLUs  - how many entries of FluInfo to use (the table has 18: a..r).
%%   MgrOpts0 - manager options; ?DEFAULT_MGR_OPTS is appended to them.
%%
%% Returns the DoIt fun on success.  If anything inside the try raises, the
%% result is the tuple {Class, Reason} instead of an exception.
%%
%% NOTE: nothing in this function stops the simulator, FLUs, proxies or
%% managers that it starts; there is no cleanup ("after") section here.
foo(NumFLUs, MgrOpts0) ->
timer:sleep(100),
%% Faster test startup, commented: io:format(user, short_doc(), []),
%% Faster test startup, commented: timer:sleep(3000),
TcpPort = 62877,
ok = filelib:ensure_dir("/tmp/c/not-used"),
%% {Name, TcpPort, DataDir} for every FLU that could take part.
FluInfo = [
{a,TcpPort+0,"/tmp/c/data.a"}, {b,TcpPort+1,"/tmp/c/data.b"},
{c,TcpPort+2,"/tmp/c/data.c"}, {d,TcpPort+3,"/tmp/c/data.d"},
{e,TcpPort+4,"/tmp/c/data.e"}, {f,TcpPort+5,"/tmp/c/data.f"},
{g,TcpPort+6,"/tmp/c/data.g"}, {h,TcpPort+7,"/tmp/c/data.h"},
{i,TcpPort+8,"/tmp/c/data.i"}, {j,TcpPort+9,"/tmp/c/data.j"},
{k,TcpPort+10,"/tmp/c/data.k"}, {l,TcpPort+11,"/tmp/c/data.l"},
{m,TcpPort+12,"/tmp/c/data.m"}, {n,TcpPort+13,"/tmp/c/data.n"},
{o,TcpPort+14,"/tmp/c/data.o"}, {p,TcpPort+15,"/tmp/c/data.p"},
{q,TcpPort+16,"/tmp/c/data.q"}, {r,TcpPort+17,"/tmp/c/data.r"}
],
FLU_biglist = [X || {X,_,_} <- FluInfo],
%% Names of the FLUs actually used in this run.
All_list = lists:sublist(FLU_biglist, NumFLUs),
io:format(user, "\nSET # of FLUs = ~w members ~w).\n",
[NumFLUs, All_list]),
%% NOTE(review): the meaning of the simulator's three start_link arguments
%% is not visible from here -- presumably a seed and two thresholds; confirm
%% against machi_partition_simulator.
machi_partition_simulator:start_link({111,222,33}, 0, 100),
_ = machi_partition_simulator:get(All_list),
Ps = [#p_srvr{name=Name,address="localhost",port=Port} ||
{Name,Port,_Dir} <- lists:sublist(FluInfo, NumFLUs)],
PsDirs = lists:zip(Ps,
[Dir || {_,_,Dir} <- lists:sublist(FluInfo, NumFLUs)]),
%% Start one FLU per selected entry.  FLU_pids is bound but not referenced
%% again in this function.
FLU_pids = [machi_flu1_test:setup_test_flu(Name, Port, Dir) ||
{#p_srvr{name=Name,port=Port}, Dir} <- PsDirs],
%% Namez: [{FLUName, ProxyPid}], one FLU proxy client per FLU.
Namez = [begin
{ok, PPid} = ?FLU_PC:start_link(P),
{Name, PPid}
end || {#p_srvr{name=Name}=P, _Dir} <- PsDirs],
MembersDict = machi_projection:make_members_dict(Ps),
MgrOpts = MgrOpts0 ++ ?DEFAULT_MGR_OPTS,
%% MgrNamez: [{FLUName, ManagerPid}], one chain manager per FLU.
MgrNamez =
[begin
{ok, MPid} = ?MGR:start_link(P#p_srvr.name, MembersDict, MgrOpts),
{P#p_srvr.name, MPid}
end || P <- Ps],
try
%% Use the first manager to calculate a projection and write it to the
%% public store through each proxy.
[{_, Ma}|_] = MgrNamez,
{ok, P1} = ?MGR:test_calc_projection(Ma, false),
%% NOTE(review): Ma is taken from MgrNamez while FLUPid is taken from Namez,
%% so this filter compares pids drawn from two different lists -- confirm
%% that it is intended to exclude anything.
[ok = ?FLU_PC:write_projection(FLUPid, public, P1) ||
{_, FLUPid} <- Namez, FLUPid /= Ma],
machi_partition_simulator:reset_thresholds(10, 50),
_ = machi_partition_simulator:get(All_list),
Parent = self(),
%% DoIt(Iters, S_min, S_max): spawn one worker per manager.  Each worker
%% runs Iters rounds of "ranked sleep, then trigger_react_to_env" and then
%% sends 'done' to Parent.  The caller waits for one 'done' per worker and
%% exits with icky_timeout if any wait exceeds 120 seconds.
DoIt = fun(Iters, S_min, S_max) ->
%% io:format(user, "\nDoIt: top\n\n", []),
io:format(user, "DoIt, ", []),
Pids = [spawn(fun() ->
random:seed(now()),
[begin
erlang:yield(),
%% Upper sleep bound for this round: uniform in 1..S_max+1.
S_max_rand = random:uniform(
S_max + 1),
%% io:format(user, "{t}", []),
Elapsed =
?MGR:sleep_ranked_order(
S_min, S_max_rand,
M_name, All_list),
_ = ?MGR:trigger_react_to_env(MMM),
%% Be more unfair by not
%% sleeping here.
% timer:sleep(S_max - Elapsed),
Elapsed
end || _ <- lists:seq(1, Iters)],
Parent ! done
end) || {M_name, MMM} <- MgrNamez ],
[receive
done ->
ok
after 120*1000 ->
exit(icky_timeout)
end || _ <- Pids]
end,
%% machi_partition_simulator:reset_thresholds(10, 50),
%% io:format(user, "\nLet loose the dogs of war!\n", []),
%% Run with an empty partition list, five rounds of 30 iterations each.
machi_partition_simulator:always_these_partitions([]),
io:format(user, "\nPuppies for everyone!\n", []),
[DoIt(30, 0, 0) || _ <- lists:seq(1,5)],
%% Hand the driver fun back so the caller can keep running iterations.
DoIt
catch XXX:YYY ->
%% Any exception is converted into a {Class, Reason} return value.
{XXX,YYY}
end.
%% Run-length encode a list, much like the Unix `uniq -c` tool.
%%
%% Each run of adjacent, exactly-equal elements is collapsed into one
%% {Count, Element} pair; pairs come out in the order the runs appear in
%% the input.  Only neighbouring elements are merged, so callers that want
%% global counts must sort the list first.  An empty list yields [].
uniq_c([]) ->
    [];
uniq_c([First|Rest]) ->
    uniq_c_runs(Rest, First, 1, []).

%% Walk the remaining input while carrying the element of the current run
%% (Item), its length so far (N) and the finished runs in reverse (Acc).
uniq_c_runs([], Item, N, Acc) ->
    lists:reverse([{N, Item}|Acc]);
uniq_c_runs([Item|Rest], Item, N, Acc) ->
    %% Same term as the current run (pattern match, so 1 and 1.0 differ).
    uniq_c_runs(Rest, Item, N + 1, Acc);
uniq_c_runs([Next|Rest], Item, N, Acc) ->
    %% A different term: close the current run and start a new one.
    uniq_c_runs(Rest, Next, 1, [{N, Item}|Acc]).
%% MaxIters = NumFLUs * (NumFLUs + 1) * 6,
%% Stable = fun(S_Namez) ->
%% true = lists:foldl(
%% fun(_, true) ->
%% true;
%% (_, _) ->
%% %% Run a few iterations
%% [DoIt(10, 10, 50) || _ <- lists:seq(1, 6)],
%% %% If stable, return true to short circuit remaining
%% private_projections_are_stable(S_Namez, DoIt)
%% end, false, lists:seq(0, MaxIters))
%% end,
%% %% Part_b = [{a,b},{c,d}],
%% Part_b = [{c,d}],
%% %% Part_b = [{X,Y} || {X,_} <- Namez, {Y,_} <- Namez, X == b orelse Y == b],
%% %% Part_d = [{X,Y} || {X,_} <- Namez, {Y,_} <- Namez, X == d orelse Y == d],
%% %% machi_partition_simulator:always_these_partitions(Part_b),
%% %% io:format(user, "\nSET partitions = ~w at ~w\n", [Part_b, time()]),
%% %% true = Stable(Namez), io:format(user, "\nSweet, private projections are stable\n", []), io:format(user, "\t~P\n", [get(stable), 14]), (fun() -> ReportXX = machi_chain_manager1_test:unanimous_report(Namez), true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), io:format(user, "Yay for ReportXX!\n", []) end)(),
%% %% Part_bd = Part_b ++ Part_d,
%% %% machi_partition_simulator:always_these_partitions(Part_bd),
%% %% io:format(user, "\nSET partitions = ~w at ~w\n", [Part_bd, time()]),
%% %% true = Stable(Namez), io:format(user, "\nSweet, private projections are stable\n", []), io:format(user, "\t~P\n", [get(stable), 14]), (fun() -> ReportXX = machi_chain_manager1_test:unanimous_report(Namez), true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), io:format(user, "Yay for ReportXX!\n", []) end)(),
%% os:cmd("rm /tmp/signal"),
%% Part_b_partial_d = Part_b ++ [{e,f}],
%% %% Part_b_partial_d = [{a,b}, {b,c}, {c,d}, {d,e}],
%% %% Part_b_partial_d = [{a,b}, {b,d}, {d,e}],
%% machi_partition_simulator:always_these_partitions(Part_b_partial_d),
%% io:format(user, "\nSET partitions = ~w at ~w\n", [Part_b_partial_d, time()]),
%% %% Only_ab_namez = [T || T={Name, _} <- Namez, lists:member(Name, [a,b])],
%% true = Stable(Namez), io:format(user, "\nSweet, private projections are stable\n", []), io:format(user, "\t~P\n", [get(stable), 14]), (fun() -> ReportXX = machi_chain_manager1_test:unanimous_report(Namez), true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), io:format(user, "Yay for ReportXX!\n", []) end)(),
%% machi_partition_simulator:always_these_partitions([{b,c}]),
%% io:format(user, "\nSET partitions = ~w at ~w\n", [[{b,c}], time()]),
%% %% Only_ab_namez = [T || T={Name, _} <- Namez, lists:member(Name, [a,b])],
%% [true = Stable(Namez) || _ <- [1,2,3] ], io:format(user, "\nSweet, private projections are stable\n", []), io:format(user, "\t~P\n", [get(stable), 14]), (fun() -> ReportXX = machi_chain_manager1_test:unanimous_report(Namez), true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), io:format(user, "Yay for ReportXX!\n", []) end)(),
%% %% [begin
%% %% QQQ = lists:sublist(Part_b_partial_d, NNN),
%% %% machi_partition_simulator:always_these_partitions(QQQ),
%% %% io:format(user, "\nSET partitions = ~w at ~w\n", [QQQ, time()]),
%% %% %% Only_ab_namez = [T || T={Name, _} <- Namez, lists:member(Name, [a,b])],
%% %% true = Stable(Namez), io:format(user, "\nSweet, private projections are stable\n", []), io:format(user, "\t~P\n", [get(stable), 14]), (fun() -> ReportXX = machi_chain_manager1_test:unanimous_report(Namez), true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), io:format(user, "Yay for ReportXX!\n", []) end)()
%% %% end || NNN <- lists:seq(1, length(Part_b_partial_d))],
%% io:format(user, "\nSET partitions = []\n", []),
%% io:format(user, "We should see convergence to 1 correct chain.\n", []),
%% machi_partition_simulator:no_partitions(),
%% [DoIt(50, 10, 50) || _ <- [1,2,3]],
%% true = private_projections_are_stable(Namez, DoIt),
%% io:format(user, "~s\n", [os:cmd("date")]),
%% %% We are stable now ... analyze it.
%% %% Create a report where at least one FLU has written a
%% %% private projection.
%% Report = machi_chain_manager1_test:unanimous_report(Namez),
%% %% ?D(Report),
%% %% Report is ordered by Epoch. For each private projection
%% %% written during any given epoch, confirm that all chain
%% %% members appear in only one unique chain, i.e., the sets of
%% %% unique chains are disjoint.
%% true = machi_chain_manager1_test:all_reports_are_disjoint(Report),
%% %% io:format(user, "\nLast Reports: ~p\n", [lists:nthtail(length(Report)-8,Report)]),
%% %% For each chain transition experienced by a particular FLU,
%% %% confirm that each state transition is OK.
%% PrivProjs = [{Name, begin
%% {ok, Ps9} = ?FLU_PC:get_all_projections(FLU,
%% private),
%% [P || P <- Ps9,
%% P#projection_v1.epoch_number /= 0]
%% end} || {Name, FLU} <- Namez],
%% try
%% [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} ||
%% {FLU, Psx} <- PrivProjs],
%% io:format(user, "\nAll sanity checks pass, hooray!\n", [])
%% catch _Err:_What ->
%% io:format(user, "Report ~p\n", [Report]),
%% io:format(user, "PrivProjs ~p\n", [PrivProjs]),
%% exit({line, ?LINE, _Err, _What})
%% end,
%% %% ?D(R_Projs),
%% ok
%% catch
%% XX:YY ->
%% io:format(user, "BUMMER ~p ~p @ ~p\n",
%% [XX, YY, erlang:get_stacktrace()]),
%% exit({bummer,XX,YY})
%% after
%% [ok = ?MGR:stop(MgrPid) || {_, MgrPid} <- MgrNamez],
%% [ok = ?FLU_PC:quit(PPid) || {_, PPid} <- Namez],
%% [ok = machi_flu1:stop(FLUPid) || FLUPid <- FLU_pids],
%% ok = machi_partition_simulator:stop()
%% end.
-endif. % !PULSE
-endif. % TEST