WIP: crufty TODO & comment cleanup

This commit is contained in:
Scott Lystig Fritchie 2015-04-14 16:17:49 +09:00
parent 59936eda62
commit 9e587b3d11
3 changed files with 70 additions and 195 deletions

View file

@ -28,7 +28,9 @@ func, and pattern match Erlang style in that func.
** TODO Fix all known bugs with Chain Manager ** TODO Fix all known bugs with Chain Manager
*** DONE Fix known bugs *** DONE Fix known bugs
*** TODO Clean up crufty TODO comments and other obvious cruft *** DONE Clean up crufty TODO comments and other obvious cruft
*** TODO Re-add verification step of stable epochs, including inner projections!
*** TODO Attempt to remove cruft items in flapping_i?
** TODO Finish OTP'izing the Chain Manager with FLU & proj store processes ** TODO Finish OTP'izing the Chain Manager with FLU & proj store processes
** TODO Change all protocol ops to enforce the epoch ID ** TODO Change all protocol ops to enforce the epoch ID

View file

@ -541,6 +541,8 @@ rank_and_sort_projections(Ps, CurrentProj) ->
%% Perhaps this means that we should change the responsibility %% Perhaps this means that we should change the responsibility
%% for repair management to the highest ranking member of the %% for repair management to the highest ranking member of the
%% UPI_list? %% UPI_list?
%% TODO Hrrrmmmmm ... what about the TODO comment in A40's A40a clause?
%% That could perhaps resolve this same problem in a better way?
rank_projections(Projs, CurrentProj) -> rank_projections(Projs, CurrentProj) ->
#projection_v1{all_members=All_list} = CurrentProj, #projection_v1{all_members=All_list} = CurrentProj,
@ -603,7 +605,6 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
#ch_mgr{name=MyName, proj=P_current, #ch_mgr{name=MyName, proj=P_current,
flap_limit=FlapLimit} = S) -> flap_limit=FlapLimit} = S) ->
?REACT(a30), ?REACT(a30),
%% io:format(user, "HEE30s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- get(react), is_atom(X) orelse element(1,X) == b10])]),
{P_newprop1, S2} = calc_projection(S, MyName), {P_newprop1, S2} = calc_projection(S, MyName),
?REACT({a30, ?LINE, [{current, machi_projection:make_summary(S#ch_mgr.proj)}]}), ?REACT({a30, ?LINE, [{current, machi_projection:make_summary(S#ch_mgr.proj)}]}),
?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}), ?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}),
@ -618,7 +619,6 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
NewEpoch = erlang:max(Epoch_newprop2, Epoch_latest) + 1, NewEpoch = erlang:max(Epoch_newprop2, Epoch_latest) + 1,
P_newprop3 = P_newprop2#projection_v1{epoch_number=NewEpoch}, P_newprop3 = P_newprop2#projection_v1{epoch_number=NewEpoch},
?REACT({a30, ?LINE, [{newprop3, machi_projection:make_summary(P_newprop3)}]}), ?REACT({a30, ?LINE, [{newprop3, machi_projection:make_summary(P_newprop3)}]}),
%% if MyName == 'd' -> io:format(user, "QQQQQ ~w P_latest is ~w\n", [MyName, machi_projection:make_summary(P_latest)]); true -> ok end,
{P_newprop10, S10} = {P_newprop10, S10} =
case get_flap_count(P_newprop3) of case get_flap_count(P_newprop3) of
@ -675,49 +675,22 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
_ -> _ ->
{P_newprop3, S3} {P_newprop3, S3}
end, end,
%% QQQ WHAT IF?
%% "What if we checked P_current for flap count, and it was > 0?" %% Here's a more common reason for moving from inner projection to
%% "What if we checked P_newprop10 for flap count, and it was = 0?" %% a normal projection: the old proj has an inner but the newprop
%% "If P_current also has an inner proj, then pull that inner proj..." %% does not.
%% "out and use *it* for P_newprop20/yeah!!?????" MoveFromInnerToNorm_p =
case {inner_projection_exists(P_current),
inner_projection_exists(P_newprop10)} of
{true, false} -> true;
{_, _} -> false
end,
%% If P_current says that we believe that we're currently flapping,
%% and if P_newprop10 says that we're no longer flapping, then we
%% really ought to stop flapping, right.
%% %%
%% QQQ 2015-04-13: New thinking %% Not quite so simple....
%%
%% -1. Hey, I'm wondering, duh, isn't #4 the right thing????
%%
%% 0. I think I'm leaning toward trying to use option #3 below first.
%% If that doesn't work, then consider #1 or #2?
%%
%% 1. There are a couple of approaches: a CRDT-style thing, info
%% is always growing and always mergeable (and probably having a
%% pruning problem?). So add a piece of data to the projection
%% that is always merged from all parties that says that a FLU F
%% now believes that the flapping episode that started at epoch E1
%% has now ended as of epoch E2. (It would probably be prunable
%% that for every FLU we maintain only the last two or one
%% flapping episode finished events?)
%%
%% 2. We could do something like query the public (and private?)
%% stores of all participants when we've flapping, to find
%% transient information that's written in some epoch E-d prior to
%% what we've witnessed in our latest-public-projection-read that
%% gave us news of the outside world via P_latest. ??
%%
%% 3. If we see P_latest come in from some other author (not us),
%% and it no longer has an flapping started epoch counter that
%% matches what recall from previous flaps, then we should reset
%% our flap count to zero and propose the last inner projection?
%% That makes a safe (?) transition from flapping to not-flapping,
%% yeah?
%%
%% 4. What a sec. We *KNOW* from our code below ...
%% case {inner_projection_exists(P_current),
%% inner_projection_exists(P_newprop10)} of
%% {true, false} ->
%% ... that *P_newprop10* says that we're no longer flapping. Yay.
%% So we should just use the last inner proj, P_current's inner proj.
%% Hrrrmmmm, except that's what we're just trying to do brute-force here?
%% So, what's wrong with what we're doing here, again???
%% %%
%% AAAAH, right. The case I'm dealing with right now is an asymmetric %% AAAAH, right. The case I'm dealing with right now is an asymmetric
%% partition in a 4 member chain that affects all_hosed=[a,b,c] but %% partition in a 4 member chain that affects all_hosed=[a,b,c] but
@ -727,17 +700,15 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
%% Yes, that hypothesis is confirmed by time-honored io:format() tracing. %% Yes, that hypothesis is confirmed by time-honored io:format() tracing.
%% %%
%% So, we need something to kick a silly member like 'd' out of its %% So, we need something to kick a silly member like 'd' out of its
%% am-still-flapping rut. So, let's try this: %% rut of am-still-flapping. So, let's try this:
%% If we see a P_latest from author != MyName, and if it has a %% If we see a P_latest from author != MyName, and if P_latest's
%% P_latest's author's flap count is 0, but that same member's %% author's flap count is now 0 (latest!), but that same member's
%% flap count in P_current is non-zero, then we assume that author %% flap count in P_current is non-zero, then we assume that author
%% has moved out of flapping state and that we ought to do the same. %% has moved out of flapping state and that therefore we ought to do
%% %% the same.
%% Hrm, well, the 'rank_boost!' thing isn't doing what I thought it
%% would. So, to resume in the morning ... see the LEFT OFF HERE below.
%% Remember! P_current is this manager's private in-use projection. %% Remember! P_current is this manager's private in-use projection.
%% It is always older or equal to P_latest's epoch! %% It is always less than or equal to P_latest's epoch!
Current_flap_counts = get_all_flap_counts(P_current), Current_flap_counts = get_all_flap_counts(P_current),
Latest_authors_flap_count_current = proplists:get_value( Latest_authors_flap_count_current = proplists:get_value(
Author_latest, Current_flap_counts), Author_latest, Current_flap_counts),
@ -752,17 +723,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
false false
end, end,
%% Here's a more common reason for moving from inner projection to if MoveFromInnerToNorm_p orelse Kicker_p ->
%% a normal projection: the old prob has an inner but the newprop
%% does not.
MoveFromInnerToNorm_p =
case {inner_projection_exists(P_current),
inner_projection_exists(P_newprop10)} of
{true, false} -> true;
{_, _} -> false
end,
if Kicker_p orelse MoveFromInnerToNorm_p ->
ClauseInfo = [{inner_kicker, Kicker_p}, ClauseInfo = [{inner_kicker, Kicker_p},
{move_from_inner, MoveFromInnerToNorm_p}], {move_from_inner, MoveFromInnerToNorm_p}],
?REACT({a30, ?LINE, ClauseInfo}), ?REACT({a30, ?LINE, ClauseInfo}),
@ -792,7 +753,16 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
%% Yeah, it appears to work, also, nice! This can help save some %% Yeah, it appears to work, also, nice! This can help save some
%% repair operations (compared to the other safe thing to do %% repair operations (compared to the other safe thing to do
%% here, which uses make_none_projection() to build & repair the %% here, which uses make_none_projection() to build & repair the
%% entire chain from scratch). %% entire chain from scratch). Note that this isn't a guarantee
%% that repair steps will be minimized: for a 4-member cluster
%% that has an asymmetric partition which organizes 3 clusters of
%% inner-upi=[a], inner-upi=[b], and inner-upi=[c,d], there is no
%% guarantee (yet?) that the [c,d] chain will be the UPI basis
%% for repairs when the partition is healed: the quickest author
%% after the healing will make that choice for everyone.
%% TODO: Perhaps that quickest author should consult all of the
%% other private stores, check their inner, and if there is a
%% higher rank there, then goto C200 for a wait-and-see cycle?
P_inner2A = inner_projection_or_self(P_current), P_inner2A = inner_projection_or_self(P_current),
P_inner2B = P_inner2B =
@ -864,7 +834,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
%% A40a (see flowchart) %% A40a (see flowchart)
Rank_newprop > Rank_latest -> Rank_newprop > Rank_latest ->
?REACT({b10, ?LINE, ?REACT({a40, ?LINE,
[{rank_latest, Rank_latest}, [{rank_latest, Rank_latest},
{rank_newprop, Rank_newprop}, {rank_newprop, Rank_newprop},
{latest_author, P_latest#projection_v1.author_server}]}), {latest_author, P_latest#projection_v1.author_server}]}),
@ -928,11 +898,6 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
react_to_env_A50(P_latest, FinalProps, S) -> react_to_env_A50(P_latest, FinalProps, S) ->
?REACT(a50), ?REACT(a50),
_HH = get(react),
%% io:format(user, "HEE50s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- _HH, is_atom(X)])]),
%% io:format(user, "HEE50 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(_HH)]),
?REACT({a50, ?LINE, [{latest_epoch, P_latest#projection_v1.epoch_number}, ?REACT({a50, ?LINE, [{latest_epoch, P_latest#projection_v1.epoch_number},
{final_props, FinalProps}]}), {final_props, FinalProps}]}),
{{no_change, FinalProps, P_latest#projection_v1.epoch_number}, S}. {{no_change, FinalProps, P_latest#projection_v1.epoch_number}, S}.
@ -943,15 +908,11 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
?REACT(b10), ?REACT(b10),
{_P_newprop_flap_time, P_newprop_flap_count} = get_flap_count(P_newprop), {_P_newprop_flap_time, P_newprop_flap_count} = get_flap_count(P_newprop),
_LatestAllFlapCounts = get_all_flap_counts_counts(P_latest),
%% Transitive flap counts are buggy: the current method to observe
%% them is **buggy**.
%% P_latest_trans_flap_count = my_find_minmost(LatestAllFlapCounts),
if if
LatestUnanimousP -> LatestUnanimousP ->
%% ?REACT({b10, ?LINE, [{latest_unanimous_p, LatestUnanimousP}]}), ?REACT({b10, ?LINE,
?REACT({b10, ?LINE, [{latest_unanimous_p, LatestUnanimousP}, [{latest_unanimous_p, LatestUnanimousP},
{latest_epoch,P_latest#projection_v1.epoch_number}, {latest_epoch,P_latest#projection_v1.epoch_number},
{latest_author,P_latest#projection_v1.author_server}, {latest_author,P_latest#projection_v1.author_server},
{newprop_epoch,P_newprop#projection_v1.epoch_number}, {newprop_epoch,P_newprop#projection_v1.epoch_number},
@ -965,25 +926,11 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
%% I am flapping ... what else do I do? %% I am flapping ... what else do I do?
?REACT({b10, ?LINE, [i_am_flapping, ?REACT({b10, ?LINE, [i_am_flapping,
{newprop_flap_count, P_newprop_flap_count}, {newprop_flap_count, P_newprop_flap_count},
%% {latest_trans_flap_count, P_latest_trans_flap_count},
{flap_limit, FlapLimit}]}), {flap_limit, FlapLimit}]}),
_B10Hack = get(b10_hack), _B10Hack = get(b10_hack),
%% if _B10Hack == false andalso P_newprop_flap_count - FlapLimit - 3 =< 0 -> io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]), put(b10_hack, true); true -> ok end,
io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]), io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]),
%io:format(user, "FLAP: ~w flapz ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- get(react), is_atom(X)])]),
if if
%% So, if we noticed a flap count by some FLU X with a
%% count below FlapLimit, then X crashes so that X's
%% flap count remains below FlapLimit, then we could get
%% stuck forever? Hrm, except that 'crashes' ought to be
%% detected by our own failure detector and get us out of
%% this current flapping situation, right? TODO
%%
%% 2015-04-10: TODO Flap count detection, as it has
%% been attempted before now, is buggy.
%%
%% MEANWHILE, we have learned some things about this %% MEANWHILE, we have learned some things about this
%% algorithm in the past few months. With the introduction %% algorithm in the past few months. With the introduction
%% of the "inner projection" concept, we know that the inner %% of the "inner projection" concept, we know that the inner
@ -999,7 +946,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
%% doesn't give an accurate sense of global flapping state. %% doesn't give an accurate sense of global flapping state.
%% FlapLimit is enough to be able to tell us to slow down. %% FlapLimit is enough to be able to tell us to slow down.
true -> %% P_latest_trans_flap_count >= FlapLimit -> true ->
%% We already know that I'm flapping. We need to %% We already know that I'm flapping. We need to
%% signal to the rest of the world that I'm writing %% signal to the rest of the world that I'm writing
%% and flapping and churning, so we cannot always %% and flapping and churning, so we cannot always
@ -1017,7 +964,6 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
end, end,
FinalProps = [{my_flap_limit, FlapLimit}, FinalProps = [{my_flap_limit, FlapLimit},
{throttle_seconds, ThrottleTime}], {throttle_seconds, ThrottleTime}],
io:format(user, "<--x=~w-.--~w-~w-~w->", [X, MyName, P_newprop_flap_count,FlapLimit]),
react_to_env_A50(P_latest, FinalProps, S2); react_to_env_A50(P_latest, FinalProps, S2);
true -> true ->
%% It is our moral imperative to write so that %% It is our moral imperative to write so that
@ -1025,7 +971,6 @@ io:format(user, "<--x=~w-.--~w-~w-~w->", [X, MyName, P_newprop_flap_count,FlapLi
%% everyone notices then eventually falls into %% everyone notices then eventually falls into
%% consensus. %% consensus.
?REACT({b10, ?LINE, [flap_continue]}), ?REACT({b10, ?LINE, [flap_continue]}),
io:format(user, "<--x=~w-oooo-~w-~w-~w->", [X, MyName, P_newprop_flap_count,FlapLimit]),
react_to_env_C300(P_newprop, P_latest, S2) react_to_env_C300(P_newprop, P_latest, S2)
end end
end; end;
@ -1065,21 +1010,6 @@ react_to_env_C100(P_newprop, P_latest,
#ch_mgr{name=MyName, proj=P_current}=S) -> #ch_mgr{name=MyName, proj=P_current}=S) ->
?REACT(c100), ?REACT(c100),
%% TODO 2015-04-10
%% OK, well, we need to be checking sanity on inner projections here,
%% but how to do it is still a bit of a mystery.
%%
%% If the *Y bindings are identical to incoming args, then we aren't
%% checking at all. That's bad, but we don't go into Infinite Loops of
%% ReallyReallyBad.
P_newpropY = P_newprop,
P_latestY = P_latest,
P_currentY = P_current,
%% P_newpropY = inner_projection_or_self(P_newprop),
%% P_latestY = inner_projection_or_self(P_latest),
%% P_currentY = inner_projection_or_self(P_current),
I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi), I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi),
I_am_Repairing_in_latest_p = lists:member(MyName, I_am_Repairing_in_latest_p = lists:member(MyName,
P_latest#projection_v1.repairing), P_latest#projection_v1.repairing),
@ -1092,13 +1022,6 @@ react_to_env_C100(P_newprop, P_latest,
Current_sane_p = projection_transition_is_sane(P_current, P_latest, Current_sane_p = projection_transition_is_sane(P_current, P_latest,
MyName), MyName),
Inner_sane_p =
if P_currentY == P_current, P_latestY == P_latest ->
true;
true ->
projection_transition_is_sane(P_currentY, P_latestY, MyName)
end,
case {ShortCircuit_p, Current_sane_p} of case {ShortCircuit_p, Current_sane_p} of
_ when P_current#projection_v1.epoch_number == 0 -> _ when P_current#projection_v1.epoch_number == 0 ->
%% Epoch == 0 is reserved for first-time, just booting conditions. %% Epoch == 0 is reserved for first-time, just booting conditions.
@ -1110,43 +1033,24 @@ react_to_env_C100(P_newprop, P_latest,
%% am/should be repairing. We ignore our proposal and try %% am/should be repairing. We ignore our proposal and try
%% to go with the latest. %% to go with the latest.
?REACT({c100, ?LINE, [repairing_short_circuit]}), ?REACT({c100, ?LINE, [repairing_short_circuit]}),
if Inner_sane_p == false -> io:format(user, "QQQ line ~p false\n", [?LINE]), timer:sleep(500); true -> ok end,
react_to_env_C110(P_latest, S); react_to_env_C110(P_latest, S);
{_, true} when Inner_sane_p -> {_, true} ->
?REACT({c100, ?LINE, [sane]}), ?REACT({c100, ?LINE, [sane]}),
if Inner_sane_p == false -> io:format(user, "QQQ line ~p false\n", [?LINE]), timer:sleep(500); true -> ok end,
react_to_env_C110(P_latest, S); react_to_env_C110(P_latest, S);
{_, _AnyOtherReturnValue} -> {_, _AnyOtherReturnValue} ->
%% P_latest is not sane or else P_latestY is not sane. %% P_latest is not sane.
%% By process of elimination, P_newprop is best, %% By process of elimination, P_newprop is best,
%% so let's write it. %% so let's write it.
io:format(user, "\nUrp: ~p ~p ~p ~p\n", [MyName, ShortCircuit_p, _AnyOtherReturnValue, Inner_sane_p]),
%% io:format(user, "c100 P_newprop : ~w\n", [machi_projection:make_summary(P_newprop)]),
%% io:format(user, "c100 P_newpropY: ~w\n", [machi_projection:make_summary(P_newpropY)]),
%% io:format(user, "c100 P_latest : ~w\n", [machi_projection:make_summary(P_latest)]),
%% io:format(user, "c100 P_latestY: ~w\n", [machi_projection:make_summary(P_latestY)]),
?REACT({c100, ?LINE, [not_sane]}), ?REACT({c100, ?LINE, [not_sane]}),
react_to_env_C300(P_newprop, P_latest, S) react_to_env_C300(P_newprop, P_latest, S)
end. end.
react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) ->
?REACT(c110), ?REACT(c110),
%% TOOD: Should we carry along any extra info that that would be useful
%% in the dbg2 list?
Extra_todo = [], Extra_todo = [],
RunEnv = S#ch_mgr.runenv, P_latest2 = machi_projection:update_dbg2(P_latest, Extra_todo),
Islands = proplists:get_value(network_islands, RunEnv),
P_latest2 = machi_projection:update_dbg2(
P_latest,
[%% {network_islands, Islands},
%% {hooray, {v2, date(), time()}}
Islands--Islands
|Extra_todo]),
MyNamePid = proxy_pid(MyName, S), MyNamePid = proxy_pid(MyName, S),
%% TODO: We need to fetch the inner projection, if it exists, and
%% write it to the private store. Put the outer projection
%% into dbg2 for forensics and perhaps re-start use?
ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO), ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO),
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
true -> true ->
@ -1185,10 +1089,6 @@ react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H} = S) ->
H2 H2
end, end,
HH = get(react),
%% io:format(user, "HEE120s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- HH, is_atom(X)])]),
%% io:format(user, "HEE120 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(HH)]),
?REACT({c120, [{latest, machi_projection:make_summary(P_latest)}]}), ?REACT({c120, [{latest, machi_projection:make_summary(P_latest)}]}),
{{now_using, FinalProps, P_latest#projection_v1.epoch_number}, {{now_using, FinalProps, P_latest#projection_v1.epoch_number},
S#ch_mgr{proj=P_latest, proj_history=H3}}. S#ch_mgr{proj=P_latest, proj_history=H3}}.
@ -1231,7 +1131,6 @@ react_to_env_C310(P_newprop, S) ->
?REACT(c310), ?REACT(c310),
Epoch = P_newprop#projection_v1.epoch_number, Epoch = P_newprop#projection_v1.epoch_number,
{WriteRes, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S), {WriteRes, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
%% io:format(user, "QQQ ~w public write ~w: ~w\n", [S#ch_mgr.name, machi_projection:make_summary(P_newprop), WriteRes]),
?REACT({c310, ?LINE, ?REACT({c310, ?LINE,
[{newprop, machi_projection:make_summary(P_newprop)}, [{newprop, machi_projection:make_summary(P_newprop)},
{write_result, WriteRes}]}), {write_result, WriteRes}]}),
@ -1337,36 +1236,26 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit,
AllHosed = [] AllHosed = []
end, end,
%% 2015-04-13: TODO: this whole notion of "settled" flap counts
%% has not worked as initially planned. Remove it all.
%% %% If there's at least one count in AllFlapCounts that isn't my
%% %% flap count, and if it's over the flap limit, then consider them
%% %% settled.
%% AllFlapCountsSettled = lists:keydelete(MyName, 1, AllFlapCounts) /= []
%% andalso
%% my_find_minmost(AllFlapCounts) >= FlapLimit,
FlappingI = {flapping_i, [{flap_count, {NewFlapStart, NewFlaps}}, FlappingI = {flapping_i, [{flap_count, {NewFlapStart, NewFlaps}},
{all_hosed, AllHosed}, {all_hosed, AllHosed},
{all_flap_counts, lists:sort(AllFlapCounts)}, {all_flap_counts, lists:sort(AllFlapCounts)},
%% {all_flap_counts_settled, AllFlapCountsSettled}, {bad,BadFLUs}]},
{bad,BadFLUs},
{da_downu, DownUnion}, % debugging aid
{da_hosedtu, HosedTransUnion}, % debugging aid
{da_downreports, [{P#projection_v1.epoch_number, P#projection_v1.author_server, P#projection_v1.down} || P <- [BestP|NotBestPs]]} % debugging aid
]},
Dbg2 = [FlappingI|P_newprop#projection_v1.dbg], Dbg2 = [FlappingI|P_newprop#projection_v1.dbg],
%% SLF TODO: 2015-03-04: I'm growing increasingly suspicious of %% TODO: 2015-03-04: I'm growing increasingly suspicious of
%% the 'runenv' variable that's threaded through all this code. %% the 'runenv' variable that's threaded through all this code.
%% It isn't doing what I'd originally intended. And I think that %% It isn't doing what I'd originally intended. Fix it.
%% the flapping information that we've just constructed here is
%% going to get lost, and that's a shame. Fix it.
RunEnv2 = replace(RunEnv1, [FlappingI]), RunEnv2 = replace(RunEnv1, [FlappingI]),
%% NOTE: If we'd increment of flaps here, that doesn't mean that %% NOTE: Just because we increment flaps here, there's no correlation
%% someone's public proj store has been updated. For example, %% to successful public proj store writes! For example,
%% if we loop through states C2xx a few times, we would incr %% if we loop through states C2xx a few times, we would incr
%% flaps each time ... but the C2xx path doesn't write a new %% flaps each time ... but the C2xx path doesn't write a new
%% proposal to everyone's public proj stores, and there's no %% proposal to everyone's public proj stores. Similarly,
%% guarantee that anyone else as written a new public proj either. %% if we go through to C300, we will *try* to write to all public
%% stores, but the C3xx path doesn't care if all of those write
%% attempts *fail*. Our flap count is a rough heuristic only, and
%% a large local flaps count gives no concrete guarantee that any
%% communication has been successful with any other part of the
%% cluster.
{machi_projection:update_checksum(P_newprop#projection_v1{dbg=Dbg2}), {machi_projection:update_checksum(P_newprop#projection_v1{dbg=Dbg2}),
S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv2}}. S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv2}}.
@ -1435,7 +1324,7 @@ projection_transition_is_sane(
true = is_binary(CSum1) andalso is_binary(CSum2), true = is_binary(CSum1) andalso is_binary(CSum2),
{_,_,_} = CreationTime1, {_,_,_} = CreationTime1,
{_,_,_} = CreationTime2, {_,_,_} = CreationTime2,
true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo will probably change true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo type may change?
true = is_list(All_list1) andalso is_list(All_list2), true = is_list(All_list1) andalso is_list(All_list2),
true = is_list(Down_list1) andalso is_list(Down_list2), true = is_list(Down_list1) andalso is_list(Down_list2),
true = is_list(UPI_list1) andalso is_list(UPI_list2), true = is_list(UPI_list1) andalso is_list(UPI_list2),
@ -1463,15 +1352,6 @@ projection_transition_is_sane(
true = sets:is_disjoint(DownS2, RepairingS2), true = sets:is_disjoint(DownS2, RepairingS2),
true = sets:is_disjoint(UPIS2, RepairingS2), true = sets:is_disjoint(UPIS2, RepairingS2),
%% TODO relaxing this is ok, perhaps?
%% %% The author must not be down.
%% false = lists:member(AuthorServer1, Down_list1),
%% false = lists:member(AuthorServer2, Down_list2),
%% TODO relaxing this is ok, perhaps, also?
%% The author must be in either the UPI or repairing list.
%% true = lists:member(AuthorServer1, UPI_list1 ++ Repairing_list1),
%% true = lists:member(AuthorServer2, UPI_list2 ++ Repairing_list2),
%% Additions to the UPI chain may only be at the tail %% Additions to the UPI chain may only be at the tail
UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2), UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2),
if UPI_common_prefix == [] -> if UPI_common_prefix == [] ->
@ -1593,10 +1473,7 @@ projection_transition_is_sane(
%% then adopts that projection (and unwedges %% then adopts that projection (and unwedges
%% itself, etc etc). %% itself, etc etc).
%% io:format(user, "QQQ: RetrospectiveP ~p\n", [RetrospectiveP]), exit({todo, revisit, ?MODULE, ?LINE}),
%% io:format(user, "QQQ: UPI_2_suffix ~p\n", [UPI_2_suffix]),
%% io:format(user, "QQQ: UPI_2_suffix_from_UPI1 ~p\n", [UPI_2_suffix_from_UPI1]),
%% io:format(user, "QQQ: UPI_2_suffix_from_Repairing1 ~p\n", [UPI_2_suffix_from_Repairing1]),
io:format(user, "|~p,~p TODO revisit|", io:format(user, "|~p,~p TODO revisit|",
[?MODULE, ?LINE]), [?MODULE, ?LINE]),
ok; ok;
@ -1606,9 +1483,11 @@ projection_transition_is_sane(
%% normal has a UPI that has nothing to do with %% normal has a UPI that has nothing to do with
%% RelativeToServer a.k.a. me. %% RelativeToServer a.k.a. me.
%% from: %% from:
%% {epoch,847},{author,c},{upi,[c]},{repair,[]},{down,[a,b,d]}, %% {epoch,847},{author,c},{upi,[c]},{repair,[]},
%% {down,[a,b,d]}
%% to: %% to:
%% {epoch,848},{author,a},{upi,[a]},{repair,[]},{down,[b,c,d]}, %% {epoch,848},{author,a},{upi,[a]},{repair,[]},
%% {down,[b,c,d]}
if UPI_2_suffix == [AuthorServer2] -> if UPI_2_suffix == [AuthorServer2] ->
true; true;
not RetrospectiveP -> not RetrospectiveP ->
@ -1623,12 +1502,6 @@ projection_transition_is_sane(
S1 = machi_projection:make_summary(P1), S1 = machi_projection:make_summary(P1),
S2 = machi_projection:make_summary(P2), S2 = machi_projection:make_summary(P2),
Trace = erlang:get_stacktrace(), Trace = erlang:get_stacktrace(),
%% %% TODO: this history goop is useful sometimes for debugging but
%% %% not for any "real" use. Get rid of it, for the long term.
%% H = (catch [{FLUName, Type, P#projection_v1.epoch_number, machi_projection:make_summary(P)} ||
%% FLUName <- P1#projection_v1.all_members,
%% Type <- [public,private],
%% P <- ?FLU_PC:proj_get_all(orddict:fetch(FLUName, What?), Type)]),
{err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer, {err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer,
history, (catch lists:sort([no_history])), history, (catch lists:sort([no_history])),
stack, Trace} stack, Trace}

View file

@ -267,7 +267,7 @@ convergence_demo_testfun(NumFLUs) ->
io:format(user, "\nSweet, all_hosed are identical-or-islands-inconclusive.\n", []), io:format(user, "\nSweet, all_hosed are identical-or-islands-inconclusive.\n", []),
timer:sleep(1000), timer:sleep(1000),
ok ok
end || Partition <- AllPartitionCombinations %% end || Partition <- AllPartitionCombinations
%% end || Partition <- [ [{a,b},{b,d},{c,b}], %% end || Partition <- [ [{a,b},{b,d},{c,b}],
%% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}], %% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
%% %% [{a,b},{b,d},{c,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}], %% %% [{a,b},{b,d},{c,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
@ -278,7 +278,7 @@ convergence_demo_testfun(NumFLUs) ->
%% end || Partition <- [ [{a,b}, {b,c}] ] %% hosed-not-equal @ 3 FLUs %% end || Partition <- [ [{a,b}, {b,c}] ] %% hosed-not-equal @ 3 FLUs
%% end || Partition <- [ [{b,d}] ] %% end || Partition <- [ [{b,d}] ]
%% end || Partition <- [ [{a,b}, {b,a}] ] %% end || Partition <- [ [{a,b}, {b,a}] ]
%% end || Partition <- [ [{a,b}, {b,a}, {a,c},{c,a}] ] end || Partition <- [ [{a,b}, {b,a}, {a,c},{c,a}] ]
%% end || Partition <- [ [{a,b}], %% end || Partition <- [ [{a,b}],
%% [{b,a}] ] %% [{b,a}] ]
%% end || Partition <- [ [{a,b}, {c,b}], %% end || Partition <- [ [{a,b}, {c,b}],