From 87867f8f2e31ed30c2744f53123d2d3261955990 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sat, 18 Jul 2015 17:53:17 +0900 Subject: [PATCH 1/9] WIP: bugfix for rare flapping infinite loop (done fix I hope) {sigh} This is a correction to a think-o error in the "WIP: bugfix for rare flapping infinite loop (better fix I hope)" bugfix that I thought I had finished in the slf/chain-manager/cp-mode branch. Silly me, the test for myself as the author of the not_sane transition was wrong: we don't do that kind of insanity, other nodes might, though. ^_^ --- src/machi_chain_manager1.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 39aac50..0424511 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -1390,8 +1390,7 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, react_to_env_C110(P_latest, S); %% 20150715: I've seen this loop happen with {expected_author2,X} %% where nobody agrees, weird. - false when Author_latest == MyName andalso - is_record(Flap_latest, flap_i) andalso + false when is_record(Flap_latest, flap_i) andalso Flap_latest#flap_i.flapping_me == true -> ?REACT({c100, ?LINE}), ?V("\n\n1YOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]), @@ -1439,7 +1438,7 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, P_newprop, P_latest, S); - {expected_author2,_}=_ExpectedErr when Author_latest == MyName andalso + {expected_author2,_}=_ExpectedErr when is_record(Flap_latest, flap_i) andalso Flap_latest#flap_i.flapping_me == true -> ?REACT({c100, ?LINE}), From 57b712203570fd16b50c1689456c15df6ffa791d Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sat, 18 Jul 2015 23:22:14 +0900 Subject: [PATCH 2/9] Fix bug found by PULSE that's not directly chain manager-related (more) PULSE managed to create a situation where machi_proxy_flu_client1 would appear to fail a remote attempt to write_projection. The client would retry, but the 1st attempt really did get through to the server. So, if we hit this case, we try to read the projection, and if it's exactly equal to what we tried to write, we consider the op a success. Ditto for write_chunk. Fix up eunit test to accomodate the change of semantics. --- src/machi_proxy_flu1_client.erl | 37 +++++++++++++++++++++++--- test/machi_proxy_flu1_client_test.erl | 38 ++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl index eb3adf8..bef5ad6 100644 --- a/src/machi_proxy_flu1_client.erl +++ b/src/machi_proxy_flu1_client.erl @@ -223,8 +223,26 @@ write_projection(PidSpec, ProjType, Proj) -> %% @doc Write a projection `Proj' of type `ProjType'. write_projection(PidSpec, ProjType, Proj, Timeout) -> - gen_server:call(PidSpec, {req, {write_projection, ProjType, Proj}}, - Timeout). + case gen_server:call(PidSpec, {req, {write_projection, ProjType, Proj}}, + Timeout) of + {error, written}=Err -> + Epoch = Proj#projection_v1.epoch_number, + case read_projection(PidSpec, ProjType, Epoch, Timeout) of + {ok, Proj2} when Proj2 == Proj -> + %% The proxy made (at least) two attempts to write + %% this projection. An earlier one appeared to + %% have failed, so the proxy retried. 
The later + %% attempt returned to us {error,written} because + %% the earlier attempt was actually received & + %% processed by the server. So, we consider this + %% a successful write. + ok; + _ -> + Err + end; + Else -> + Else + end. %% @doc Get all projections from the FLU's projection store. @@ -277,8 +295,19 @@ write_chunk(PidSpec, EpochID, File, Offset, Chunk) -> %% with `Prefix' at `Offset'. write_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) -> - gen_server:call(PidSpec, {req, {write_chunk, EpochID, File, Offset, Chunk}}, - Timeout). + case gen_server:call(PidSpec, {req, {write_chunk, EpochID, File, Offset, Chunk}}, + Timeout) of + {error, written}=Err -> + case read_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) of + {ok, Chunk2} when Chunk2 == Chunk -> + %% See equivalent comment inside write_projection(). + ok; + _ -> + Err + end; + Else -> + Else + end. %%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/test/machi_proxy_flu1_client_test.erl b/test/machi_proxy_flu1_client_test.erl index 8550384..aba5612 100644 --- a/test/machi_proxy_flu1_client_test.erl +++ b/test/machi_proxy_flu1_client_test.erl @@ -121,12 +121,14 @@ flu_restart_test() -> {ok, Prox1} = ?MUT:start_link(I), try FakeEpoch = ?DUMMY_PV1_EPOCH, - Data = <<"data!">>, + Data = <<"data!">>, + Dataxx = <<"Fake!">>, {ok, {Off1,Size1,File1}} = ?MUT:append_chunk(Prox1, FakeEpoch, <<"prefix">>, Data, infinity), P_a = #p_srvr{name=a, address="localhost", port=6622}, - P1 = machi_projection:new(1, a, [P_a], [], [a], [], []), + P1 = machi_projection:new(1, a, [P_a], [], [a], [], []), + P1xx = P1#projection_v1{dbg2=["not exactly the same as P1!!!"]}, EpochID = {P1#projection_v1.epoch_number, P1#projection_v1.epoch_csum}, ok = ?MUT:write_projection(Prox1, public, P1), @@ -182,18 +184,31 @@ flu_restart_test() -> (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:read_projection(Prox1, private, 7) end, - fun(run) -> {error, written} = + fun(run) -> ok = ?MUT:write_projection(Prox1, public, P1), ok; (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:write_projection(Prox1, public, P1) end, - fun(run) -> {error, written} = + fun(run) -> ok = ?MUT:write_projection(Prox1, private, P1), ok; (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:write_projection(Prox1, private, P1) end, + fun(run) -> {error, written} = + ?MUT:write_projection(Prox1, public, P1xx), + ok; + (line) -> io:format("line ~p, ", [?LINE]); + (stop) -> ?MUT:write_projection(Prox1, public, P1xx) + end, + fun(run) -> {error, written} = + ?MUT:write_projection(Prox1, private, P1xx), + ok; + (line) -> io:format("line ~p, ", [?LINE]); + (stop) -> ?MUT:write_projection(Prox1, private, P1xx) + end, + fun(run) -> {ok, [_]} = ?MUT:get_all_projections(Prox1, public), ok; @@ -249,9 +264,7 @@ flu_restart_test() -> (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:wedge_status(Prox1) end, - %% NOTE: When write-once enforcement is enabled, this test - %% will fail: change ok -> {error, written} - fun(run) -> %% {error, written} = + fun(run) -> ok = ?MUT:write_chunk(Prox1, FakeEpoch, File1, Off1, Data, infinity), @@ -259,6 +272,17 @@ flu_restart_test() -> (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:write_chunk(Prox1, FakeEpoch, File1, Off1, Data, infinity) + end, + %% NOTE: When write-once enforcement is enabled, this test + %% will fail: change ok -> {error, written} + fun(run) -> %% {error, written} = + ok = + ?MUT:write_chunk(Prox1, FakeEpoch, File1, Off1, + Dataxx, infinity), + ok; + (line) -> io:format("line ~p, ", [?LINE]); + 
(stop) -> ?MUT:write_chunk(Prox1, FakeEpoch, File1, Off1, + Dataxx, infinity) end ], From b8c642aaa7eeb7d849e6b88b8e17dafada726ff6 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 19 Jul 2015 00:43:10 +0900 Subject: [PATCH 3/9] WIP: bugfix for rare flapping infinite loop (done^2 fix I hope) How can even computer? So, there's a flavor of the flapping infinite loop problem that can happen without flapping being detected (by the existing flapping detector, that is). That detector relies on a series of accepted projections to converge to a single projection repeated X times. However, it's possible to have a race with a simulated repair "finishing" that causes a problem so that no more projections are ever accepted. Oops. See also: new comments in do_react_to_env(). --- src/machi_chain_manager1.erl | 121 +++++++++++++++++------------------ 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 0424511..a735da5 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -68,7 +68,7 @@ flaps=0 :: integer(), flap_start = ?NOT_FLAPPING_START :: {{'epk', integer()}, erlang:timestamp()}, - flap_not_sanes :: orddict:orddict(), + not_sanes :: orddict:orddict(), repair_worker :: 'undefined' | pid(), repair_start :: 'undefined' | erlang:timestamp(), repair_final_status :: 'undefined' | term(), @@ -226,7 +226,7 @@ init({MyName, InitMembersDict, MgrOpts}) -> flap_limit=length(All_list) + 50, timer='undefined', proj_history=queue:new(), - flap_not_sanes=orddict:new(), + not_sanes=orddict:new(), runenv=RunEnv, opts=MgrOpts}, {_, S2} = do_set_chain_members_dict(MembersDict, S), @@ -805,8 +805,46 @@ do_react_to_env(#ch_mgr{name=MyName, S2#ch_mgr{proj=NewProj, members_dict=NewMembersDict}} end; do_react_to_env(S) -> + %% The not_sanes manager counting dictionary is not strictly + %% limited to flapping scenarios. (Though the mechanism first + %% started as a way to deal with rare flapping scenarios.) + %% + %% I believe that the problem cannot happen in real life, but it can + %% happen in simulated environments, especially if the simulation for + %% repair can be approximately infinitely fast. + %% + %% For example: + %% P_current: epoch=1135, UPI=[b,e,a], Repairing=[c,d], author=e + %% + %% Now a partition happens, a & b are on an island, c & d & e on + %% the other island. + %% + %% P_newprop: epoch=1136, UPI=[e,c], Repairing=[d], author=e + %% + %% Why does e think that this is feasible? Well, the old UPI was + %% [b,e,a], and we know that a & b are partitioned away from e. + %% Therefore e chooses the best UPI, [e]. However, the simulator + %% now also says, hey, there are nodes in the repairing list, so + %% let's simulate a repair ... and the repair goes infinitely + %% quickly ...and the epoch is stable during the repair period + %% (i.e., both e/repairer and c/repairee remained in the same + %% epoch 1135) ... so e decides that the simulated repair is + %% "finished" and it's time to add the repairee to the tail of the + %% UPI ... so that's why 1136's UPI=[e,c]. + %% + %% I'll try to add a condition to the simulated repair to try to + %% make slightly fewer assumptions in a row. However, I believe + %% it's a good idea to keep this too-many-not_sane-transition- + %% attempts counter very generic (i.e., not specific for flapping + %% as it once was). + %% + %% The not_sanes counter dict should be reset each time we start + %% an iteration. 
One could argue that state only for a single + %% iteration shouldn't go in #ch_mgr but should be a separate arg + %% threaded through each of the FSM funcs. + %% TODO possible refactoring task? put(react, []), - react_to_env_A10(S). + react_to_env_A10(S#ch_mgr{not_sanes=orddict:new()}). react_to_env_A10(S) -> ?REACT(a10), @@ -986,7 +1024,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, ?REACT({a30, ?LINE, [{inner_summary, machi_projection:make_summary(P_inner2)}]}), %% Adjust the outer projection's #flap_i info. - ?V("~w,", [{'YOYO',MyName,NewEpoch}]), + ?V("~w,", [{'FLAP',MyName,NewEpoch}]), #projection_v1{flap=OldFlap} = P_newprop3, NewFlap = OldFlap#flap_i{flapping_me=true}, ?REACT({a30, ?LINE, [flap_continue, @@ -1360,7 +1398,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, flap=Flap_latest0}=P_latest, #ch_mgr{name=MyName, proj=P_current, - flap_not_sanes=NotSanesDict0}=S) -> + not_sanes=NotSanesDict0}=S) -> ?REACT(c100), Sane = projection_transition_is_sane(P_current, P_latest, MyName), @@ -1382,18 +1420,16 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, _ when P_current#projection_v1.epoch_number == 0 -> %% Epoch == 0 is reserved for first-time, just booting conditions. ?REACT({c100, ?LINE, [first_write]}), - erase(perhaps_reset_loop), + if Sane == true -> ok; true -> ?V("insane-~w-~w@~w,", [MyName, P_newprop#projection_v1.epoch_number, ?LINE]) end, %%% DELME!!! react_to_env_C110(P_latest, S); true -> ?REACT({c100, ?LINE, [sane]}), - erase(perhaps_reset_loop), + if Sane == true -> ok; true -> ?V("insane-~w-~w@~w,", [MyName, P_newprop#projection_v1.epoch_number, ?LINE]) end, %%% DELME!!! react_to_env_C110(P_latest, S); %% 20150715: I've seen this loop happen with {expected_author2,X} %% where nobody agrees, weird. - false when is_record(Flap_latest, flap_i) andalso - Flap_latest#flap_i.flapping_me == true -> + _ -> ?REACT({c100, ?LINE}), - ?V("\n\n1YOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]), %% This is a fun case. We had just enough asymmetric partition %% to cause the chain to fragment into two *incompatible* and %% *overlapping membership* chains, but the chain fragmentation @@ -1435,62 +1471,30 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, %% %% So, we're going to keep track in #ch_mgr state for the number %% of times that this insane judgement has happened. - + %% + %% See also: comment in do_react_to_env() about + %% non-flapping-scenario that can also cause us to want to + %% collapse to the none_projection to break a + %% livelock/infinite loop. react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, - P_newprop, P_latest, S); - {expected_author2,_}=_ExpectedErr when - is_record(Flap_latest, flap_i) andalso - Flap_latest#flap_i.flapping_me == true -> - ?REACT({c100, ?LINE}), - react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, - P_newprop, P_latest, S); - {expected_author2,_ExpectedAuthor2}=_ExpectedErr -> - case get(perhaps_reset_loop) of - undefined -> - put(perhaps_reset_loop, 1), - ?REACT({c100, ?LINE, [not_sane, get(why2), _ExpectedErr]}), - react_to_env_C300(P_newprop, P_latest, S); - X when X > ?TOO_FREQUENT_BREAKER -> - %% Ha, yes, this is possible. 
For example: - %% outer: author=e,upi=[b,a,d],repair=[c,e] - %% inner: author=e,upi=[b,e], repair=[] - %% In this case, the transition from inner to outer by A30 - %% has chosen the wrong author. We have two choices. - %% 1. Accept this transition, because it really was the - %% safe & transition-approved UPI+repeairing that we - %% were using while we were flapping. I'm 99% certain - %% that this is safe. TODO: Verify - %% 2. I'm not yet 100% certain that #1 is safe, so instead - %% we fall back to the one thing that we know is safe: - %% the 'none' projection, which lets the chain rebuild - %% itself normally during future iterations. - ?REACT({c100, ?LINE}), - react_to_env_C103(P_latest, S); - X -> - put(perhaps_reset_loop, X+1), - ?REACT({c100, ?LINE, [not_sane, get(why2), _ExpectedErr]}), - react_to_env_C300(P_newprop, P_latest, S) - end; - _AnyOtherReturnValue -> - %% P_latest is not sane. - %% By process of elimination, P_newprop is best, - %% so let's write it. - ?REACT({c100, ?LINE, [not_sane, get(why2), _AnyOtherReturnValue]}), - erase(perhaps_reset_loop), - react_to_env_C300(P_newprop, P_latest, S) + P_newprop, P_latest, S) end. react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, P_newprop, P_latest, S) -> NotSanesDict = orddict:update_counter(Author_latest, 1, NotSanesDict0), - S2 = S#ch_mgr{flap_not_sanes=NotSanesDict}, + S2 = S#ch_mgr{not_sanes=NotSanesDict}, case orddict:fetch(Author_latest, NotSanesDict) of N when N > ?TOO_FREQUENT_BREAKER -> ?V("\n\nYOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]), ?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}), react_to_env_C103(P_latest, S2); N -> + ?V("YOYO,~w,~w,~w,",[MyName, P_latest#projection_v1.epoch_number,N]), ?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}), + %% P_latest is not sane. + %% By process of elimination, P_newprop is best, + %% so let's write it. react_to_env_C300(P_newprop, P_latest, S2) end. @@ -1507,8 +1511,7 @@ react_to_env_C103(#projection_v1{epoch_number=Epoch_latest, P_none = machi_projection:update_checksum(P_none1), %% Use it, darn it, because it's 100% safe. And exit flapping state. react_to_env_C100(P_none, P_none, S#ch_mgr{flaps=0, - flap_start=?NOT_FLAPPING_START, - flap_not_sanes=orddict:new()}). + flap_start=?NOT_FLAPPING_START}). react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> ?REACT(c110), @@ -1632,8 +1635,7 @@ react_to_env_C310(P_newprop, S) -> calculate_flaps(P_newprop, _P_current, _FlapLimit, #ch_mgr{name=MyName, proj_history=H, flap_start=FlapStart, - flaps=Flaps, flap_not_sanes=NotSanesDict0, - runenv=RunEnv1}=S) -> + flaps=Flaps, runenv=RunEnv1}=S) -> HistoryPs = queue:to_list(H), Ps = HistoryPs ++ [P_newprop], UniqueProposalSummaries = lists:usort([{P#projection_v1.upi, @@ -1695,7 +1697,6 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, true -> NewFlapStart = FlapStart end, - NotSanesDict = NotSanesDict0, %% Wow, this behavior is almost spooky. %% @@ -1725,7 +1726,6 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, {_N, _} -> NewFlaps = 0, NewFlapStart = ?NOT_FLAPPING_START, - NotSanesDict = orddict:new(), AllFlapCounts = [], AllHosed = [] end, @@ -1748,8 +1748,7 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, %% It isn't doing what I'd originally intended. Fix it. {machi_projection:update_checksum(P_newprop#projection_v1{ flap=FlappingI}), - S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, - flap_not_sanes=NotSanesDict, runenv=RunEnv1}}. + S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv1}}. 
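%% A minimal sketch (illustration only, not code added by this patch) of
%% the per-author counting idiom that react_to_env_C100_inner() uses above:
%% bump the latest author's not_sane counter, and once it passes the
%% breaker threshold, give up on P_newprop and fall back to the 100%-safe
%% none projection via C103.  Module and function names here are
%% hypothetical.
%%
%%     -module(not_sane_counter_sketch).
%%     -export([note_not_sane/3]).
%%
%%     note_not_sane(Author, Dict0, Breaker) ->
%%         Dict = orddict:update_counter(Author, 1, Dict0),
%%         case orddict:fetch(Author, Dict) of
%%             N when N > Breaker -> {use_none_projection, Dict};  % C103 path
%%             _                  -> {write_p_newprop, Dict}       % C300 path
%%         end.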
make_flapping_i() -> make_flapping_i({{epk,-1},?NOT_FLAPPING}, 0, [], [], []). From f7ef8c54f59794f94b8e5e179454288d465c4a0f Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 19 Jul 2015 13:32:55 +0900 Subject: [PATCH 4/9] Reduce # of assumptions made by ch_mgr + simulator for 'repair_airquote_done' --- src/machi_chain_manager1.erl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index a735da5..2832f22 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -567,6 +567,7 @@ calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj, RelativeToServer, AllHosed, Dbg, #ch_mgr{name=MyName, + proj=CurrentProj, runenv=RunEnv1, repair_final_status=RepairFS}=S) -> #projection_v1{epoch_number=OldEpochNum, @@ -584,6 +585,11 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj, Down = AllMembers -- Up, NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)], + #projection_v1{upi=CurrentUPI_list} = CurrentProj, + LastInCurrentUPI = case CurrentUPI_list of + [] -> does_not_exist_because_upi_is_empty; + [_|_] -> lists:last(CurrentUPI_list) + end, LastInNewUPI = case NewUPI_list of [] -> does_not_exist_because_upi_is_empty; [_|_] -> lists:last(NewUPI_list) @@ -606,7 +612,8 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj, SameEpoch_p = check_latest_private_projections_same_epoch( NewUPI_list ++ Repairing_list2, S#ch_mgr.proj, Partitions, S), - if Simulator_p andalso SameEpoch_p -> + if Simulator_p andalso SameEpoch_p + andalso RelativeToServer == LastInCurrentUPI -> D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}], {NewUPI_list ++ [H], T, RunEnv2}; not Simulator_p From e14493373bc068b9b7ac72b7454eb085568d4433 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 20 Jul 2015 14:04:25 +0900 Subject: [PATCH 5/9] Bugfix: add missing reset of not_sanes dictionary, fix comments --- src/machi_chain_manager1.erl | 50 +++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 2832f22..0d78825 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -69,6 +69,7 @@ flap_start = ?NOT_FLAPPING_START :: {{'epk', integer()}, erlang:timestamp()}, not_sanes :: orddict:orddict(), + sane_transitions = 0 :: non_neg_integer(), repair_worker :: 'undefined' | pid(), repair_start :: 'undefined' | erlang:timestamp(), repair_final_status :: 'undefined' | term(), @@ -845,13 +846,23 @@ do_react_to_env(S) -> %% attempts counter very generic (i.e., not specific for flapping %% as it once was). %% - %% The not_sanes counter dict should be reset each time we start - %% an iteration. One could argue that state only for a single - %% iteration shouldn't go in #ch_mgr but should be a separate arg - %% threaded through each of the FSM funcs. - %% TODO possible refactoring task? + %% The not_sanes counter dict should be reset when we have had at + %% least 3 state transitions that did not have a not_sane + %% suggested projection transition or whenever we fall back to the + %% none_projection. + %% + %% We'll probably implement a very simple counter that may/will be + %% *inaccurate* by at most one -- so any reset test should ignore + %% counter values of 0 & 1. + %% put(react, []), - react_to_env_A10(S#ch_mgr{not_sanes=orddict:new()}). 
+ if S#ch_mgr.sane_transitions > 3 -> % TODO review this constant + %% ?V("Skr,~w,", [S#ch_mgr.name]), + react_to_env_A10(S#ch_mgr{not_sanes=orddict:new()}); + true -> + %% ?V("Sk,~w,~w,", [S#ch_mgr.name, S#ch_mgr.sane_transitions]), + react_to_env_A10(S) + end. react_to_env_A10(S) -> ?REACT(a10), @@ -1435,8 +1446,8 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, react_to_env_C110(P_latest, S); %% 20150715: I've seen this loop happen with {expected_author2,X} %% where nobody agrees, weird. - _ -> - ?REACT({c100, ?LINE}), + DoctorSays -> + ?REACT({c100, ?LINE, [{not_sane, DoctorSays}]}), %% This is a fun case. We had just enough asymmetric partition %% to cause the chain to fragment into two *incompatible* and %% *overlapping membership* chains, but the chain fragmentation @@ -1490,7 +1501,7 @@ react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, P_newprop, P_latest, S) -> NotSanesDict = orddict:update_counter(Author_latest, 1, NotSanesDict0), - S2 = S#ch_mgr{not_sanes=NotSanesDict}, + S2 = S#ch_mgr{not_sanes=NotSanesDict, sane_transitions=0}, case orddict:fetch(Author_latest, NotSanesDict) of N when N > ?TOO_FREQUENT_BREAKER -> ?V("\n\nYOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]), @@ -1506,9 +1517,9 @@ react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, end. react_to_env_C103(#projection_v1{epoch_number=Epoch_latest, - all_members=All_list, - members_dict=MembersDict} = P_latest, - #ch_mgr{name=MyName}=S) -> + all_members=All_list, + members_dict=MembersDict} = P_latest, + #ch_mgr{name=MyName, proj=P_current}=S) -> #projection_v1{epoch_number=Epoch_latest, all_members=All_list, members_dict=MembersDict} = P_latest, @@ -1517,8 +1528,16 @@ react_to_env_C103(#projection_v1{epoch_number=Epoch_latest, dbg=[{none_projection,true}]}, P_none = machi_projection:update_checksum(P_none1), %% Use it, darn it, because it's 100% safe. And exit flapping state. + ?REACT({c103, ?LINE, + [{current_epoch, P_current#projection_v1.epoch_number}, + {none_projection_epoch, Epoch_latest}]}), + %% Reset the not_sanes count dictionary here, or else an already + %% ?TOO_FREQUENT_BREAKER count for an author might prevent a + %% transition from C100_inner()->C300, which can lead to infinite + %% looping C100->C103->C100. react_to_env_C100(P_none, P_none, S#ch_mgr{flaps=0, - flap_start=?NOT_FLAPPING_START}). + flap_start=?NOT_FLAPPING_START, + not_sanes=orddict:new()}). react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> ?REACT(c110), @@ -1578,7 +1597,8 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> end, react_to_env_C120(P_latest, [], S). -react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H}=S) -> +react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H, + sane_transitions=Xtns}=S) -> ?REACT(c120), H2 = queue:in(P_latest, H), H3 = case queue:len(H2) of @@ -1597,7 +1617,7 @@ react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H}=S) -> ?REACT({c120, [{latest, machi_projection:make_summary(P_latest)}]}), {{now_using, FinalProps, P_latest#projection_v1.epoch_number}, - S#ch_mgr{proj=P_latest, proj_history=H3}}. + S#ch_mgr{proj=P_latest, proj_history=H3, sane_transitions=Xtns + 1}}. 
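%% Taken together, sane_transitions and not_sanes act as a small hysteresis
%% counter pair: a not_sane verdict resets the sane streak and bumps that
%% author's count, while a long enough sane streak forgives all of the old
%% counts.  A self-contained sketch of that lifecycle (hypothetical names;
%% the real work happens in do_react_to_env(), C100_inner() and C120()):
%%
%%     -module(sane_hysteresis_sketch).
%%     -export([start_iteration/1, accepted_sane/1, saw_not_sane/2]).
%%
%%     %% State: {SaneStreak, NotSanesPerAuthor :: orddict:orddict()}.
%%     start_iteration({Streak, _NotSanes}) when Streak > 3 ->
%%         {Streak, orddict:new()};               % long sane streak: forget old sins
%%     start_iteration(State) ->
%%         State.
%%
%%     accepted_sane({Streak, NotSanes}) ->       % C120: adopted a projection
%%         {Streak + 1, NotSanes}.
%%
%%     saw_not_sane({_Streak, NotSanes}, Author) -> % C100_inner: streak broken
%%         {0, orddict:update_counter(Author, 1, NotSanes)}.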
react_to_env_C200(Retries, P_latest, S) -> ?REACT(c200), From 9ae4afa58e4c45420cd7946e88563da478b8736c Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 20 Jul 2015 14:58:21 +0900 Subject: [PATCH 6/9] Reduce chmgr verbosity a bit --- src/machi_chain_manager1.erl | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 0d78825..054b4cd 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -1042,7 +1042,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, ?REACT({a30, ?LINE, [{inner_summary, machi_projection:make_summary(P_inner2)}]}), %% Adjust the outer projection's #flap_i info. - ?V("~w,", [{'FLAP',MyName,NewEpoch}]), + %% ?V("~w,", [{'FLAP',MyName,NewEpoch}]), #projection_v1{flap=OldFlap} = P_newprop3, NewFlap = OldFlap#flap_i{flapping_me=true}, ?REACT({a30, ?LINE, [flap_continue, @@ -1572,22 +1572,28 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> P_latest2x = P_latest2#projection_v1{dbg2=[]}, % limit verbose len. case inner_projection_exists(P_latest2) of false -> - case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of - true -> + Last2 = get(last_verbose), + Summ2 = machi_projection:make_summary(P_latest2x), + case proplists:get_value(private_write_verbose, + S#ch_mgr.opts) of + true when Summ2 /= Last2 -> + put(last_verbose, Summ2), ?V("\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses plain: ~w\n", - [HH,MM,SS,MSec, S#ch_mgr.name, - machi_projection:make_summary(P_latest2x)]); + [HH,MM,SS,MSec, S#ch_mgr.name, Summ2]); _ -> ok end; true -> - case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of - true -> - P_inner = inner_projection_or_self(P_latest2), - P_innerx = P_inner#projection_v1{dbg2=[]}, % limit verbose len. + Last2 = get(last_verbose), + P_inner = inner_projection_or_self(P_latest2), + P_innerx = P_inner#projection_v1{dbg2=[]}, % limit verbose len. + Summ2 = machi_projection:make_summary(P_innerx), + case proplists:get_value(private_write_verbose, + S#ch_mgr.opts) of + true when Summ2 /= Last2 -> + put(last_verbose, Summ2), ?V("\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses inner: ~w\n", - [HH,MM,SS,MSec, S#ch_mgr.name, - machi_projection:make_summary(P_innerx)]); + [HH,MM,SS,MSec, S#ch_mgr.name, Summ2]); _ -> ok end From 319397ecd26044d02c9b04cb9b512347782ffb10 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 20 Jul 2015 15:08:03 +0900 Subject: [PATCH 7/9] machi_chain_manager1_pulse.erl tweaks --- test/machi_chain_manager1_pulse.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/machi_chain_manager1_pulse.erl b/test/machi_chain_manager1_pulse.erl index 7a2e029..3d4910d 100644 --- a/test/machi_chain_manager1_pulse.erl +++ b/test/machi_chain_manager1_pulse.erl @@ -172,6 +172,7 @@ all_list() -> setup(Num, Seed) -> ?V("\nsetup(~w,~w", [Num, Seed]), + [catch erlang:garbage_collect(P) || P <- processes()], All_list = lists:sublist(all_list(), Num), All_listE = lists:sublist(all_list_extra(), Num), %% shutdown_hard() has taken care of killing all relevant procs. 
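    %% A note on the sweep added above: erlang:garbage_collect/1 forces a
    %% garbage collection in each local process (presumably to keep memory
    %% use down across PULSE cases), and the catch ensures that a pid that
    %% exits between the processes/0 call and its turn in the sweep cannot
    %% crash setup/2.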
@@ -322,7 +323,7 @@ prop_pulse() -> prop_pulse(Style) when Style == new; Style == regression -> _ = application:start(crypto), ?FORALL({Cmds0, Seed}, {gen_commands(Style), pulse:seed()}, - ?IMPLIES(1 < length(Cmds0) andalso length(Cmds0) < 11, + ?IMPLIES(length(Cmds0) < 11, begin ok = shutdown_hard(), %% PULSE can be really unfair, of course, including having exec_ticks From 88d3228a4c572466b302ad50f78eb8aaa3b3c81e Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 20 Jul 2015 16:25:42 +0900 Subject: [PATCH 8/9] Fix various problems with repair not being aware of inner projections --- src/machi_chain_manager1.erl | 123 ++++++++++++++++++++--------------- src/machi_projection.erl | 6 ++ 2 files changed, 78 insertions(+), 51 deletions(-) diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 054b4cd..213f780 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -214,6 +214,7 @@ init({MyName, InitMembersDict, MgrOpts}) -> Opt = fun(Key, Default) -> proplists:get_value(Key, MgrOpts, Default) end, RunEnv = [{seed, Opt(seed, now())}, {use_partition_simulator, Opt(use_partition_simulator, false)}, + {simulate_repair, Opt(simulate_repair, true)}, {network_partitions, Opt(network_partitions, [])}, {network_islands, Opt(network_islands, [])}, {up_nodes, Opt(up_nodes, not_init_yet)}], @@ -597,6 +598,7 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj, end, Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)], Simulator_p = proplists:get_value(use_partition_simulator, RunEnv2, false), + SimRepair_p = proplists:get_value(simulate_repair, RunEnv2, true), {NewUPI_list3, Repairing_list3, RunEnv3} = case {NewUp, Repairing_list2} of {[], []} -> @@ -613,11 +615,11 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj, SameEpoch_p = check_latest_private_projections_same_epoch( NewUPI_list ++ Repairing_list2, S#ch_mgr.proj, Partitions, S), - if Simulator_p andalso SameEpoch_p - andalso RelativeToServer == LastInCurrentUPI -> + if Simulator_p andalso SimRepair_p andalso + SameEpoch_p andalso RelativeToServer == LastInCurrentUPI -> D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}], {NewUPI_list ++ [H], T, RunEnv2}; - not Simulator_p + not (Simulator_p andalso SimRepair_p) andalso RepairFS == {repair_final_status, ok} -> D_foo=[{repair_done, {repair_final_status, ok, (S#ch_mgr.proj)#projection_v1.epoch_number}}], @@ -1338,7 +1340,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, {flap_limit, FlapLimit}]}), case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of true -> - ?V("{FLAP: ~w flaps ~w}! ", [S#ch_mgr.name, P_newprop_flap_count]); + ok; %% ?V("{FLAP: ~w flaps ~w}! 
", [S#ch_mgr.name, P_newprop_flap_count]); _ -> ok end, @@ -2102,66 +2104,85 @@ gobble_calls(StaticCall) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -perhaps_start_repair( - #ch_mgr{name=MyName, - repair_worker=undefined, - proj=#projection_v1{creation_time=Start, - upi=[_|_]=UPI, - repairing=[_|_]}}=S) -> - RepairId = {MyName, os:timestamp()}, - RepairOpts = [{repair_mode, repair}, verbose, {repair_id, RepairId}], - %% RepairOpts = [{repair_mode, check}, verbose], - RepairFun = fun() -> do_repair(S, RepairOpts, ap_mode) end, - LastUPI = lists:last(UPI), - IgnoreStabilityTime_p = proplists:get_value(ignore_stability_time, - S#ch_mgr.opts, false), - case timer:now_diff(os:timestamp(), Start) div 1000000 of - N when MyName == LastUPI andalso - (IgnoreStabilityTime_p orelse - N >= ?REPAIR_START_STABILITY_TIME) -> - {WorkerPid, _Ref} = spawn_monitor(RepairFun), - S#ch_mgr{repair_worker=WorkerPid, - repair_start=os:timestamp(), - repair_final_status=undefined}; +perhaps_start_repair(#ch_mgr{name=MyName, + repair_worker=undefined, + proj=P_current}=S) -> + case inner_projection_or_self(P_current) of + #projection_v1{creation_time=Start, + upi=[_|_]=UPI, + repairing=[_|_]} -> + RepairId = {MyName, os:timestamp()}, + RepairOpts = [{repair_mode,repair}, verbose, {repair_id,RepairId}], + %% RepairOpts = [{repair_mode, check}, verbose], + RepairFun = fun() -> do_repair(S, RepairOpts, ap_mode) end, + LastUPI = lists:last(UPI), + IgnoreStabilityTime_p = proplists:get_value(ignore_stability_time, + S#ch_mgr.opts, false), + case timer:now_diff(os:timestamp(), Start) div 1000000 of + N when MyName == LastUPI andalso + (IgnoreStabilityTime_p orelse + N >= ?REPAIR_START_STABILITY_TIME) -> + {WorkerPid, _Ref} = spawn_monitor(RepairFun), + S#ch_mgr{repair_worker=WorkerPid, + repair_start=os:timestamp(), + repair_final_status=undefined}; + _ -> + S + end; _ -> S end; perhaps_start_repair(S) -> S. 
-do_repair( - #ch_mgr{name=MyName, - proj=#projection_v1{upi=UPI, - repairing=[_|_]=Repairing, - members_dict=MembersDict}}=_S_copy, - Opts, ap_mode=RepairMode) -> -?V("RePaiR-~w,", [self()]), - T1 = os:timestamp(), - RepairId = proplists:get_value(repair_id, Opts, id1), - error_logger:info_msg("Repair start: tail ~p of ~p -> ~p, ~p ID ~w\n", - [MyName, UPI, Repairing, RepairMode, RepairId]), - +do_repair(#ch_mgr{name=MyName, + proj=#projection_v1{upi=UPI, + repairing=[_|_]=Repairing, + members_dict=MembersDict}}=S, + Opts, ap_mode=RepairMode) -> ETS = ets:new(repair_stats, [private, set]), ETS_T_Keys = [t_in_files, t_in_chunks, t_in_bytes, t_out_files, t_out_chunks, t_out_bytes, t_bad_chunks, t_elapsed_seconds], [ets:insert(ETS, {K, 0}) || K <- ETS_T_Keys], - Res = machi_chain_repair:repair(ap_mode, MyName, Repairing, UPI, - MembersDict, ETS, Opts), - T2 = os:timestamp(), - Elapsed = (timer:now_diff(T2, T1) div 1000) / 1000, - ets:insert(ETS, {t_elapsed_seconds, Elapsed}), - Summary = case Res of ok -> "success"; + {ok, MyProj} = ?FLU_PC:read_latest_projection(proxy_pid(MyName, S), + private), + MyEpochID = machi_projection:get_epoch_id(MyProj), + RepairEpochIDs = [case ?FLU_PC:read_latest_projection(proxy_pid(Rep, S), + private) of + {ok, Proj} -> + machi_projection:get_epoch_id(Proj); + _ -> + unknown + end || Rep <- Repairing], + case lists:usort(RepairEpochIDs) of + [MyEpochID] -> + T1 = os:timestamp(), + RepairId = proplists:get_value(repair_id, Opts, id1), + error_logger:info_msg( + "Repair start: tail ~p of ~p -> ~p, ~p ID ~w\n", + [MyName, UPI, Repairing, RepairMode, RepairId]), + + Res = machi_chain_repair:repair(ap_mode, MyName, Repairing, UPI, + MembersDict, ETS, Opts), + T2 = os:timestamp(), + Elapsed = (timer:now_diff(T2, T1) div 1000) / 1000, + ets:insert(ETS, {t_elapsed_seconds, Elapsed}), + Summary = case Res of ok -> "success"; _ -> "FAILURE" - end, - Stats = [{K, ets:lookup_element(ETS, K, 2)} || K <- ETS_T_Keys], - error_logger:info_msg("Repair ~s: tail ~p of ~p finished ~p repair ID ~w: " - "~p\nStats ~p\n", - [Summary, MyName, UPI, RepairMode, RepairId, - Res, Stats]), - ets:delete(ETS), - exit({repair_final_status, Res}). + end, + Stats = [{K, ets:lookup_element(ETS, K, 2)} || K <- ETS_T_Keys], + error_logger:info_msg( + "Repair ~s: tail ~p of ~p finished ~p repair ID ~w: " + "~p\nStats ~p\n", + [Summary, MyName, UPI, RepairMode, RepairId, + Res, Stats]), + ets:delete(ETS), + exit({repair_final_status, Res}); + _ -> + exit(not_all_in_same_epoch) + end. sanitize_repair_state(#ch_mgr{repair_final_status=Res, proj=#projection_v1{upi=[_|_]}}=S) diff --git a/src/machi_projection.erl b/src/machi_projection.erl index 67dee72..1c1ff91 100644 --- a/src/machi_projection.erl +++ b/src/machi_projection.erl @@ -29,6 +29,7 @@ update_checksum/1, update_dbg2/2, compare/2, + get_epoch_id/1, make_summary/1, make_members_dict/1 ]). @@ -138,6 +139,11 @@ compare(#projection_v1{epoch_number=E1}, E1 > E2 -> 1 end. +%% @doc Return the epoch_id of the projection. + +get_epoch_id(#projection_v1{epoch_number=Epoch, epoch_csum=CSum}) -> + {Epoch, CSum}. + %% @doc Create a proplist-style summary of a projection record. 
make_summary(#projection_v1{epoch_number=EpochNum, From 52dc40e1fefa95854ee4c3390345132706a9c80f Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 21 Jul 2015 14:19:08 +0900 Subject: [PATCH 9/9] converge demo: converged iff all private projs are stable and all inner/outer --- test/machi_chain_manager1_converge_demo.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/machi_chain_manager1_converge_demo.erl b/test/machi_chain_manager1_converge_demo.erl index 520ace1..886af94 100644 --- a/test/machi_chain_manager1_converge_demo.erl +++ b/test/machi_chain_manager1_converge_demo.erl @@ -129,6 +129,8 @@ long_doc() -> n of a naive/1st draft detection algorithm. ". +%% ' silly Emacs syntax highlighting.... + %% convergence_demo_test_() -> %% {timeout, 98*300, fun() -> convergence_demo_testfun() end}. @@ -157,7 +159,9 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> ok = filelib:ensure_dir("/tmp/c/not-used"), FluInfo = [{a,TcpPort+0,"/tmp/c/data.a"}, {b,TcpPort+1,"/tmp/c/data.b"}, {c,TcpPort+2,"/tmp/c/data.c"}, {d,TcpPort+3,"/tmp/c/data.d"}, - {e,TcpPort+4,"/tmp/c/data.e"}, {f,TcpPort+5,"/tmp/c/data.f"}], + {e,TcpPort+4,"/tmp/c/data.e"}, {f,TcpPort+5,"/tmp/c/data.f"}, + {g,TcpPort+6,"/tmp/c/data.g"}, {h,TcpPort+7,"/tmp/c/data.h"}, + {i,TcpPort+8,"/tmp/c/data.i"}, {j,TcpPort+9,"/tmp/c/data.j"}], FLU_biglist = [X || {X,_,_} <- FluInfo], All_list = lists:sublist(FLU_biglist, NumFLUs), io:format(user, "\nSET # of FLUs = ~w members ~w).\n", @@ -444,19 +448,16 @@ private_projections_are_stable(Namez, PollFunc) -> Private1 = [get_latest_inner_proj_summ(FLU) || {_Name, FLU} <- Namez], PollFunc(5, 1, 10), Private2 = [get_latest_inner_proj_summ(FLU) || {_Name, FLU} <- Namez], - if Private1 == Private2 -> - ok; - true -> - io:format(user, "Private1: ~p, ", [Private1]), - io:format(user, "Private2: ~p, ", [Private2]) - end, - Private1 == Private2. + Is = [Inner_p || {_,_,_,_,Inner_p} <- Private1], + %% We want either all true or all false (inner or not). + Private1 == Private2 andalso length(lists:usort(Is)) == 1. get_latest_inner_proj_summ(FLU) -> {ok, Proj} = ?FLU_PC:read_latest_projection(FLU, private), #projection_v1{epoch_number=E, upi=UPI, repairing=Repairing, down=Down} = machi_chain_manager1:inner_projection_or_self(Proj), - {E, UPI, Repairing, Down}. + Inner_p = machi_chain_manager1:inner_projection_exists(Proj), + {E, UPI, Repairing, Down, Inner_p}. random_sort(L) -> random:seed(now()),