diff --git a/include/machi_projection.hrl b/include/machi_projection.hrl index a94210f..eb430d8 100644 --- a/include/machi_projection.hrl +++ b/include/machi_projection.hrl @@ -38,6 +38,7 @@ -record(flap_i, { flap_count :: {term(), term()}, + flapping_me = false :: boolean(), all_hosed :: list(), all_flap_counts :: list(), bad :: list() diff --git a/include/machi_verbose.hrl b/include/machi_verbose.hrl new file mode 100644 index 0000000..5d7aea7 --- /dev/null +++ b/include/machi_verbose.hrl @@ -0,0 +1,31 @@ +%% ------------------------------------------------------------------- +%% +%% Machi: a small village of replicated files +%% +%% Copyright (c) 2014-2015 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-ifdef(PULSE). +-define(V(Fmt, Args), pulse:format(Fmt, Args)). +-else. % PULSE +-define(V(Fmt, Args), io:format(user, Fmt, Args)). +-endif. % PULSE + +-define(D(X), ?V("~s ~p\n", [??X, X])). +-define(Dw(X), ?V("~s ~w\n", [??X, X])). + diff --git a/rebar.config.script b/rebar.config.script index 364ce39..fbd0e8b 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -8,15 +8,24 @@ case PulseBuild of true -> PulseOpts = [{pulse_no_side_effect, - [{erlang,display,1} - ]}, + [{erlang,display,1}, + {os,getenv,1}, + {io,format,2}, + {io,format,3} + ]}, {pulse_side_effect, [ {does_not_exist_yet, some_func, '_'} + , {machi_flu1_client, '_', '_'} + , {machi_projection_store, '_', '_'} + , {machi_proxy_flu1_client, '_', '_'} + , {machi_pb_translate, '_', '_'} + , {prim_file, '_', '_'} , {file, '_', '_'} , {filelib, '_', '_'} - , {os, '_', '_'} ]}, + %% , {os, '_', '_'} + ]}, {pulse_replace_module, [ {gen_server, pulse_gen_server} diff --git a/src/machi.app.src b/src/machi.app.src index 64d9a3a..e12605a 100644 --- a/src/machi.app.src +++ b/src/machi.app.src @@ -1,7 +1,7 @@ {application, machi, [ {description, "A village of write-once files."}, {vsn, "0.0.0"}, - {applications, [kernel, stdlib, sasl, crypto]}, + {applications, [kernel, stdlib, crypto]}, {mod,{machi_app,[]}}, {registered, []}, {env, [ diff --git a/src/machi_app.erl b/src/machi_app.erl index d23718e..649a7db 100644 --- a/src/machi_app.erl +++ b/src/machi_app.erl @@ -27,6 +27,11 @@ -behaviour(application). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-endif. + %% Application callbacks -export([start/2, stop/1]). diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 274d02a..39aac50 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -53,6 +53,9 @@ -include("machi_projection.hrl"). -include("machi_chain_manager.hrl"). +-include("machi_verbose.hrl"). + +-define(NOT_FLAPPING_START, {{epk,-1},?NOT_FLAPPING}). 
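Aside for reviewers: the ?V/?D/?Dw macros defined in machi_verbose.hrl above behave like io:format(user, Fmt, Args) in a normal build and route through pulse:format/2 under PULSE, so debug output survives deterministic PULSE scheduling. A minimal usage sketch; debug_dump/2 is a hypothetical caller, not part of this change:

    -include("machi_verbose.hrl").

    %% ?V/2 is a drop-in replacement for io:format(user, Fmt, Args).
    %% ?D(X) prints the source text of X followed by its value via ~p;
    %% ?Dw(X) does the same via ~w (one line, no pretty-printing).
    debug_dump(UPI, Repairing) ->
        ?V("upi=~w repairing=~w\n", [UPI, Repairing]),
        ?D(UPI),
        ?Dw(Repairing).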
-record(ch_mgr, {
          name            :: pv1_server(),
@@ -63,8 +66,9 @@
          ignore_timer    :: boolean(),
          proj_history    :: queue:queue(),
          flaps=0         :: integer(),
-         flap_start = ?NOT_FLAPPING
-                         :: erlang:timestamp(),
+         flap_start = ?NOT_FLAPPING_START
+                         :: {{'epk', integer()}, erlang:timestamp()},
+         flap_not_sanes  :: orddict:orddict(),
          repair_worker   :: 'undefined' | pid(),
          repair_start    :: 'undefined' | erlang:timestamp(),
          repair_final_status :: 'undefined' | term(),
@@ -74,9 +78,6 @@
          proxies_dict :: orddict:orddict()
         }).
 
--define(D(X), io:format(user, "~s ~p\n", [??X, X])).
--define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
-
 -define(FLU_PC, machi_proxy_flu1_client).
 -define(TO, (2*1000)).     % default timeout
 
@@ -91,6 +92,11 @@
 -define(REPAIR_START_STABILITY_TIME, 10).
 -endif. % TEST
 
+%% Magic constant for looping "too frequently" breaker. TODO revisit & revise.
+-define(TOO_FREQUENT_BREAKER, 10).
+
+-define(RETURN2(X), begin (catch put(why2, [?LINE|get(why2)])), X end).
+
 %% API
 -export([start_link/2, start_link/3, stop/1, ping/1,
          set_chain_members/2, set_active/2,
@@ -99,7 +105,12 @@
          terminate/2, code_change/3]).
 
 -export([make_chmgr_regname/1, projection_transitions_are_sane/2,
-         inner_projection_exists/1, inner_projection_or_self/1]).
+         inner_projection_exists/1, inner_projection_or_self/1,
+         simple_chain_state_transition_is_sane/3,
+         simple_chain_state_transition_is_sane/5,
+         chain_state_transition_is_sane/5]).
+%% Exports so that EDoc docs are generated for these internal funcs.
+-export([mk/3]).
 
 -ifdef(TEST).
 
@@ -113,6 +124,7 @@
 -endif.
 -ifdef(PULSE).
 -compile({parse_transform, pulse_instrument}).
+-include_lib("pulse_otp/include/pulse_otp.hrl").
 -endif.
 
 -include_lib("eunit/include/eunit.hrl").
 
@@ -214,6 +226,7 @@ init({MyName, InitMembersDict, MgrOpts}) ->
                flap_limit=length(All_list) + 50,
                timer='undefined',
                proj_history=queue:new(),
+               flap_not_sanes=orddict:new(),
                runenv=RunEnv,
                opts=MgrOpts},
     {_, S2} = do_set_chain_members_dict(MembersDict, S),
@@ -336,7 +349,7 @@ code_change(_OldVsn, S, _Extra) ->
 make_none_projection(MyName, All_list, MembersDict) ->
     Down_list = All_list,
     UPI_list = [],
-    machi_projection:new(MyName, MembersDict, UPI_list, Down_list, [], []).
+    machi_projection:new(MyName, MembersDict, Down_list, UPI_list, [], []).
 
 get_my_private_proj_boot_info(MgrOpts, DefaultDict, DefaultProj) ->
     get_my_proj_boot_info(MgrOpts, DefaultDict, DefaultProj, private).
@@ -686,6 +699,9 @@ calc_up_nodes_sim(MyName, AllMembers, RunEnv1) ->
                        [{network_partitions, Partitions2},
                         {network_islands, Islands2},
                         {up_nodes, UpNodes}]),
+    catch ?REACT({calc_up_nodes,?LINE,[{partitions,Partitions2},
+                                       {islands,Islands2},
+                                       {up_nodes, UpNodes}]}),
     {UpNodes, Partitions2, RunEnv2}.
 
 replace(PropList, Items) ->
@@ -740,9 +756,9 @@ rank_projections(Projs, CurrentProj) ->
 
 rank_projection(#projection_v1{upi=[]}, _MemberRank, _N) ->
     -100;
-rank_projection(#projection_v1{author_server=Author,
+rank_projection(#projection_v1{author_server=_Author,
                                upi=UPI_list,
-                               repairing=Repairing_list}, MemberRank, N) ->
+                               repairing=Repairing_list}, _MemberRank, N) ->
     %% It's possible that there's "cross-talk" across projection
     %% stores.  For example, we were a chain of [a,b], then the
     %% administrator sets a's members_dict to include only a.
     %% However, b is still running and uses a stale members_dict.
     %% Server b is no longer a member of a's MemberRank scheme, so we
     %% need to compensate for this by giving b an extremely low author
     %% ranking.
- AuthorRank = case orddict:find(Author, MemberRank) of - {ok, Rank} -> Rank; - error -> -(N*N*N*N) - end, + AuthorRank = 0, + %% AuthorRank = case orddict:find(Author, MemberRank) of + %% {ok, Rank} -> Rank; + %% error -> -(N*N*N*N) + %% end, AuthorRank + ( N * length(Repairing_list)) + (N*N * length(UPI_list)). @@ -842,9 +859,25 @@ react_to_env_A20(Retries, #ch_mgr{name=MyName}=S) -> UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)), UPI_Repairing_FLUs = lists:sort(P_latest#projection_v1.upi ++ P_latest#projection_v1.repairing), - All_UPI_Repairing_were_unanimous = UPI_Repairing_FLUs == UnanimousFLUs, - %% TODO: investigate if the condition below is more correct? - %% All_UPI_Repairing_were_unanimous = (UPI_Repairing_FLUs -- UnanimousFLUs) == [], + All_UPI_Repairing_were_unanimous = + ordsets:is_subset(ordsets:from_list(UPI_Repairing_FLUs), + ordsets:from_list(UnanimousFLUs)), + NotUnanimousFLUs = lists:sort(proplists:get_value(not_unanimous_flus, + ReadExtra, [xxx])), + NotUnanimousPs = lists:sort(proplists:get_value(not_unanimous_answers, + ReadExtra, [xxx])), + NotUnanimousSumms = [machi_projection:make_summary( + P#projection_v1{dbg2=[omitted]}) || + P <- NotUnanimousPs, + is_record(P, projection_v1)], + BadAnswerFLUs = lists:sort(proplists:get_value(bad_answer_flus, ReadExtra)), + ?REACT({a20,?LINE,[{upi_repairing,UPI_Repairing_FLUs}, + {unanimous_flus,UnanimousFLUs}, + {all_upi_repairing_were_unanimous,All_UPI_Repairing_were_unanimous}, + {not_unanimous_flus, NotUnanimousFLUs}, + {not_unanimous_answers, NotUnanimousSumms}, + {bad_answer_flus, BadAnswerFLUs} + ]}), LatestUnanimousP = if UnanimousTag == unanimous andalso @@ -852,8 +885,7 @@ react_to_env_A20(Retries, #ch_mgr{name=MyName}=S) -> ?REACT({a20,?LINE}), true; UnanimousTag == unanimous -> - ?REACT({a20,?LINE,[{upi_repairing,UPI_Repairing_FLUs}, - {unanimous,UnanimousFLUs}]}), + ?REACT({a20,?LINE}), false; UnanimousTag == not_unanimous -> ?REACT({a20,?LINE}), @@ -870,6 +902,7 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, {P_newprop1, S2} = calc_projection(S, MyName), ?REACT({a30, ?LINE, [{current, machi_projection:make_summary(S#ch_mgr.proj)}]}), ?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}), + ?REACT({a30, ?LINE, [{latest, machi_projection:make_summary(P_latest)}]}), %% Are we flapping yet? {P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2), @@ -886,15 +919,29 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, case get_flap_count(P_newprop3) of {_, P_newprop3_flap_count} when P_newprop3_flap_count >= FlapLimit -> AllHosed = get_all_hosed(P_newprop3), - {P_i, S_i} = calc_projection(S3, MyName, AllHosed), + P_current_inner = inner_projection_or_self(P_current), + {P_i, S_i} = calc_projection(unused, unused, + P_current_inner, + MyName, AllHosed, [], S3), + %% The inner projection will have a fake author, which + %% everyone will agree is the largest UPI member's + %% name. 
+ BiggestUPIMember = + if P_i#projection_v1.upi == [] -> + %% Oops, ok, fall back to author + P_i#projection_v1.author_server; + true -> + lists:last(lists:sort(P_i#projection_v1.upi)) + end, + P_i2 = P_i#projection_v1{author_server=BiggestUPIMember}, P_inner = case lists:member(MyName, AllHosed) of false -> - P_i; + P_i2; true -> - P_i#projection_v1{ + P_i2#projection_v1{ upi=[MyName], repairing=[], - down=P_i#projection_v1.all_members + down=P_i2#projection_v1.all_members -- [MyName]} end, FinalInnerEpoch = @@ -936,13 +983,22 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, P_inner2 = machi_projection:update_checksum( P_inner#projection_v1{epoch_number=FinalInnerEpoch, creation_time=FinalCreation}), - InnerInfo = [{inner_summary, - machi_projection:make_summary(P_inner2)}], - DbgX = replace(P_newprop3#projection_v1.dbg, InnerInfo), - ?REACT({a30, ?LINE, [qqqwww|DbgX]}), - {P_newprop3#projection_v1{dbg=DbgX, - inner=P_inner2}, S_i}; - _ -> + ?REACT({a30, ?LINE, [{inner_summary, + machi_projection:make_summary(P_inner2)}]}), + %% Adjust the outer projection's #flap_i info. + ?V("~w,", [{'YOYO',MyName,NewEpoch}]), + #projection_v1{flap=OldFlap} = P_newprop3, + NewFlap = OldFlap#flap_i{flapping_me=true}, + ?REACT({a30, ?LINE, [flap_continue, + {flapping_me, true}]}), + %% Put it all together. + P_newprop4 = machi_projection:update_checksum( + P_newprop3#projection_v1{flap=NewFlap, + inner=P_inner2}), + {P_newprop4, S_i}; + {_, P_newprop3_flap_count} -> + ?REACT({a30, ?LINE,[{newprop3_flap_count,P_newprop3_flap_count}, + {flap_limit, FlapLimit}]}), {P_newprop3, S3} end, @@ -988,59 +1044,60 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra, Kicker_p = case {Latest_authors_flap_count_current, Latest_authors_flap_count_latest} of {NotUndef, undefined} when NotUndef /= undefined -> - true; + %% OK, someone else has switched from non-zero flap + %% count to zero flap count. But ... do not kick out + %% of our flapping mode locally if we do not have an + %% inner projection. + inner_projection_exists(P_current); {_, _} -> false end, + ClauseInfo = [{inner_kicker, Kicker_p}, + {inner_kicker2, {Latest_authors_flap_count_current, + Latest_authors_flap_count_latest}}, + {move_from_inner, MoveFromInnerToNorm_p}], + ?REACT({a30, ?LINE, ClauseInfo}), if MoveFromInnerToNorm_p orelse Kicker_p -> - ClauseInfo = [{inner_kicker, Kicker_p}, - {move_from_inner, MoveFromInnerToNorm_p}], - ?REACT({a30, ?LINE, ClauseInfo}), - %% %% 2015-04-14: YEAH, this appears to work! - %% %% 1. Create a "safe" projection that is upi=[],repairing=[] - %% %% 2. Declare it to be best & latest by pure fiat. - %% %% (The C100 transition will double-check that it's safe.) - %% %% 3. Jump to C100. Then, for the next iteration, - %% %% our P_current state to a smallest-possible-score - %% %% state ... and let the chain reassemble itself from - %% %% length zero. - %% #projection_v1{epoch_number=Epoch_newprop10, all_members=All_list, - %% members_dict=MembersDict} = P_newprop10, - %% P_noneprop0 = make_none_projection(MyName, All_list, MembersDict), - %% P_noneprop1 = P_noneprop0#projection_v1{epoch_number=Epoch_newprop10}, - %% %% Just to be clear, we clobber any flapping info by setting dbg. - %% P_noneprop = P_noneprop1#projection_v1{dbg=ClauseInfo}, - %% react_to_env_C100(P_noneprop, P_latest, S); - - %% 2015-04-14: Let's experiment with using the current inner - %% projection (or, if there really is no inner, just P_current). 
- %% This is safe because it's already P_current and by assumption, - %% anything that made it through the logical maze to get here - %% is safe. So re-using it with a higher epoch number doesn't - %% make any significant change. - %% - %% Yeah, it appears to work, also, nice! This can help save some - %% repair operations (compared to the other safe thing to do - %% here, which uses make_none_projection() to build & repair the - %% entire chain from scratch). Note that this isn't a guarantee - %% that repair steps will be minimized: for a 4-member cluster - %% that has an asymmetric partition which organizes 3 clusters of - %% inner-upi=[a], inner-upi=[b], and inner-upi[c,d], there is no - %% guarantee (yet?) that the [c,d] chain will be the UPI basis - %% for repairs when the partition is healed: the quickest author - %% after the healing will make that choice for everyone. - %% TODO: Perhaps that quickest author should consult all of the - %% other private stores, check their inner, and if there is a - %% higher rank there, then goto C200 for a wait-and-see cycle? - + %% Move from inner projection to outer. P_inner2A = inner_projection_or_self(P_current), + ResetEpoch = P_newprop10#projection_v1.epoch_number, + ResetAuthor = case P_current#projection_v1.upi of + [] -> + %% Drat, fall back to current's author. + P_current#projection_v1.author_server; + _ -> + lists:last(P_current#projection_v1.upi) + end, + ClauseInfo2 = [{move_from_inner_to_outer, true}, + {old_author, P_inner2A#projection_v1.author_server}, + {reset_author, ResetAuthor}, + {reset_epoch, ResetEpoch}], P_inner2B = - P_inner2A#projection_v1{epoch_number= - P_newprop10#projection_v1.epoch_number, - dbg=ClauseInfo}, - react_to_env_C100(P_inner2B, P_latest, S); - + machi_projection:update_checksum( + P_inner2A#projection_v1{epoch_number=ResetEpoch, + author_server=ResetAuthor, + dbg=ClauseInfo++ClauseInfo2}), + ReactI = [{inner2b,machi_projection:make_summary(P_inner2B)}], + ?REACT({a30, ?LINE, ReactI}), + %% In the past, we've tried: + %% react_to_env_C100(P_inner2B, P_latest, S); + %% + %% But we *know* that direct transition is racy/buggy: if + %% P_latest UPIs are not unanimous, then we run the risk of + %% non-disjoint UPIs; state B10 exists for a reason! + %% + %% So, we're going to use P_inner2B as our new proposal and run + %% it through the regular system, as we did prior to 2015-04-14. + %% + %% OK, but we need to avoid a possible infinite loop by trying to + %% use the inner projection as-is. Because we're moving from + %% inner to outer projections, the partition situation has + %% altered significantly. Use calc_projection() to find out what + %% nodes are down *now* (as best as we can tell right now). + {P_o, S_o} = calc_projection(unused, unused, + P_inner2B, MyName, [], [], S10), + react_to_env_A40(Retries, P_o, P_latest, LatestUnanimousP, S_o); true -> ?REACT({a30, ?LINE, []}), react_to_env_A40(Retries, P_newprop10, P_latest, @@ -1202,7 +1259,6 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, andalso UnanimousLatestInnerNotRelevant_p -> ?REACT({b10, ?LINE, []}), - put(b10_hack, false), %% Do not go to C100, because we want to ignore this latest %% proposal. Write ours instead via C300. 
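A note on the "fake author" convention used in react_to_env_A30() above: each manager stamps its inner projection with a deterministic author, the largest UPI member in sort order, so that inner projections generated independently on different nodes can compare as equal. A sketch of the rule under that assumption (fake_author/2 is hypothetical, for illustration only):

    %% Same answer on every node: lists:last(lists:sort([b,a,c])) == c.
    %% With an empty UPI there is no member to pick, so fall back to the
    %% projection's real author, as A30 does.
    fake_author([], RealAuthor)   -> RealAuthor;
    fake_author(UPI, _RealAuthor) -> lists:last(lists:sort(UPI)).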
@@ -1216,7 +1272,6 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, {newprop_epoch,P_newprop#projection_v1.epoch_number}, {newprop_author,P_newprop#projection_v1.author_server} ]}), - put(b10_hack, false), react_to_env_C100(P_newprop, P_latest, S); @@ -1225,62 +1280,55 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, ?REACT({b10, ?LINE, [i_am_flapping, {newprop_flap_count, P_newprop_flap_count}, {flap_limit, FlapLimit}]}), - _B10Hack = get(b10_hack), case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of true -> - io:format(user, "{FLAP: ~w flaps ~w}! ", [S#ch_mgr.name, P_newprop_flap_count]); + ?V("{FLAP: ~w flaps ~w}! ", [S#ch_mgr.name, P_newprop_flap_count]); _ -> ok end, + %% MEANWHILE, we have learned some things about this + %% algorithm in the past many months. With the introduction + %% of the "inner projection" concept, we know that the inner + %% projection may be stable but the "outer" projection will + %% continue to be flappy for as long as there's an + %% asymmetric network partition somewhere. We now know that + %% that flappiness is OK and that the only problem with it + %% is that it needs to be slowed down so that we don't have + %% zillions of public projection proposals written every + %% second. + %% + %% It doesn't matter if the FlapLimit count mechanism + %% doesn't give an accurate sense of global flapping state. + %% FlapLimit is enough to be able to tell us to slow down. - if - %% MEANWHILE, we have learned some things about this - %% algorithm in the past few months. With the introduction - %% of the "inner projection" concept, we know that the inner - %% projection may be stable but the "outer" projection will - %% continue to be flappy for as long as there's an - %% asymmetric network partition somewhere. We now know that - %% that flappiness is OK and that the only problem with it - %% is that it needs to be slowed down so that we don't have - %% zillions of public projection proposals written every - %% second. - %% - %% It doesn't matter if the FlapLimit count mechanism - %% doesn't give an accurate sense of global flapping state. - %% FlapLimit is enough to be able to tell us to slow down. - - true -> - %% We already know that I'm flapping. We need to - %% signal to the rest of the world that I'm writing - %% and flapping and churning, so we cannot always - %% go to A50 from here. - %% - %% If we do go to A50, then recommend that we poll less - %% frequently. - {X, S2} = gimme_random_uniform(100, S), - if X < 80 -> - ?REACT({b10, ?LINE, [flap_stop]}), - ThrottleTime = if P_newprop_flap_count < 500 -> 1; - P_newprop_flap_count < 1000 -> 5; - P_newprop_flap_count < 5000 -> 10; - true -> 30 - end, - FinalProps = [{my_flap_limit, FlapLimit}, - {throttle_seconds, ThrottleTime}], - react_to_env_A50(P_latest, FinalProps, S2); - true -> - %% It is our moral imperative to write so that - %% the flap cycle continues enough times so that - %% everyone notices then eventually falls into - %% consensus. - ?REACT({b10, ?LINE, [flap_continue]}), - react_to_env_C300(P_newprop, P_latest, S2) - end + %% We already know that I'm flapping. We need to + %% signal to the rest of the world that I'm writing + %% and flapping and churning, so we cannot always + %% go to A50 from here. + %% + %% If we do go to A50, then recommend that we poll less + %% frequently. 
+ {X, S2} = gimme_random_uniform(100, S), + if X < 80 -> + ?REACT({b10, ?LINE, [flap_stop]}), + ThrottleTime = if P_newprop_flap_count < 500 -> 1; + P_newprop_flap_count < 1000 -> 5; + P_newprop_flap_count < 5000 -> 10; + true -> 30 + end, + FinalProps = [{my_flap_limit, FlapLimit}, + {throttle_seconds, ThrottleTime}], + react_to_env_A50(P_latest, FinalProps, S2); + true -> + %% It is our moral imperative to write so that + %% the flap cycle continues enough times so that + %% everyone notices then eventually falls into + %% consensus. + react_to_env_C300(P_newprop, P_latest, S2) end; Retries > 2 -> ?REACT({b10, ?LINE, [{retries, Retries}]}), - put(b10_hack, false), %% The author of P_latest is too slow or crashed. %% Let's try to write P_newprop and see what happens! @@ -1293,7 +1341,6 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, [{rank_latest, Rank_latest}, {rank_newprop, Rank_newprop}, {latest_author, P_latest#projection_v1.author_server}]}), - put(b10_hack, false), %% TODO: Is a UnanimousLatestInnerNotRelevant_p test needed in this clause??? @@ -1305,46 +1352,168 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP, true -> ?REACT({b10, ?LINE}), ?REACT({b10, ?LINE, [{retries,Retries},{rank_latest, Rank_latest}, {rank_newprop, Rank_newprop}, {latest_author, P_latest#projection_v1.author_server}]}), % TODO debug delete me! - put(b10_hack, false), %% P_newprop is best, so let's write it. react_to_env_C300(P_newprop, P_latest, S) end. -react_to_env_C100(P_newprop, P_latest, - #ch_mgr{name=MyName, proj=P_current}=S) -> +react_to_env_C100(P_newprop, #projection_v1{author_server=Author_latest, + flap=Flap_latest0}=P_latest, + #ch_mgr{name=MyName, proj=P_current, + flap_not_sanes=NotSanesDict0}=S) -> ?REACT(c100), - I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi), - I_am_Repairing_in_latest_p = lists:member(MyName, - P_latest#projection_v1.repairing), - Current_sane_p = projection_transition_is_sane(P_current, P_latest, - MyName), - put(xxx_hack, [{p_current, machi_projection:make_summary(P_current)}, - {epoch_compare, P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number}, - {i_am_upi_in_newprop_p, I_am_UPI_in_newprop_p}, - {i_am_repairing_in_latest_p, I_am_Repairing_in_latest_p}]), - case Current_sane_p of + Sane = projection_transition_is_sane(P_current, P_latest, MyName), + if Sane == true -> ok; true -> ?V("insane-~w-~w,", [MyName, P_newprop#projection_v1.epoch_number]) end, %%% DELME!!! + Flap_latest = if is_record(Flap_latest0, flap_i) -> + Flap_latest0; + true -> + #flap_i{flapping_me=false} + end, + ?REACT({c100, ?LINE, [zoo, {me,MyName}, {author_latest,Author_latest}, + {flap_latest,Flap_latest}, + {flapping_me,Flap_latest#flap_i.flapping_me}]}), + + %% Note: The value of `Sane' may be `true', `false', or `term() /= true'. + %% The error value `false' is reserved for chain order violations. + %% Any other non-true value can be used for projection structure + %% construction errors, checksum error, etc. + case Sane of _ when P_current#projection_v1.epoch_number == 0 -> %% Epoch == 0 is reserved for first-time, just booting conditions. ?REACT({c100, ?LINE, [first_write]}), + erase(perhaps_reset_loop), react_to_env_C110(P_latest, S); true -> ?REACT({c100, ?LINE, [sane]}), + erase(perhaps_reset_loop), react_to_env_C110(P_latest, S); + %% 20150715: I've seen this loop happen with {expected_author2,X} + %% where nobody agrees, weird. 
+        false when Author_latest == MyName andalso
+                   is_record(Flap_latest, flap_i) andalso
+                   Flap_latest#flap_i.flapping_me == true ->
+            ?REACT({c100, ?LINE}),
+            ?V("\n\n1YOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]),
+            %% This is a fun case.  We had just enough asymmetric partition
+            %% to cause the chain to fragment into two *incompatible* and
+            %% *overlapping membership* chains, but the chain fragmentation
+            %% happened "quickly" enough so that by the time everyone's flap
+            %% counters hit the flap_limit, the asymmetric partition has
+            %% disappeared ... we'd be stuck in a flapping state forever (or
+            %% until the partition situation changes again, which might be a
+            %% very long time).
+            %%
+            %% Alas, this case took a long time to find in model checking
+            %% zillions of asymmetric partitions.  Our solution is a bit
+            %% harsh: we fall back to the "none projection" and let the chain
+            %% reassemble from there.  Hopefully this case is quite rare,
+            %% since asymmetric partitions (we assume) are pretty rare?
+            %%
+            %% Examples of overlapping membership insanity (at same instant):
+            %% Key: {author, suggested UPI, suggested Repairing}
+            %%
+            %%    {a,[a,b],[c,d,e]},
+            %%    {b,[a,b],[c,d,e]},
+            %%    {c,[e,b],[a,c,d]},
+            %%    {d,[a,b],[c,d,e]},
+            %%    {e,[e,b],[a,c,d]},
+            %% OR
+            %%    [{a,[c,e],[a,b,d]},
+            %%     {b,[e,a,b,c,d],[]},
+            %%     {c,[c,e],[a,b,d]},
+            %%     {d,[c,e],[a,b,d]},
+            %%     {e,[c,e],[a,b,d]}]
+            %%
+            %% So, I'd tried this kind of "if everyone is doing it, then we
+            %% 'agree' and we can do something different" strategy before,
+            %% and it didn't work then.  Silly me.  Distributed systems
+            %% lesson #823: do not forget the past.  In a situation created
+            %% by PULSE, of all=[a,b,c,d,e], b & d & e were scheduled
+            %% completely unfairly.  So a & c were the only authors ever to
+            %% successfully write a suggested projection to a public store.
+            %% Oops.
+            %%
+            %% So, we're going to keep track in #ch_mgr state of the number
+            %% of times that this insane judgement has happened.
+
+            react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName,
+                                    P_newprop, P_latest, S);
+        {expected_author2,_}=_ExpectedErr when Author_latest == MyName andalso
+                                               is_record(Flap_latest, flap_i) andalso
+                                               Flap_latest#flap_i.flapping_me == true ->
+            ?REACT({c100, ?LINE}),
+            react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName,
+                                    P_newprop, P_latest, S);
+        {expected_author2,_ExpectedAuthor2}=_ExpectedErr ->
+            case get(perhaps_reset_loop) of
+                undefined ->
+                    put(perhaps_reset_loop, 1),
+                    ?REACT({c100, ?LINE, [not_sane, get(why2), _ExpectedErr]}),
+                    react_to_env_C300(P_newprop, P_latest, S);
+                X when X > ?TOO_FREQUENT_BREAKER ->
+                    %% Ha, yes, this is possible.  For example:
+                    %% outer: author=e,upi=[b,a,d],repair=[c,e]
+                    %% inner: author=e,upi=[b,e],  repair=[]
+                    %% In this case, the transition from inner to outer by A30
+                    %% has chosen the wrong author.  We have two choices.
+                    %% 1. Accept this transition, because it really was the
+                    %%    safe & transition-approved UPI+repairing that we
+                    %%    were using while we were flapping.  I'm 99% certain
+                    %%    that this is safe.  TODO: Verify
+                    %% 2. I'm not yet 100% certain that #1 is safe, so instead
+                    %%    we fall back to the one thing that we know is safe:
+                    %%    the 'none' projection, which lets the chain rebuild
+                    %%    itself normally during future iterations.
+ ?REACT({c100, ?LINE}), + react_to_env_C103(P_latest, S); + X -> + put(perhaps_reset_loop, X+1), + ?REACT({c100, ?LINE, [not_sane, get(why2), _ExpectedErr]}), + react_to_env_C300(P_newprop, P_latest, S) + end; _AnyOtherReturnValue -> %% P_latest is not sane. %% By process of elimination, P_newprop is best, %% so let's write it. - ?REACT({c100, ?LINE, [not_sane]}), + ?REACT({c100, ?LINE, [not_sane, get(why2), _AnyOtherReturnValue]}), + erase(perhaps_reset_loop), react_to_env_C300(P_newprop, P_latest, S) end. +react_to_env_C100_inner(Author_latest, NotSanesDict0, MyName, + P_newprop, P_latest, S) -> + NotSanesDict = orddict:update_counter(Author_latest, 1, NotSanesDict0), + S2 = S#ch_mgr{flap_not_sanes=NotSanesDict}, + case orddict:fetch(Author_latest, NotSanesDict) of + N when N > ?TOO_FREQUENT_BREAKER -> + ?V("\n\nYOYO ~w breaking the cycle of ~p\n", [MyName, machi_projection:make_summary(P_latest)]), + ?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}), + react_to_env_C103(P_latest, S2); + N -> + ?REACT({c100, ?LINE, [{not_sanes_author_count, N}]}), + react_to_env_C300(P_newprop, P_latest, S2) + end. + +react_to_env_C103(#projection_v1{epoch_number=Epoch_latest, + all_members=All_list, + members_dict=MembersDict} = P_latest, + #ch_mgr{name=MyName}=S) -> + #projection_v1{epoch_number=Epoch_latest, + all_members=All_list, + members_dict=MembersDict} = P_latest, + P_none0 = make_none_projection(MyName, All_list, MembersDict), + P_none1 = P_none0#projection_v1{epoch_number=Epoch_latest, + dbg=[{none_projection,true}]}, + P_none = machi_projection:update_checksum(P_none1), + %% Use it, darn it, because it's 100% safe. And exit flapping state. + react_to_env_C100(P_none, P_none, S#ch_mgr{flaps=0, + flap_start=?NOT_FLAPPING_START, + flap_not_sanes=orddict:new()}). + react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> ?REACT(c110), - %% Extra_todo = [], - Extra_todo = get(xxx_hack), - %% Extra_todo = [{hee, lists:reverse(get(react))}], + Extra_todo = [{react,get(react)}], P_latest2 = machi_projection:update_dbg2(P_latest, Extra_todo), MyNamePid = proxy_pid(MyName, S), @@ -1352,19 +1521,34 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> %% This is the local projection store. Use a larger timeout, so %% that things locally are pretty horrible if we're killed by a %% timeout exception. - {ok,Goo} = {?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO*30),Goo}, + %% ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO*30), + Goo = P_latest2#projection_v1.epoch_number, + %% ?V("HEE110 ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse(get(react))]), + + case {?FLU_PC:write_projection(MyNamePid, private, P_latest2,?TO*30),Goo} of + {ok, Goo} -> + ok; + Else -> + Summ = machi_projection:make_summary(P_latest), + io:format(user, "C11 error by ~w: ~w, ~w, ~w\n", + [MyName, Else, Summ, get(react)]), + error_logger:error_msg("C11 error by ~w: ~w, ~w, ~w\n", + [MyName, Else, Summ, get(react)]), + exit({c110_failure, MyName, Else, Summ}) + end, case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of true -> {_,_,C} = os:timestamp(), MSec = trunc(C / 1000), {HH,MM,SS} = time(), + P_latest2x = P_latest2#projection_v1{dbg2=[]}, % limit verbose len. 
case inner_projection_exists(P_latest2) of false -> case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of true -> - io:format(user, "\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses plain: ~w\n", + ?V("\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses plain: ~w\n", [HH,MM,SS,MSec, S#ch_mgr.name, - machi_projection:make_summary(P_latest2)]); + machi_projection:make_summary(P_latest2x)]); _ -> ok end; @@ -1372,9 +1556,10 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of true -> P_inner = inner_projection_or_self(P_latest2), - io:format(user, "\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses inner: ~w\n", + P_innerx = P_inner#projection_v1{dbg2=[]}, % limit verbose len. + ?V("\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses inner: ~w\n", [HH,MM,SS,MSec, S#ch_mgr.name, - machi_projection:make_summary(P_inner)]); + machi_projection:make_summary(P_innerx)]); _ -> ok end @@ -1384,7 +1569,7 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) -> end, react_to_env_C120(P_latest, [], S). -react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H} = S) -> +react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H}=S) -> ?REACT(c120), H2 = queue:in(P_latest, H), H3 = case queue:len(H2) of @@ -1399,7 +1584,7 @@ react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H} = S) -> H2 end, %% HH = [if is_atom(X) -> X; is_tuple(X) -> {element(1,X), element(2,X)} end || X <- get(react), is_atom(X) orelse size(X) == 3], - %% io:format(user, "HEE120 ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse(HH)]), + %% ?V("HEE120 ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse(HH)]), ?REACT({c120, [{latest, machi_projection:make_summary(P_latest)}]}), {{now_using, FinalProps, P_latest#projection_v1.epoch_number}, @@ -1411,8 +1596,8 @@ react_to_env_C200(Retries, P_latest, S) -> AuthorProxyPid = proxy_pid(P_latest#projection_v1.author_server, S), ?FLU_PC:kick_projection_reaction(AuthorProxyPid, []) catch _Type:_Err -> - io:format(user, "TODO: tell_author_yo error is probably ignorable: ~p ~p\n", - [_Type, _Err]), + %% ?V("TODO: tell_author_yo is broken: ~p ~p\n", + %% [_Type, _Err]), ok end, react_to_env_C210(Retries, S). @@ -1444,12 +1629,12 @@ react_to_env_C310(P_newprop, S) -> ?REACT({c310, ?LINE, [{newprop, machi_projection:make_summary(P_newprop)}, {write_result, WriteRes}]}), -%% io:format(user, "HEE310 ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse(get(react))]), react_to_env_A10(S2). calculate_flaps(P_newprop, _P_current, _FlapLimit, #ch_mgr{name=MyName, proj_history=H, flap_start=FlapStart, - flaps=Flaps, runenv=RunEnv1} = S) -> + flaps=Flaps, flap_not_sanes=NotSanesDict0, + runenv=RunEnv1}=S) -> HistoryPs = queue:to_list(H), Ps = HistoryPs ++ [P_newprop], UniqueProposalSummaries = lists:usort([{P#projection_v1.upi, @@ -1511,6 +1696,7 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, true -> NewFlapStart = FlapStart end, + NotSanesDict = NotSanesDict0, %% Wow, this behavior is almost spooky. %% @@ -1539,7 +1725,8 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, AllHosed = lists:usort(DownUnion ++ HosedTransUnion ++ BadFLUs); {_N, _} -> NewFlaps = 0, - NewFlapStart = {{epk,-1},?NOT_FLAPPING}, + NewFlapStart = ?NOT_FLAPPING_START, + NotSanesDict = orddict:new(), AllFlapCounts = [], AllHosed = [] end, @@ -1562,7 +1749,8 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit, %% It isn't doing what I'd originally intended. Fix it. 
     {machi_projection:update_checksum(P_newprop#projection_v1{
                                                flap=FlappingI}),
-     S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv1}}.
+     S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart,
+              flap_not_sanes=NotSanesDict, runenv=RunEnv1}}.
 
 make_flapping_i() ->
     make_flapping_i({{epk,-1},?NOT_FLAPPING}, 0, [], [], []).
@@ -1595,15 +1783,109 @@ projection_transitions_are_sane([P1, P2|T], RelativeToServer, RetrospectiveP) ->
             Else
     end.
 
-projection_transition_is_sane(P1, P2, RelativeToServer) ->
-    projection_transition_is_sane(P1, P2, RelativeToServer, false).
-
 -ifdef(TEST).
 projection_transition_is_sane_retrospective(P1, P2, RelativeToServer) ->
     projection_transition_is_sane(P1, P2, RelativeToServer, true).
 -endif. % TEST
 
+projection_transition_is_sane(P1, P2, RelativeToServer) ->
+    projection_transition_is_sane(P1, P2, RelativeToServer, false).
+
+%% @doc Check if a projection transition is sane & safe.
+%%
+%% NOTE: The return value convention is `true' for sane/safe and
+%% `term() /= true' for any unsafe/insane value.
+
+projection_transition_is_sane(P1, P2, RelativeToServer, RetrospectiveP) ->
+    put(why2, []),
+    case projection_transition_is_sane_with_si_epoch(
+           P1, P2, RelativeToServer, RetrospectiveP) of
+        true ->
+            HasInner1 = inner_projection_exists(P1),
+            HasInner2 = inner_projection_exists(P2),
+            if HasInner1 orelse HasInner2 ->
+                    Inner1 = inner_projection_or_self(P1),
+                    Inner2 = inner_projection_or_self(P2),
+                    if HasInner1 andalso HasInner2 ->
+                            %% In case of inner->inner transition, we must allow
+                            %% the epoch number to remain constant.  Thus, we
+                            %% call the function that does not check for a
+                            %% strictly-increasing epoch.
+                            ?RETURN2(
+                              projection_transition_is_sane_final_review(P1, P2,
+                                projection_transition_is_sane_except_si_epoch(
+                                  Inner1, Inner2, RelativeToServer, RetrospectiveP)));
+                       true ->
+                            ?RETURN2(
+                              projection_transition_is_sane_final_review(P1, P2,
+                                projection_transition_is_sane_with_si_epoch(
+                                  Inner1, Inner2, RelativeToServer, RetrospectiveP)))
+                    end;
+               true ->
+                    ?RETURN2(true)
+            end;
+        Else ->
+            ?RETURN2(Else)
+    end.
+
+projection_transition_is_sane_final_review(_P1, P2,
+                                           {expected_author2,UPI1_tail}=Else) ->
+    %% Reminder: P1 & P2 are outer projections
+    %%
+    %% We have a small problem for state transition sanity checking in the
+    %% case where we are flapping *and* a repair has finished.  One of the
+    %% sanity checks in simple_chain_state_transition_is_sane() is that
+    %% the author of P2 in this case must be the tail of P1's UPI: i.e.,
+    %% it's the tail's responsibility to perform repair, therefore the tail
+    %% must damn well be the author of any transition that says a repair
+    %% finished successfully.
+    %%
+    %% The problem is that author_server of the inner projection does not
+    %% reflect the actual author!  See the comment with the text
+    %% "The inner projection will have a fake author" in react_to_env_A30().
+    %%
+    %% So, there's a special return value that tells us to try to check for
+    %% the correct authorship here.
+
+    if UPI1_tail == P2#projection_v1.author_server ->
+            ?RETURN2(true);
+       true ->
+            ?RETURN2(Else)
+    end;
+projection_transition_is_sane_final_review(_P1, _P2, Else) ->
+    ?RETURN2(Else).
+
+%% @doc Check if a projection transition is sane & safe with a
+%% strictly increasing epoch number.
+%%
+%% NOTE: The return value convention is `true' for sane/safe and
+%% `term() /= true' for any unsafe/insane value.
+ +projection_transition_is_sane_with_si_epoch( + #projection_v1{epoch_number=Epoch1} = P1, + #projection_v1{epoch_number=Epoch2} = P2, + RelativeToServer, RetrospectiveP) -> + case projection_transition_is_sane_except_si_epoch( + P1, P2, RelativeToServer, RetrospectiveP) of + true -> + %% Must be a strictly increasing epoch. + case Epoch2 > Epoch1 of + true -> + ?RETURN2(true); + false -> + ?RETURN2({epoch_not_si, Epoch2, 'not_gt', Epoch1}) + end; + Else -> + ?RETURN2(Else) + end. + +%% @doc Check if a projection transition is sane & safe with the +%% exception of a strictly increasing epoch number (equality is ok). +%% +%% NOTE: The return value convention is `true' for sane/safe and +%% `term() /= true' for any unsafe/insane value. + +projection_transition_is_sane_except_si_epoch( #projection_v1{epoch_number=Epoch1, epoch_csum=CSum1, creation_time=CreationTime1, @@ -1622,17 +1904,14 @@ projection_transition_is_sane( upi=UPI_list2, repairing=Repairing_list2, dbg=Dbg2} = P2, - RelativeToServer, RetrospectiveP) -> + RelativeToServer, __TODO_RetrospectiveP) -> + ?RETURN2(undefined), try %% General notes: %% %% I'm making no attempt to be "efficient" here. All of these data - %% structures are small, and they're not called zillions of times per + %% structures are small, and the funcs aren't called zillions of times per %% second. - %% - %% The chain sequence/order checks at the bottom of this function aren't - %% as easy-to-read as they ought to be. However, I'm moderately confident - %% that it isn't buggy. TODO: refactor them for clarity. true = is_integer(Epoch1) andalso is_integer(Epoch2), true = is_binary(CSum1) andalso is_binary(CSum2), @@ -1645,8 +1924,9 @@ projection_transition_is_sane( true = is_list(Repairing_list1) andalso is_list(Repairing_list2), true = is_list(Dbg1) andalso is_list(Dbg2), - true = Epoch2 > Epoch1, - All_list1 = All_list2, % todo will probably change + %% Don't check for strictly increasing epoch here: that's the job of + %% projection_transition_is_sane_with_si_epoch(). + true = Epoch2 >= Epoch1, %% No duplicates true = lists:sort(Down_list2) == lists:usort(Down_list2), @@ -1654,6 +1934,7 @@ projection_transition_is_sane( true = lists:sort(Repairing_list2) == lists:usort(Repairing_list2), %% Disjoint-ness + All_list1 = All_list2, % todo will probably change true = lists:sort(All_list2) == lists:sort(Down_list2 ++ UPI_list2 ++ Repairing_list2), [] = [X || X <- Down_list2, not lists:member(X, All_list2)], @@ -1666,252 +1947,28 @@ projection_transition_is_sane( true = sets:is_disjoint(DownS2, RepairingS2), true = sets:is_disjoint(UPIS2, RepairingS2), - %% Additions to the UPI chain may only be at the tail - UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2), - true = - if UPI_common_prefix == [] -> - if UPI_list1 == [] orelse UPI_list2 == [] -> - %% If the common prefix is empty, then one of the - %% inputs must be empty. - true; - true -> - %% Otherwise, we have a case of UPI changing from - %% one of these two situations: - %% - %% UPI_list1 -> UPI_list2 - %% ------------------------------------------------- - %% [d,c,b,a] -> [c,a] - %% [d,c,b,a] -> [c,a,repair_finished_added_to_tail]. - NotUPI2 = (Down_list2 ++ Repairing_list2), - case lists:prefix(UPI_list1 -- NotUPI2, UPI_list2) of - true -> - true; - false -> - %% Here's a possible failure scenario: - %% UPI_list1 -> UPI_list2 - %% Repairing_list1 -> Repairing_list2 - %% ----------------------------------- - %% [a,b,c] author=a -> [c,a] author=c - %% [] [b] - %% - %% ... where RelativeToServer=b. 
In this case, b - %% has been partitioned for a while and has only - %% now just learned of several epoch transitions. - %% If the author of both is also in the UPI of - %% both, then those authors would not have allowed - %% a bad transition, so we will assume this - %% transition is OK. - lists:member(AuthorServer1, UPI_list1) - andalso - lists:member(AuthorServer2, UPI_list2) - end - end; - true -> - true - end, - true = lists:prefix(UPI_common_prefix, UPI_list1), - true = lists:prefix(UPI_common_prefix, UPI_list2), - UPI_1_suffix = UPI_list1 -- UPI_common_prefix, - UPI_2_suffix = UPI_list2 -- UPI_common_prefix, + %% We won't check the checksum of P1, but we will of P2. + P2 = machi_projection:update_checksum(P2), - MoreCheckingP = - RelativeToServer == undefined - orelse - not (lists:member(RelativeToServer, Down_list2) orelse - lists:member(RelativeToServer, Repairing_list2)), - - UPIs_are_disjointP = ordsets:is_disjoint(ordsets:from_list(UPI_list1), - ordsets:from_list(UPI_list2)), - case UPI_2_suffix -- UPI_list1 of - [] -> - true; - [_|_] = _Added_by_2 -> - if RetrospectiveP -> - %% Any servers added to the UPI must be added from the - %% repairing list ... but in retrospective mode (where - %% we're checking only the transitions where all - %% UPI+repairing participants have unanimous private - %% projections!), and if we're under asymmetric - %% partition/churn, then we may not see the repairing - %% list. So we will not check that condition here. - true; - not RetrospectiveP -> - %% We're not retrospective. So, if some server was - %% added by to the UPI, then that means that it was - %% added by repair. And repair is coordinated by the - %% UPI tail/last. -%io:format(user, "g: UPI_list1=~w, UPI_list2=~w, UPI_2_suffix=~w, ", -% [UPI_list1, UPI_list2, UPI_2_suffix]), -%io:format(user, "g", []), - true = UPI_list1 == [] orelse - UPIs_are_disjointP orelse - (lists:last(UPI_list1) == AuthorServer2) - end - end, - - if not MoreCheckingP -> - ok; - MoreCheckingP -> - %% Where did elements in UPI_2_suffix come from? - %% Only two sources are permitted. - Oops_check_UPI_2_suffix = - [lists:member(X, Repairing_list1) % X added after repair done - orelse - lists:member(X, UPI_list1) % X in UPI_list1 after common pref - || X <- UPI_2_suffix], - %% Grrrrr, ok, so this check isn't good, at least at bootstrap time. - %% TODO: false = lists:member(false, Oops_check_UPI_2_suffix), - - %% The UPI_2_suffix must exactly be equal to: ordered items from - %% UPI_list1 concat'ed with ordered items from Repairing_list1. - %% Both temp vars below preserve relative order! - UPI_2_suffix_from_UPI1 = [X || X <- UPI_1_suffix, - lists:member(X, UPI_list2)], - UPI_2_suffix_from_Repairing1 = [X || X <- UPI_2_suffix, - lists:member(X, Repairing_list1)], - %% true? - UPI_2_concat = (UPI_2_suffix_from_UPI1 ++ UPI_2_suffix_from_Repairing1), - if UPI_2_suffix == UPI_2_concat -> - ok; - true -> - %% 'make dialyzer' will believe that this can never succeed. - %% 'make dialyzer-test' will not complain, however. - if RetrospectiveP -> - %% We are in retrospective mode. But there are - %% some transitions that are difficult to find - %% when standing outside of all of the FLUs and - %% examining their behavior. (In contrast to - %% this same function being called "in the path" - %% of a projection transition by a particular FLU - %% which knows exactly its prior projection and - %% exactly what it intends to do.) Perhaps this - %% exception clause here can go away with - %% better/more clever retrospection analysis? 
- %% - %% Here's a case that PULSE found: - %% FLU B: - %% E=257: UPI=[c,a], REPAIRING=[b] - %% E=284: UPI=[c,a], REPAIRING=[b] - %% FLU a: - %% E=251: UPI=[c], REPAIRING=[a,b] - %% E=284: UPI=[c,a], REPAIRING=[b] - %% FLU c: - %% E=282: UPI=[c], REPAIRING=[a,b] - %% E=284: UPI=[c,a], REPAIRING=[b] - %% - %% From the perspective of each individual FLU, - %% the unanimous transition at epoch #284 is - %% good. The repair that is done by FLU c -> a - %% is likewise good. - %% - %% From a retrospective point of view (and the - %% current implementation), there's a bad-looking - %% transition from epoch #269 to #284. This is - %% from the point of view of the last two - %% unanimous private projection store epochs: - %% - %% E=269: UPI=[c], REPAIRING=[], DOWN=[a,b] - %% E=284: UPI=[c,a], REPAIRING=[b] - %% - %% The retrospective view by - %% machi_chain_manager1_pulse.erl just can't - %% reason correctly about this situation. We - %% will instead rely on the non-retrospective - %% sanity checking that each FLU does before it - %% writes to its private projection store and - %% then adopts that projection (and unwedges - %% itself, etc etc). - - if UPIs_are_disjointP -> - true; - true -> - exit({todo, revisit, ?MODULE, ?LINE, - [ - {oops_check_UPI_2_suffix, Oops_check_UPI_2_suffix}, - {upi_2_suffix, UPI_2_suffix}, - {upi_2_concat, UPI_2_concat}, - {retrospectivep, RetrospectiveP} - ]}), - io:format(user, "|~p,~p TODO revisit|", - [?MODULE, ?LINE]), - ok - end; - true -> - %% The following is OK: We're shifting from a - %% normal projection to an inner one. The old - %% normal has a UPI that has nothing to do with - %% RelativeToServer a.k.a. me. - %% Or else the UPI_list1 is empty, and I'm - %% the only member of UPI_list2 - %% But the new/suffix is definitely me. - %% from: - %% {epoch,847},{author,c},{upi,[c]},{repair,[]}, - %% {down,[a,b,d]} - %% to: - %% {epoch,848},{author,a},{upi,[a]},{repair,[]}, - %% {down,[b,c,d]} - FirstCase_p = (UPI_2_suffix == [AuthorServer2]) - andalso - ((inner_projection_exists(P1) == false - andalso - inner_projection_exists(P2) == true) - orelse UPI_list1 == []), - - %% Here's another case that's alright: - %% - %% {a,{err,exit, - %% {upi_2_suffix_error,[c]}, .... - %% - %% from: - %% {epoch,937},{author,a},{upi,[a,b]},{repair,[]}, - %% {down,[c]} - %% to: - %% {epoch,943},{author,a},{upi,{a,b,c},{repair,[]}, - %% {down,[]} - - %% The author server doesn't matter. However, - %% there were two other epochs in between, 939 - %% and 941, where there wasn't universal agreement - %% of private projections. The repair controller - %% at the tail, 'b', had decided that the repair - %% of 'c' was finished @ epoch 941. - SecondCase_p = ((UPI_2_suffix -- Repairing_list1) - == []), - if FirstCase_p -> - true; - SecondCase_p -> - true; - UPIs_are_disjointP -> - %% If there's no overlap at all between - %% UPI_list1 & UPI_list2, then we're OK - %% here. - true; - true -> - exit({upi_2_suffix_error, UPI_2_suffix}) - end - end - end - end, - true + %% Hooray, all basic properties of the projection's elements are + %% not obviously bad. Now let's check if the UPI+Repairing->UPI + %% transition is good. + ?RETURN2( + chain_state_transition_is_sane(AuthorServer1, UPI_list1, Repairing_list1, + AuthorServer2, UPI_list2)) catch _Type:_Err -> + ?RETURN2(oops), S1 = machi_projection:make_summary(P1), S2 = machi_projection:make_summary(P2), Trace = erlang:get_stacktrace(), + %% There are basic data structure checks only, do not return `false' + %% here. 
             {err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer,
              history, (catch lists:sort([no_history])),
              stack, Trace}
     end.
 
-find_common_prefix([], _) ->
-    [];
-find_common_prefix(_, []) ->
-    [];
-find_common_prefix([H|L1], [H|L2]) ->
-    [H|find_common_prefix(L1, L2)];
-find_common_prefix(_, _) ->
-    [].
-
 sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list) ->
     USec = calc_sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list),
     timer:sleep(USec),
@@ -2047,6 +2104,7 @@ do_repair(
                   repairing=[_|_]=Repairing,
                   members_dict=MembersDict}}=_S_copy,
           Opts, ap_mode=RepairMode) ->
+?V("RePaiR-~w,", [self()]),
     T1 = os:timestamp(),
     RepairId = proplists:get_value(repair_id, Opts, id1),
     error_logger:info_msg("Repair start: tail ~p of ~p -> ~p, ~p ID ~w\n",
@@ -2089,8 +2147,10 @@ perhaps_call_t(S, Partitions, FLU, DoIt) ->
         perhaps_call(S, Partitions, FLU, DoIt)
     catch
         exit:timeout ->
+            remember_partition_hack(FLU),
             {error, partition};
         exit:{timeout,_} ->
+            remember_partition_hack(FLU),
             {error, partition}
     end.
 
@@ -2125,3 +2185,167 @@ remember_partition_hack(FLU) ->
     put(remember_partition_hack, [FLU|get(remember_partition_hack)]).
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% @doc A simple technique for checking chain state transition safety.
+%%
+%% Math tells us that any change from state `UPI1' plus `Repair1' to state
+%% `UPI2' is OK as long as `UPI2' is a concatenation of some
+%% order-preserving combination from `UPI1' with some order-preserving
+%% combination from `Repair1'.
+%%
+%% ```
+%% Good_UPI2s = [ X ++ Y || X <- machi_util:ordered_combinations(UPI1),
+%%                          Y <- machi_util:ordered_combinations(Repair1)]'''
+%%
+%% Rather than creating that list and then checking if `UPI2' is in
+%% it, we try a `diff'-like technique to check for basic state
+%% transition safety.  See docs for {@link mk/3} for more detail.
+%%
+%% ```
+%% 2> machi_chain_manager1:mk([a,b], [], [a]).
+%% {[keep,del],[]}       %% good transition
+%% 3> machi_chain_manager1:mk([a,b], [], [b,a]).
+%% {[del,keep],[]}       %% bad transition: too few 'keep' for UPI2's length 2
+%% 4> machi_chain_manager1:mk([a,b], [c,d,e], [a,d]).
+%% {[keep,del],[2]}      %% good transition
+%% 5> machi_chain_manager1:mk([a,b], [c,d,e], [a,bogus]).
+%% {[keep,del],[error]}  %% bad transition: 'bogus' not in Repair1'''
+
+simple_chain_state_transition_is_sane(UPI1, Repair1, UPI2) ->
+    ?RETURN2(simple_chain_state_transition_is_sane(undefined, UPI1, Repair1,
+                                                   undefined, UPI2)).
+
+%% @doc Simple check if a projection transition is sane & safe: we assume
+%% that the caller has checked basic projection data structure contents.
+%%
+%% NOTE: The return value convention is `true' for sane/safe and
+%% `term() /= true' for any unsafe/insane value.
+
+simple_chain_state_transition_is_sane(_Author1, UPI1, Repair1, Author2, UPI2) ->
+    {KeepsDels, Orders} = mk(UPI1, Repair1, UPI2),
+    NumKeeps = length([x || keep <- KeepsDels]),
+    NumOrders = length(Orders),
+    NoErrorInOrders = (false == lists:member(error, Orders)),
+    OrdersOK = (Orders == lists:sort(Orders)),
+    UPI2LengthOK = (length(UPI2) == NumKeeps + NumOrders),
+    Answer1 = NoErrorInOrders andalso OrdersOK andalso UPI2LengthOK,
+    catch ?REACT({simple, ?LINE,
+                  [{sane, answer1,Answer1,
+                    author1,_Author1, upi1,UPI1, repair1,Repair1,
+                    author2,Author2, upi2,UPI2,
+                    keepsdels,KeepsDels, orders,Orders, numKeeps,NumKeeps,
+                    numOrders,NumOrders, answer1,Answer1}]}),
+    if not Answer1 ->
+            ?RETURN2(Answer1);
+       true ->
+            if Orders == [] ->
+                    %% No repairing nodes have joined UPI2.  Keep original answer.
+                    ?RETURN2(Answer1);
+               Author2 == undefined ->
+                    %% At least one Repairing1 element is now in UPI2.
+                    %% We need Author2 to make a better decision.  Go
+                    %% with what we know, silly caller for not giving
+                    %% us what we need.
+                    ?RETURN2(Answer1);
+               Author2 /= undefined ->
+                    %% At least one Repairing1 element is now in UPI2.
+                    %% We permit only the tail to author such a UPI2.
+                    case catch(lists:last(UPI1)) of
+                        UPI1_tail when UPI1_tail == Author2 ->
+                            ?RETURN2(true);
+                        UPI1_tail ->
+                            ?RETURN2({expected_author2,UPI1_tail})
+                    end
+            end
+    end.
+
+%% @doc Check if a projection transition is sane & safe: we assume
+%% that the caller has checked basic projection data structure contents.
+%%
+%% NOTE: The return value convention is `true' for sane/safe and `term() /=
+%% true' for any unsafe/insane value.  This function (and its callee
+%% functions) are the only functions (throughout all of the chain state
+%% transition sanity checking functions) that are allowed to return `false'.
+
+chain_state_transition_is_sane(Author1, UPI1, Repair1, Author2, UPI2) ->
+    ToSelfOnly_p = if UPI2 == [Author2] -> true;
+                      true              -> false
+                   end,
+    Disjoint_UPIs = ordsets:is_disjoint(ordsets:from_list(UPI1),
+                                        ordsets:from_list(UPI2)),
+    %% This if statement contains the only exceptions that we make to
+    %% the judgement of simple_chain_state_transition_is_sane().
+    if ToSelfOnly_p ->
+            %% The transition is to UPI2=[Author2].
+            %% For AP mode, this transition is always safe (though not
+            %% always optimal for highest availability).
+            ?RETURN2(true);
+       Disjoint_UPIs ->
+            %% The transition from UPI1 -> UPI2 where the two are
+            %% disjoint/no FLUs in common.
+            %% For AP mode, this transition is always safe (though not
+            %% always optimal for highest availability).
+            ?RETURN2(true);
+       true ->
+            ?RETURN2(
+              simple_chain_state_transition_is_sane(Author1, UPI1, Repair1,
                                                    Author2, UPI2))
+    end.
+
+%% @doc Create a 2-tuple that describes how `UPI1' + `Repair1' are
+%% transformed into `UPI2' in a chain state change.
+%%
+%% The 1st part of the 2-tuple is a list of `keep' and `del' instructions,
+%% relative to the items in `UPI1' and whether they are present (`keep') or
+%% absent (`del') in `UPI2'.
+%%
+%% The 2nd part of the 2-tuple is `list(non_neg_integer()|error)' that
+%% describes the relative order of items in `Repair1' that appear in
+%% `UPI2'.  The `error' atom is used to denote items not present in
+%% `Repair1'.
+
+mk(UPI1, Repair1, UPI2) ->
+    mk(UPI1, Repair1, UPI2, []).
+
+mk([X|UPI1], Repair1, [X|UPI2], Acc) ->
+    mk(UPI1, Repair1, UPI2, [keep|Acc]);
+mk([X|UPI1], Repair1, UPI2, Acc) ->
+    mk(UPI1, Repair1, UPI2 -- [X], [del|Acc]);
+mk([], [], [], Acc) ->
+    {lists:reverse(Acc), []};
+mk([], Repair1, UPI2, Acc) ->
+    {lists:reverse(Acc), machi_util:mk_order(UPI2, Repair1)}.
+
+scan_dir(Dir, FileFilterFun, FoldEachFun, FoldEachAcc) ->
+    Files = filelib:wildcard(Dir ++ "/*"),
+    Xs = [binary_to_term(element(2, file:read_file(File))) || File <- Files],
+    Xs2 = FileFilterFun(Xs),
+    lists:foldl(FoldEachFun, FoldEachAcc, Xs2).
+
+get_ps(#projection_v1{epoch_number=Epoch, dbg=Dbg}, Acc) ->
+    [{Epoch, proplists:get_value(ps, Dbg, [])}|Acc].
+
+strip_dbg2(P) ->
+    P#projection_v1{dbg2=[stripped]}.
+
+has_not_sane(#projection_v1{epoch_number=Epoch, dbg2=Dbg2}, Acc) ->
+    Reacts = proplists:get_value(react, Dbg2, []),
+    case [X || {_State,_Line, [not_sane|_]}=X <- Reacts] of
+        [] ->
+            Acc;
+        Xs ->
+            [{Epoch, Xs}|Acc]
+    end.
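The @doc for simple_chain_state_transition_is_sane/3 above claims that the mk/3-based check accepts exactly the transitions enumerated by Good_UPI2s. A throwaway cross-check of that claim, suitable for the shell (a sketch only; agrees/3 is hypothetical and brute-forces small lists with machi_util:ordered_combinations/1 from this same change):

    %% Compare the diff-like check against brute-force enumeration for
    %% one candidate transition UPI1 ++ Repair1 -> UPI2.
    agrees(UPI1, Repair1, UPI2) ->
        Good_UPI2s = [X ++ Y || X <- machi_util:ordered_combinations(UPI1),
                                Y <- machi_util:ordered_combinations(Repair1)],
        BruteForce = lists:member(UPI2, Good_UPI2s),
        Simple = (true == machi_chain_manager1:simple_chain_state_transition_is_sane(
                            UPI1, Repair1, UPI2)),
        BruteForce == Simple.

For example, agrees([a,b], [c,d,e], [a,d]) and agrees([a,b], [], [b,a]) should both return true: the two checks agree that the first transition is sane and that the second is not.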
+ +all_hosed_history(#projection_v1{epoch_number=_Epoch, flap=Flap}, + {OldAllHosed,Acc}) -> + AllHosed = if Flap == undefined -> + []; + true -> + Flap#flap_i.all_hosed + end, + if AllHosed == OldAllHosed -> + {OldAllHosed, Acc}; + true -> + {AllHosed, [AllHosed|Acc]} + end. diff --git a/src/machi_cr_client.erl b/src/machi_cr_client.erl index ce30722..5512e2d 100644 --- a/src/machi_cr_client.erl +++ b/src/machi_cr_client.erl @@ -108,6 +108,7 @@ -include("machi.hrl"). -include("machi_projection.hrl"). +-include("machi_verbose.hrl"). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl index 5628282..04e1022 100644 --- a/src/machi_flu1.erl +++ b/src/machi_flu1.erl @@ -70,6 +70,8 @@ -include("machi.hrl"). -include("machi_pb.hrl"). -include("machi_projection.hrl"). +-define(V(X,Y), ok). +%% -include("machi_verbose.hrl"). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -316,7 +318,8 @@ net_server_loop(Sock, S) -> %% TODO: Weird that sometimes neither catch nor try/catch %% can prevent OTP's SASL from logging an error here. %% Error in process <0.545.0> with exit value: {badarg,[{erlang,port_command,....... - _ = (catch gen_tcp:send(Sock, Resp)), timer:sleep(1000), + %% TODO: is this what causes the intermittent PULSE deadlock errors? + %% _ = (catch gen_tcp:send(Sock, Resp)), timer:sleep(1000), (catch gen_tcp:close(Sock)), exit(normal) end. diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl index d7a1e7e..4aeb4be 100644 --- a/src/machi_flu1_client.erl +++ b/src/machi_flu1_client.erl @@ -45,6 +45,11 @@ -include("machi_pb.hrl"). -include("machi_projection.hrl"). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-endif. + -define(HARD_TIMEOUT, 2500). -export([ diff --git a/src/machi_flu_psup.erl b/src/machi_flu_psup.erl index b02f64d..692f080 100644 --- a/src/machi_flu_psup.erl +++ b/src/machi_flu_psup.erl @@ -61,6 +61,16 @@ -behaviour(supervisor). +-include("machi_verbose.hrl"). + +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-define(SHUTDOWN, infinity). +-else. +-define(SHUTDOWN, 5000). +-endif. + %% External API -export([make_package_spec/4, start_flu_package/4, stop_flu_package/1]). %% Internal API @@ -73,7 +83,7 @@ make_package_spec(FluName, TcpPort, DataDir, Props) -> {FluName, {machi_flu_psup, start_link, [FluName, TcpPort, DataDir, Props]}, - permanent, 5000, supervisor, []}. + permanent, ?SHUTDOWN, supervisor, []}. start_flu_package(FluName, TcpPort, DataDir, Props) -> Spec = make_package_spec(FluName, TcpPort, DataDir, Props), @@ -103,15 +113,15 @@ init([FluName, TcpPort, DataDir, Props0]) -> ProjSpec = {ProjRegName, {machi_projection_store, start_link, [ProjRegName, DataDir, FluName]}, - permanent, 5000, worker, []}, + permanent, ?SHUTDOWN, worker, []}, MgrSpec = {make_mgr_supname(FluName), {machi_chain_manager1, start_link, [FluName, [], Props]}, - permanent, 5000, worker, []}, + permanent, ?SHUTDOWN, worker, []}, FluSpec = {FluName, {machi_flu1, start_link, [ [{FluName, TcpPort, DataDir}|Props] ]}, - permanent, 5000, worker, []}, + permanent, ?SHUTDOWN, worker, []}, {ok, {SupFlags, [ProjSpec, MgrSpec, FluSpec]}}. make_p_regname(FluName) when is_atom(FluName) -> diff --git a/src/machi_flu_sup.erl b/src/machi_flu_sup.erl index 5082b55..e4562d1 100644 --- a/src/machi_flu_sup.erl +++ b/src/machi_flu_sup.erl @@ -28,6 +28,16 @@ -behaviour(supervisor). 
+-include("machi_verbose.hrl"). + +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-define(SHUTDOWN, infinity). +-else. +-define(SHUTDOWN, 5000). +-endif. + %% API -export([start_link/0]). @@ -45,10 +55,17 @@ init([]) -> MaxSecondsBetweenRestarts = 3600, SupFlags = {RestartStrategy, MaxRestarts, MaxSecondsBetweenRestarts}, - Ps = application:get_env(machi, initial_flus, []), + Ps = get_initial_flus(), FLU_specs = [machi_flu_psup:make_package_spec(FluName, TcpPort, DataDir, Props) || {FluName, TcpPort, DataDir, Props} <- Ps], {ok, {SupFlags, FLU_specs}}. +-ifdef(PULSE). +get_initial_flus() -> + []. +-else. % PULSE +get_initial_flus() -> + application:get_env(machi, initial_flus, []). +-endif. % PULSE diff --git a/src/machi_pb_translate.erl b/src/machi_pb_translate.erl index ea98289..adab3cf 100644 --- a/src/machi_pb_translate.erl +++ b/src/machi_pb_translate.erl @@ -26,6 +26,11 @@ -include("machi_pb.hrl"). -include("machi_projection.hrl"). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-endif. + -export([from_pb_request/1, from_pb_response/1, to_pb_request/2, @@ -180,7 +185,7 @@ from_pb_request(#mpb_request{req_id=ReqID, from_pb_request(#mpb_request{req_id=ReqID}) -> {ReqID, {high_error, 999966, "Unknown request"}}; from_pb_request(_Else) -> - io:format(user, "\nRRR from_pb_request(~p)\n", [_Else]), timer:sleep(2000), + io:format(user, "\nRRR from_pb_request(~p)\n", [_Else]), %%timer:sleep(2000), {<<>>, {high_error, 999667, "Unknown PB request"}}. from_pb_response(#mpb_ll_response{ diff --git a/src/machi_projection.erl b/src/machi_projection.erl index 3d94cb1..67dee72 100644 --- a/src/machi_projection.erl +++ b/src/machi_projection.erl @@ -35,7 +35,7 @@ %% @doc Create a new projection record. -new(MyName, MemberDict, UPI_list, Down_list, Repairing_list, Ps) -> +new(MyName, MemberDict, Down_list, UPI_list, Repairing_list, Ps) -> new(0, MyName, MemberDict, Down_list, UPI_list, Repairing_list, Ps). %% @doc Create a new projection record. @@ -141,15 +141,26 @@ compare(#projection_v1{epoch_number=E1}, %% @doc Create a proplist-style summary of a projection record. make_summary(#projection_v1{epoch_number=EpochNum, + epoch_csum= <<_CSum4:4/binary, _/binary>>=_CSum, all_members=_All_list, down=Down_list, author_server=Author, upi=UPI_list, repairing=Repairing_list, + inner=Inner, + flap=Flap, dbg=Dbg, dbg2=Dbg2}) -> + InnerInfo = if is_record(Inner, projection_v1) -> + [{inner, make_summary(Inner)}]; + true -> + [] + end, [{epoch,EpochNum},{author,Author}, - {upi,UPI_list},{repair,Repairing_list},{down,Down_list}, - {d,Dbg}, {d2,Dbg2}]. + {upi,UPI_list},{repair,Repairing_list},{down,Down_list}] ++ + InnerInfo ++ + [{flap, Flap}] ++ + %% [{flap, lists:flatten(io_lib:format("~p", [Flap]))}] ++ + [{d,Dbg}, {d2,Dbg2}]. %% @doc Make a `p_srvr_dict()' out of a list of `p_srvr()' or out of a %% `p_srvr_dict()'. diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl index 4c2664b..382d882 100644 --- a/src/machi_projection_store.erl +++ b/src/machi_projection_store.erl @@ -41,6 +41,13 @@ -module(machi_projection_store). -include("machi_projection.hrl"). +-define(V(X,Y), ok). +%% -include("machi_verbose.hrl"). + +%% -ifdef(PULSE). +%% -compile({parse_transform, pulse_instrument}). +%% -include_lib("pulse_otp/include/pulse_otp.hrl"). +%% -endif. 
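%% A hedged sketch of how the machi_flu_sup change above behaves in a
%% non-PULSE build: get_initial_flus/0 reads the app environment, so a
%% node can preconfigure FLU packages before the supervisor starts.
%% The name/port/path below are made-up example values.

flu_sup_env_sketch() ->
    %% Each {FluName, TcpPort, DataDir, Props} tuple becomes one
    %% machi_flu_psup child spec in machi_flu_sup:init/1.
    ok = application:set_env(machi, initial_flus,
                             [{a, 7400, "/tmp/c/data.a", []}]),
    {ok, _SupPid} = machi_flu_sup:start_link().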
%% API -export([ diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl index 6657623..eb3adf8 100644 --- a/src/machi_proxy_flu1_client.erl +++ b/src/machi_proxy_flu1_client.erl @@ -47,6 +47,10 @@ -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-endif. -endif. % TEST. -export([start_link/1]). diff --git a/src/machi_sup.erl b/src/machi_sup.erl index 5ffe918..d4ab71a 100644 --- a/src/machi_sup.erl +++ b/src/machi_sup.erl @@ -27,6 +27,14 @@ -behaviour(supervisor). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-include_lib("pulse_otp/include/pulse_otp.hrl"). +-define(SHUTDOWN, infinity). +-else. +-define(SHUTDOWN, 5000). +-endif. + %% API -export([start_link/0]). @@ -39,6 +47,8 @@ start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []). init([]) -> + %% {_, Ps} = process_info(self(), links), + %% [unlink(P) || P <- Ps], RestartStrategy = one_for_one, MaxRestarts = 1000, MaxSecondsBetweenRestarts = 3600, @@ -46,7 +56,7 @@ init([]) -> SupFlags = {RestartStrategy, MaxRestarts, MaxSecondsBetweenRestarts}, Restart = permanent, - Shutdown = 5000, + Shutdown = ?SHUTDOWN, Type = supervisor, ServerSup = diff --git a/src/machi_util.erl b/src/machi_util.erl index 9557d53..b330dc6 100644 --- a/src/machi_util.erl +++ b/src/machi_util.erl @@ -38,7 +38,11 @@ info_msg/2, verb/1, verb/2, mbytes/1, %% TCP protocol helpers - connect/2, connect/3 + connect/2, connect/3, + %% List twiddling + permutations/1, perms/1, + combinations/1, ordered_combinations/1, + mk_order/2 ]). -compile(export_all). @@ -301,3 +305,29 @@ escript_connect(Host, Port, Timeout) when is_integer(Port) -> {ok, Sock} = gen_tcp:connect(Host, Port, [{active,false}, {mode,binary}, {packet, raw}], Timeout), Sock. + +permutations(L) -> + perms(L). + +perms([]) -> [[]]; +perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])]. + +combinations(L) -> + lists:usort(perms(L) ++ lists:append([ combinations(L -- [X]) || X <- L])). + +ordered_combinations(Master) -> + [L || L <- combinations(Master), is_ordered(L, Master)]. + +is_ordered(L, Reference) -> + L_order = mk_order(L, Reference), + lists:all(fun(X) -> is_integer(X) end, L_order) andalso + L_order == lists:sort(L_order). + +mk_order(UPI2, Repair1) -> + R1 = length(Repair1), + Repair1_order_d = orddict:from_list(lists:zip(Repair1, lists:seq(1, R1))), + UPI2_order = [case orddict:find(X, Repair1_order_d) of + {ok, Idx} -> Idx; + error -> error + end || X <- UPI2], + UPI2_order. diff --git a/test/legacy/chain_mgr_legacy.erl b/test/legacy/chain_mgr_legacy.erl new file mode 100644 index 0000000..d199531 --- /dev/null +++ b/test/legacy/chain_mgr_legacy.erl @@ -0,0 +1,359 @@ +%% ------------------------------------------------------------------- +%% +%% Machi: a small village of replicated files +%% +%% Copyright (c) 2014-2015 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. 
See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(chain_mgr_legacy).
+-compile(export_all).
+
+-include("machi_projection.hrl").
+-include("machi_chain_manager.hrl").
+
+-define(RETURN1(X), begin put(why1, [?LINE|get(why1)]), X end).
+
+%% This is old code, and it's broken.  We keep it around in case
+%% someone wants to execute the QuickCheck property
+%% machi_chain_manager1_test:prop_compare_legacy_with_v2_chain_transition_check().
+%% In all counterexamples, this legacy code returns 'true' (i.e., the
+%% state transition is OK) where the v2 new code correctly returns
+%% 'false' (i.e., the state transition is BAD).  Fun, good times.
+%% Hooray for more systematic/mathematical reasoning, code
+%% structure, and property-based testing.
+
+projection_transition_is_sane(
+  #projection_v1{epoch_number=Epoch1,
+                 epoch_csum=CSum1,
+                 creation_time=CreationTime1,
+                 author_server=AuthorServer1,
+                 all_members=All_list1,
+                 down=Down_list1,
+                 upi=UPI_list1,
+                 repairing=Repairing_list1,
+                 dbg=Dbg1} = P1,
+  #projection_v1{epoch_number=Epoch2,
+                 epoch_csum=CSum2,
+                 creation_time=CreationTime2,
+                 author_server=AuthorServer2,
+                 all_members=All_list2,
+                 down=Down_list2,
+                 upi=UPI_list2,
+                 repairing=Repairing_list2,
+                 dbg=Dbg2} = P2,
+  RelativeToServer, RetrospectiveP) ->
+ try
+    put(why1, []),
+    %% General notes:
+    %%
+    %% I'm making no attempt to be "efficient" here.  All of these data
+    %% structures are small, and they're not called zillions of times per
+    %% second.
+    %%
+    %% The chain sequence/order checks at the bottom of this function aren't
+    %% as easy-to-read as they ought to be.  However, I'm moderately confident
+    %% that they aren't buggy.  TODO: refactor them for clarity.
+
+    true = is_integer(Epoch1) andalso is_integer(Epoch2),
+    true = is_binary(CSum1) andalso is_binary(CSum2),
+    {_,_,_} = CreationTime1,
+    {_,_,_} = CreationTime2,
+    true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo type may change?
+    true = is_list(All_list1) andalso is_list(All_list2),
+    true = is_list(Down_list1) andalso is_list(Down_list2),
+    true = is_list(UPI_list1) andalso is_list(UPI_list2),
+    true = is_list(Repairing_list1) andalso is_list(Repairing_list2),
+    true = is_list(Dbg1) andalso is_list(Dbg2),
+
+    true = Epoch2 > Epoch1,
+    All_list1 = All_list2,                 % todo will probably change
+
+    %% No duplicates
+    true = lists:sort(Down_list2) == lists:usort(Down_list2),
+    true = lists:sort(UPI_list2) == lists:usort(UPI_list2),
+    true = lists:sort(Repairing_list2) == lists:usort(Repairing_list2),
+
+    %% Disjoint-ness
+    true = lists:sort(All_list2) == lists:sort(Down_list2 ++ UPI_list2 ++
+                                                   Repairing_list2),
+    [] = [X || X <- Down_list2, not lists:member(X, All_list2)],
+    [] = [X || X <- UPI_list2, not lists:member(X, All_list2)],
+    [] = [X || X <- Repairing_list2, not lists:member(X, All_list2)],
+    DownS2 = sets:from_list(Down_list2),
+    UPIS2 = sets:from_list(UPI_list2),
+    RepairingS2 = sets:from_list(Repairing_list2),
+    true = sets:is_disjoint(DownS2, UPIS2),
+    true = sets:is_disjoint(DownS2, RepairingS2),
+    true = sets:is_disjoint(UPIS2, RepairingS2),
+
+    %% Additions to the UPI chain may only be at the tail
+    UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2),
+    true =
+        if UPI_common_prefix == [] ->
+                if UPI_list1 == [] orelse UPI_list2 == [] ->
+                        %% If the common prefix is empty, then one of the
+                        %% inputs must be empty.
+                        ?RETURN1(true);
+                   true ->
+                        %% Otherwise, we have a case of UPI changing from
+                        %% one of these two situations:
+                        %%
+                        %% UPI_list1 -> UPI_list2
+                        %% -------------------------------------------------
+                        %% [d,c,b,a] -> [c,a]
+                        %% [d,c,b,a] -> [c,a,repair_finished_added_to_tail].
+                        NotUPI2 = (Down_list2 ++ Repairing_list2),
+                        case lists:prefix(UPI_list1 -- NotUPI2, UPI_list2) of
+                            true ->
+                                ?RETURN1(true);
+                            false ->
+                                %% Here's a possible failure scenario:
+                                %% UPI_list1        -> UPI_list2
+                                %% Repairing_list1  -> Repairing_list2
+                                %% -----------------------------------
+                                %% [a,b,c] author=a -> [c,a] author=c
+                                %% []                  [b]
+                                %%
+                                %% ... where RelativeToServer=b.  In this case, b
+                                %% has been partitioned for a while and has only
+                                %% now just learned of several epoch transitions.
+                                %% If the author of both is also in the UPI of
+                                %% both, then those authors would not have allowed
+                                %% a bad transition, so we will assume this
+                                %% transition is OK.
+                                ?RETURN1(
+                                  lists:member(AuthorServer1, UPI_list1)
+                                  andalso
+                                  lists:member(AuthorServer2, UPI_list2)
+                                 )
+                        end
+                end;
+           true ->
+                ?RETURN1(true)
+        end,
+    true = lists:prefix(UPI_common_prefix, UPI_list1),
+    true = lists:prefix(UPI_common_prefix, UPI_list2),
+    UPI_1_suffix = UPI_list1 -- UPI_common_prefix,
+    UPI_2_suffix = UPI_list2 -- UPI_common_prefix,
+    _ = ?RETURN1(yo),
+
+    MoreCheckingP =
+        RelativeToServer == undefined
+        orelse
+        not (lists:member(RelativeToServer, Down_list2) orelse
+             lists:member(RelativeToServer, Repairing_list2)),
+    _ = ?RETURN1(yo),
+
+    UPIs_are_disjointP = ordsets:is_disjoint(ordsets:from_list(UPI_list1),
+                                             ordsets:from_list(UPI_list2)),
+    case UPI_2_suffix -- UPI_list1 of
+        [] ->
+            ?RETURN1(true);
+        [_|_] = _Added_by_2 ->
+            if RetrospectiveP ->
+                    %% Any servers added to the UPI must be added from the
+                    %% repairing list ... but in retrospective mode (where
+                    %% we're checking only the transitions where all
+                    %% UPI+repairing participants have unanimous private
+                    %% projections!), and if we're under asymmetric
+                    %% partition/churn, then we may not see the repairing
+                    %% list.  So we will not check that condition here.
+                    ?RETURN1(true);
+               not RetrospectiveP ->
+                    %% We're not retrospective.  So, if some server was
+                    %% added to the UPI, then that means that it was
+                    %% added by repair.  And repair is coordinated by the
+                    %% UPI tail/last.
+%io:format(user, "g: UPI_list1=~w, UPI_list2=~w, UPI_2_suffix=~w, ",
+%          [UPI_list1, UPI_list2, UPI_2_suffix]),
+%io:format(user, "g", []),
+                    ?RETURN1(true = UPI_list1 == [] orelse
+                             UPIs_are_disjointP orelse
+                             (lists:last(UPI_list1) == AuthorServer2) )
+            end
+    end,
+
+    if not MoreCheckingP ->
+            ?RETURN1(ok);
+       MoreCheckingP ->
+            %% Where did elements in UPI_2_suffix come from?
+            %% Only two sources are permitted.
+            Oops_check_UPI_2_suffix =
+                [lists:member(X, Repairing_list1) % X added after repair done
+                 orelse
+                 lists:member(X, UPI_list1)   % X in UPI_list1 after common pref
+                 || X <- UPI_2_suffix],
+            %% Grrrrr, ok, so this check isn't good, at least at bootstrap time.
+            %% TODO: false = lists:member(false, Oops_check_UPI_2_suffix),
+
+            %% The UPI_2_suffix must exactly be equal to: ordered items from
+            %% UPI_list1 concat'ed with ordered items from Repairing_list1.
+            %% Both temp vars below preserve relative order!
+            UPI_2_suffix_from_UPI1 = [X || X <- UPI_1_suffix,
+                                           lists:member(X, UPI_list2)],
+            UPI_2_suffix_from_Repairing1 = [X || X <- UPI_2_suffix,
+                                                 lists:member(X, Repairing_list1)],
+            %% true?
+ UPI_2_concat = (UPI_2_suffix_from_UPI1 ++ UPI_2_suffix_from_Repairing1), + if UPI_2_suffix == UPI_2_concat -> + ?RETURN1(ok); + true -> + %% 'make dialyzer' will believe that this can never succeed. + %% 'make dialyzer-test' will not complain, however. + if RetrospectiveP -> + %% We are in retrospective mode. But there are + %% some transitions that are difficult to find + %% when standing outside of all of the FLUs and + %% examining their behavior. (In contrast to + %% this same function being called "in the path" + %% of a projection transition by a particular FLU + %% which knows exactly its prior projection and + %% exactly what it intends to do.) Perhaps this + %% exception clause here can go away with + %% better/more clever retrospection analysis? + %% + %% Here's a case that PULSE found: + %% FLU B: + %% E=257: UPI=[c,a], REPAIRING=[b] + %% E=284: UPI=[c,a], REPAIRING=[b] + %% FLU a: + %% E=251: UPI=[c], REPAIRING=[a,b] + %% E=284: UPI=[c,a], REPAIRING=[b] + %% FLU c: + %% E=282: UPI=[c], REPAIRING=[a,b] + %% E=284: UPI=[c,a], REPAIRING=[b] + %% + %% From the perspective of each individual FLU, + %% the unanimous transition at epoch #284 is + %% good. The repair that is done by FLU c -> a + %% is likewise good. + %% + %% From a retrospective point of view (and the + %% current implementation), there's a bad-looking + %% transition from epoch #269 to #284. This is + %% from the point of view of the last two + %% unanimous private projection store epochs: + %% + %% E=269: UPI=[c], REPAIRING=[], DOWN=[a,b] + %% E=284: UPI=[c,a], REPAIRING=[b] + %% + %% The retrospective view by + %% machi_chain_manager1_pulse.erl just can't + %% reason correctly about this situation. We + %% will instead rely on the non-retrospective + %% sanity checking that each FLU does before it + %% writes to its private projection store and + %% then adopts that projection (and unwedges + %% itself, etc etc). + + if UPIs_are_disjointP -> + ?RETURN1(true); + true -> + ?RETURN1(todo), + exit({todo, revisit, ?MODULE, ?LINE, + [ + {oops_check_UPI_2_suffix, Oops_check_UPI_2_suffix}, + {upi_2_suffix, UPI_2_suffix}, + {upi_2_concat, UPI_2_concat}, + {retrospectivep, RetrospectiveP} + ]}), + io:format(user, "|~p,~p TODO revisit|", + [?MODULE, ?LINE]), + ok + end; + true -> + %% The following is OK: We're shifting from a + %% normal projection to an inner one. The old + %% normal has a UPI that has nothing to do with + %% RelativeToServer a.k.a. me. + %% Or else the UPI_list1 is empty, and I'm + %% the only member of UPI_list2 + %% But the new/suffix is definitely me. + %% from: + %% {epoch,847},{author,c},{upi,[c]},{repair,[]}, + %% {down,[a,b,d]} + %% to: + %% {epoch,848},{author,a},{upi,[a]},{repair,[]}, + %% {down,[b,c,d]} + FirstCase_p = (UPI_2_suffix == [AuthorServer2]) + andalso + ((inner_projection_exists(P1) == false + andalso + inner_projection_exists(P2) == true) + orelse UPI_list1 == []), + + %% Here's another case that's alright: + %% + %% {a,{err,exit, + %% {upi_2_suffix_error,[c]}, .... + %% + %% from: + %% {epoch,937},{author,a},{upi,[a,b]},{repair,[]}, + %% {down,[c]} + %% to: + %% {epoch,943},{author,a},{upi,{a,b,c},{repair,[]}, + %% {down,[]} + + %% The author server doesn't matter. However, + %% there were two other epochs in between, 939 + %% and 941, where there wasn't universal agreement + %% of private projections. The repair controller + %% at the tail, 'b', had decided that the repair + %% of 'c' was finished @ epoch 941. 
+ SecondCase_p = ((UPI_2_suffix -- Repairing_list1) + == []), + if FirstCase_p -> + ?RETURN1(true); + SecondCase_p -> + ?RETURN1(true); + UPIs_are_disjointP -> + %% If there's no overlap at all between + %% UPI_list1 & UPI_list2, then we're OK + %% here. + ?RETURN1(true); + true -> + exit({upi_2_suffix_error, UPI_2_suffix}) + end + end + end + end, + ?RETURN1(true) + catch + _Type:_Err -> + ?RETURN1(oops), + S1 = machi_projection:make_summary(P1), + S2 = machi_projection:make_summary(P2), + Trace = erlang:get_stacktrace(), + {err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer, + history, (catch lists:sort([no_history])), + stack, Trace} + end. + +find_common_prefix([], _) -> + []; +find_common_prefix(_, []) -> + []; +find_common_prefix([H|L1], [H|L2]) -> + [H|find_common_prefix(L1, L2)]; +find_common_prefix(_, _) -> + []. + +inner_projection_exists(#projection_v1{inner=undefined}) -> + false; +inner_projection_exists(#projection_v1{inner=_}) -> + true. diff --git a/test/machi_chain_manager1_converge_demo.erl b/test/machi_chain_manager1_converge_demo.erl index b91b333..520ace1 100644 --- a/test/machi_chain_manager1_converge_demo.erl +++ b/test/machi_chain_manager1_converge_demo.erl @@ -154,9 +154,10 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> %% Faster test startup, commented: timer:sleep(3000), TcpPort = 62877, - FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}, - {c,TcpPort+2,"./data.c"}, {d,TcpPort+3,"./data.d"}, - {e,TcpPort+4,"./data.e"}, {f,TcpPort+5,"./data.f"}], + ok = filelib:ensure_dir("/tmp/c/not-used"), + FluInfo = [{a,TcpPort+0,"/tmp/c/data.a"}, {b,TcpPort+1,"/tmp/c/data.b"}, + {c,TcpPort+2,"/tmp/c/data.c"}, {d,TcpPort+3,"/tmp/c/data.d"}, + {e,TcpPort+4,"/tmp/c/data.e"}, {f,TcpPort+5,"/tmp/c/data.f"}], FLU_biglist = [X || {X,_,_} <- FluInfo], All_list = lists:sublist(FLU_biglist, NumFLUs), io:format(user, "\nSET # of FLUs = ~w members ~w).\n", @@ -175,7 +176,6 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> {Name, PPid} end || {#p_srvr{name=Name}=P, _Dir} <- PsDirs], MembersDict = machi_projection:make_members_dict(Ps), - %% MgrOpts = [private_write_verbose, {active_mode,false}, MgrOpts = MgrOpts0 ++ ?DEFAULT_MGR_OPTS, MgrNamez = [begin @@ -225,48 +225,64 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> machi_partition_simulator:reset_thresholds(10, 50), io:format(user, "\nLet loose the dogs of war!\n", []), - DoIt(30, 0, 0), + [DoIt(30, 0, 0) || _ <- lists:seq(1,2)], AllPs = make_partition_list(All_list), PartitionCounts = lists:zip(AllPs, lists:seq(1, length(AllPs))), - FLUFudge = if NumFLUs < 4 -> - 2; - true -> - 13 - end, + MaxIters = NumFLUs * (NumFLUs + 1) * 6, [begin machi_partition_simulator:always_these_partitions(Partition), io:format(user, "\nSET partitions = ~w (~w of ~w) at ~w\n", [Partition, Count, length(AllPs), time()]), - [DoIt(40, 10, 50) || _ <- lists:seq(0, trunc(NumFLUs*FLUFudge)) ], - - {stable,true} = {stable,private_projections_are_stable(Namez, DoIt)}, + true = lists:foldl( + fun(_, true) -> + true; + (_, _) -> + %% Run a few iterations + [DoIt(10, 10, 50) || _ <- lists:seq(1, 6)], + %% If stable, return true to short circuit remaining + private_projections_are_stable(Namez, DoIt) + end, false, lists:seq(0, MaxIters)), io:format(user, "\nSweet, private projections are stable\n", []), -io:format(user, "Rolling sanity check ... ", []), -PrivProjs = [{Name, begin - {ok, Ps8} = ?FLU_PC:get_all_projections(FLU, private, - infinity), - Ps9 = if length(Ps8) < 5*1000 -> - Ps8; - true -> - io:format(user, "trunc a bit... 
", []), - lists:nthtail(3*1000, Ps8) - end, - [P || P <- Ps9, - P#projection_v1.epoch_number /= 0] - end} || {Name, FLU} <- Namez], -try - [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} || - {FLU, Psx} <- PrivProjs] -catch _Err:_What -> - io:format(user, "PrivProjs ~p\n", [PrivProjs]), - exit({line, ?LINE, _Err, _What}) -end, -io:format(user, "Yay!\n", []), + io:format(user, "Rolling sanity check ... ", []), + MaxFiles = 3*1000, + PrivProjs = [{Name, begin + {ok, Ps8} = ?FLU_PC:get_all_projections( + FLU, private, infinity), + Ps9 = if length(Ps8) < MaxFiles -> + Ps8; + true -> + lists:nthtail(MaxFiles, Ps8) + end, + [P || P <- Ps9, + P#projection_v1.epoch_number /= 0] + end} || {Name, FLU} <- Namez], + try + [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} || + {FLU, Psx} <- PrivProjs] + catch _Err:_What -> + io:format(user, "PrivProjs ~p\n", [PrivProjs]), + exit({line, ?LINE, _Err, _What}) + end, + io:format(user, "Yay!\n", []), + ReportXX = machi_chain_manager1_test:unanimous_report(Namez), + true = machi_chain_manager1_test:all_reports_are_disjoint(ReportXX), + io:format(user, "Yay for ReportXX!\n", []), + + [begin + Privs = filelib:wildcard(Dir ++ "/projection/private/*"), + FilesToDel1 = lists:sublist(Privs, + max(0, length(Privs)-MaxFiles)), + [_ = file:delete(File) || File <- FilesToDel1], + Pubs = filelib:wildcard(Dir ++ "/projection/public/*"), + FilesToDel2 = lists:sublist(Pubs, + max(0, length(Pubs)-MaxFiles)), + [_ = file:delete(File) || File <- FilesToDel2] + end || Dir <- filelib:wildcard("/tmp/c/data*")], + timer:sleep(1250), ok end || {Partition, Count} <- PartitionCounts ], - %% exit(end_experiment), io:format(user, "\nSET partitions = []\n", []), io:format(user, "We should see convergence to 1 correct chain.\n", []), @@ -287,6 +303,7 @@ io:format(user, "Yay!\n", []), %% members appear in only one unique chain, i.e., the sets of %% unique chains are disjoint. true = machi_chain_manager1_test:all_reports_are_disjoint(Report), + %% io:format(user, "\nLast Reports: ~p\n", [lists:nthtail(length(Report)-8,Report)]), %% For each chain transition experienced by a particular FLU, %% confirm that each state transition is OK. @@ -334,10 +351,12 @@ make_partition_list(All_list) -> A <- All_list, B <- All_list, A /= B, C <- All_list, D <- All_list, C /= D, X /= A, X /= C, A /= C], - %% Concat = _X_Ys1 ++ _X_Ys2. - %% Concat = _X_Ys3. - Concat = _X_Ys1 ++ _X_Ys2 ++ _X_Ys3, - random_sort(lists:usort([lists:sort(L) || L <- Concat])). + %% Concat = _X_Ys1, + %% Concat = _X_Ys2, + %% Concat = _X_Ys1 ++ _X_Ys2, + %% %% Concat = _X_Ys3, + %% %% Concat = _X_Ys1 ++ _X_Ys2 ++ _X_Ys3, + %% random_sort(lists:usort([lists:sort(L) || L <- Concat])). %% [ [{a,b},{b,d},{c,b}], %% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}], @@ -348,17 +367,29 @@ make_partition_list(All_list) -> %% [ [{a,b}, {b,c}], %% [{a,b}, {c,b}] ]. - %% [{a,b}, {b,c}] ]. %% hosed-not-equal @ 3 FLUs + %% [ [{a,b}, {b,c}] ]. %% hosed-not-equal @ 3 FLUs - %% [{b,d}] ]. + %% [ [{b,d}] ]. + + %% [ [{a,b}], [], [{a,b}], [], [{a,b}] ]. 
+
+    %% [
+    %%  %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+    %%  %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+    %%  %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+    %%  %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+    %%  [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+    %%  [{b,a},{d,e}],
+    %%  [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], []
+    %% ].
 
     %% [ [{a,b}, {b,a}] ].
 
     %% [ [{a,b},{b,c},{c,a}],
     %%   [{a,b}, {b,a}, {a,c},{c,a}] ].
 
-    %% [{a,b}, {c,b}],
-    %%  [{a,b}, {b,c}] ].
+    %% [ [{a,b}, {c,b}],
+    %%   [{a,b}, {b,c}] ].
 
     %% [ [{a,b}, {b,c}, {c,d}],
     %%   [{a,b}, {b,c},{b,d}, {c,d}],
@@ -366,23 +397,34 @@ make_partition_list(All_list) ->
     %%   [{a,b}, {c,b}, {c,d}],
     %%   [{a,b}, {b,c}, {d,c}] ].
 
+    [
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+     [{a,b}], [], [{a,b}], [], [{a,b}]
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [],
+     %% [{b,a},{d,e}],
+     %% [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], [], [{a,b}], []
+    ].
     %% [ [{a,b}, {b,c}, {c,d}, {d,e}],
     %%   [{b,a}, {b,c}, {c,d}, {d,e}],
     %%   [{a,b}, {c,b}, {c,d}, {d,e}],
     %%   [{a,b}, {b,c}, {d,c}, {d,e}],
     %%   [{a,b}, {b,c}, {c,d}, {e,d}] ].
 
-    %% [ [{c,a}] ].
+    %% [ [{c,a}] ].  %% TODO double-check for total repair stability at SET=[]!!
 
-    %% [ [{c,a}], [{c,b}, {a, b}] ].
+    %% [ [{c,a}],
+    %%   [{c,b}, {a, b}] ].
 
     %% [ [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}],
     %%   [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {b,c}],
     %%   [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {c,d}] ].
 
-    %% [ [{a,b}],
-    %%   [{a,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
+    %% [ [{a,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
     %%   [{a,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
+    %%   [{a,b}],
     %%   [{a,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
     %%   [{a,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ].
 
@@ -405,10 +447,10 @@ private_projections_are_stable(Namez, PollFunc) ->
        if Private1 == Private2 ->
               ok;
          true ->
-              io:format(user, "Oops: Private1: ~p\n", [Private1]),
-              io:format(user, "Oops: Private2: ~p\n", [Private2])
+              io:format(user, "Private1: ~p, ", [Private1]),
+              io:format(user, "Private2: ~p, ", [Private2])
        end,
-    true = (Private1 == Private2).
+    Private1 == Private2.
 
 get_latest_inner_proj_summ(FLU) ->
     {ok, Proj} = ?FLU_PC:read_latest_projection(FLU, private),
diff --git a/test/machi_chain_manager1_pulse.erl b/test/machi_chain_manager1_pulse.erl
index 9611dcc..12e7842 100644
--- a/test/machi_chain_manager1_pulse.erl
+++ b/test/machi_chain_manager1_pulse.erl
@@ -2,7 +2,7 @@
 %%
 %% Machi: a small village of replicated files
 %%
-%% Copyright (c) 2014 Basho Technologies, Inc.  All Rights Reserved.
+%% Copyright (c) 2014-2015 Basho Technologies, Inc.  All Rights Reserved.
 %%
 %% This file is provided to you under the Apache License,
 %% Version 2.0 (the "License"); you may not use this file
@@ -27,6 +27,7 @@
 -compile(export_all).
 
 -include("machi_projection.hrl").
+-include("machi_verbose.hrl").
 
 -include_lib("eqc/include/eqc.hrl").
 -include_lib("eqc/include/eqc_statem.hrl").
@@ -38,7 +39,9 @@
 -compile({pulse_replace_module, [{application, pulse_application}]}).
 %% The following functions contains side_effects but are run outside
 %% PULSE, i.e. PULSE needs to leave them alone
--compile({pulse_skip,[{prop_pulse_test_,0}, {shutdown_hard,0}]}).
+-compile({pulse_skip,[{prop_pulse_test_,0}, {prop_pulse_regression_test_,0}, + {prop_pulse,1}, + {shutdown_hard,0}]}). -compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). %% Used for output within EUnit... @@ -70,11 +73,58 @@ gen_seed() -> noshrink({choose(1, 10000), choose(1, 10000), choose(1, 10000)}). gen_old_threshold() -> - noshrink(choose(1, 100)). + noshrink(frequency([ + {10, {keep}}, + {10, choose(1, 100)}, + {10, oneof([{island1}])}, + {10, oneof([{asymm1}, {asymm2}, {asymm3}])} + ])). gen_no_partition_threshold() -> noshrink(choose(1, 100)). +gen_commands(new) -> + non_empty(commands(?MODULE)); +gen_commands(regression) -> + %% These regression tests include only few, very limited command + %% sequences that have been helpful in turning up bugs in the past. + %% For this style test, QuickCheck is basically just choosing random + %% seeds + PULSE execution to see if one of the oldies-but-goodies can + %% find another execution/interleaving that still shows a problem. + Cmd_a = [{set,{var,1}, + {call,machi_chain_manager1_pulse,setup,[3,{846,1222,4424}]}}, + {set,{var,2}, + {call,machi_chain_manager1_pulse,do_ticks,[6,{var,1},13,48]}}], + Cmd_b = [{set,{var,1}, + {call,machi_chain_manager1_pulse,setup,[4,{354,7401,1237}]}}, + {set,{var,2}, + {call,machi_chain_manager1_pulse,do_ticks,[10,{var,1},15,77]}}, + {set,{var,3}, + {call,machi_chain_manager1_pulse,do_ticks,[7,{var,1},92,39]}}], + Cmd_c = [{set,{var,1}, + {call,machi_chain_manager1_pulse,setup,[2,{5202,467,3157}]}}, + {set,{var,2}, + {call,machi_chain_manager1_pulse,do_ticks,[8,{var,1},98,3]}}, + {set,{var,3}, + {call,machi_chain_manager1_pulse,do_ticks,[5,{var,1},56,49]}}, + {set,{var,4}, + {call,machi_chain_manager1_pulse,do_ticks,[10,{var,1},33,72]}}, + {set,{var,5}, + {call,machi_chain_manager1_pulse,do_ticks,[10,{var,1},88,20]}}, + {set,{var,6}, + {call,machi_chain_manager1_pulse,do_ticks,[8,{var,1},67,10]}}, + {set,{var,7}, + {call,machi_chain_manager1_pulse,do_ticks,[5,{var,1},86,25]}}, + {set,{var,8}, + {call,machi_chain_manager1_pulse,do_ticks,[6,{var,1},74,88]}}, + {set,{var,9}, + {call,machi_chain_manager1_pulse,do_ticks,[8,{var,1},78,39]}}], + Cmd_d = [{set,{var,1}, + {call,machi_chain_manager1_pulse,setup,[5,{436,5950,9085}]}}, + {set,{var,2}, + {call,machi_chain_manager1_pulse,do_ticks,[7,{var,1},19,80]}}], + noshrink(oneof([Cmd_a, Cmd_b, Cmd_c, Cmd_d])). + command(#state{step=0}) -> {call, ?MODULE, setup, [gen_num_pids(), gen_seed()]}; command(S) -> @@ -82,7 +132,7 @@ command(S) -> { 1, {call, ?MODULE, change_partitions, [gen_old_threshold(), gen_no_partition_threshold()]}}, {50, {call, ?MODULE, do_ticks, - [choose(5, 100), S#state.pids, + [choose(1, 100), S#state.pids, gen_old_threshold(), gen_no_partition_threshold()]}} ]). @@ -103,38 +153,49 @@ postcondition(_S, {call, _, _Func, _Args}, _Res) -> true. 
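%% A hedged sketch (illustration only): any of the canned regression
%% sequences above can be replayed by hand, outside of the property,
%% via run_commands/2 from eqc_statem.hrl, assuming an EQC/PULSE-capable
%% build.  replay_cmd_a_sketch/0 is a made-up name.

replay_cmd_a_sketch() ->
    Cmds = [{set,{var,1},
             {call,machi_chain_manager1_pulse,setup,[3,{846,1222,4424}]}},
            {set,{var,2},
             {call,machi_chain_manager1_pulse,do_ticks,[6,{var,1},13,48]}}],
    {_History, _ModelState, Result} = run_commands(?MODULE, Cmds),
    Result.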
 all_list_extra() ->
+    {PortBase, DirBase} = get_port_dir_base(),
     [ %% Genenerators assume that this list is at least 2 items
-      {#p_srvr{name=a, address="localhost", port=7400,
-               props=[{chmgr, a_chmgr}]}, "./data.pulse.a"}
-    , {#p_srvr{name=b, address="localhost", port=7401,
-               props=[{chmgr, b_chmgr}]}, "./data.pulse.b"}
-    , {#p_srvr{name=c, address="localhost", port=7402,
-               props=[{chmgr, c_chmgr}]}, "./data.pulse.c"}
-    , {#p_srvr{name=d, address="localhost", port=7403,
-               props=[{chmgr, d_chmgr}]}, "./data.pulse.d"}
-    , {#p_srvr{name=e, address="localhost", port=7404,
-               props=[{chmgr, e_chmgr}]}, "./data.pulse.e"}
+      {#p_srvr{name=a, address="localhost", port=PortBase+0,
+               props=[{chmgr, a_chmgr}]}, DirBase ++ "/data.pulse.a"}
+    , {#p_srvr{name=b, address="localhost", port=PortBase+1,
+               props=[{chmgr, b_chmgr}]}, DirBase ++ "/data.pulse.b"}
+    , {#p_srvr{name=c, address="localhost", port=PortBase+2,
+               props=[{chmgr, c_chmgr}]}, DirBase ++ "/data.pulse.c"}
+    , {#p_srvr{name=d, address="localhost", port=PortBase+3,
+               props=[{chmgr, d_chmgr}]}, DirBase ++ "/data.pulse.d"}
+    , {#p_srvr{name=e, address="localhost", port=PortBase+4,
+               props=[{chmgr, e_chmgr}]}, DirBase ++ "/data.pulse.e"}
     ].
 
 all_list() ->
     [P#p_srvr.name || {P, _Dir} <- all_list_extra()].
 
 setup(Num, Seed) ->
-    ?QC_FMT("\nsetup(~w", [Num]),
-    error_logger:tty(false),
+    ?V("\nsetup(~w,~w", [Num, Seed]),
     All_list = lists:sublist(all_list(), Num),
     All_listE = lists:sublist(all_list_extra(), Num),
     %% shutdown_hard() has taken care of killing all relevant procs.
-    [machi_flu1_test:clean_up_data_dir(Dir) || {_P, Dir} <- All_listE],
-    ?QC_FMT(",z~w", [?LINE]),
+    [begin
+         machi_flu1_test:clean_up_data_dir(Dir),
+         filelib:ensure_dir(Dir ++ "/not-used")
+     end || {_P, Dir} <- All_listE],
+    ?V(",z~w", [?LINE]),
 
-    %% Start partition simulator
-    {ok, PSimPid} = machi_partition_simulator:start_link(Seed, 0, 100),
+    %% GRRR, not PULSE: {ok, _} = application:ensure_all_started(machi),
+    [begin
+         _QQ = (catch application:start(App))
+     end || App <- [machi] ],
+    ?V(",z~w", [?LINE]),
+
+    SimSpec = {part_sim, {machi_partition_simulator, start_link,
+                          [{0,0,0}, 0, 100]},
+               permanent, 500, worker, []},
+    {ok, PSimPid} = supervisor:start_child(machi_sup, SimSpec),
+    ok = machi_partition_simulator:set_seed(Seed),
     _Partitions = machi_partition_simulator:get(All_list),
-    ?QC_FMT(",z~w", [?LINE]),
+    ?V(",z~w", [?LINE]),
 
     %% Start FLUs and their associated procs
-    {ok, SupPid} = machi_flu_sup:start_link(),
     FluOpts = [{use_partition_simulator, true}, {active_mode, false}],
     [begin
         #p_srvr{name=Name, port=Port} = P,
@@ -142,31 +203,49 @@ setup(Num, Seed) ->
     end || {P, Dir} <- All_listE],
     %% Set up the chain
     Dict = orddict:from_list([{P#p_srvr.name, P} || {P, _Dir} <- All_listE]),
-    ?QC_FMT(",z~w", [?LINE]),
     [machi_chain_manager1:set_chain_members(get_chmgr(P), Dict) ||
        {P, _Dir} <- All_listE],
+    ?V(",z~w", [?LINE]),
+
+    %% Trigger some environment reactions for humming consensus: first
+    %% do all of the same server, then round-robin evenly across
+    %% servers.
[begin _QQa = machi_chain_manager1:trigger_react_to_env(get_chmgr(P)) end || {P, _Dir} <- All_listE, _I <- lists:seq(1,20), _Repeat <- [1,2]], - ?QC_FMT(",z~w", [?LINE]), [begin _QQa = machi_chain_manager1:trigger_react_to_env(get_chmgr(P)) end || _I <- lists:seq(1,20), {P, _Dir} <- All_listE, _Repeat <- [1,2]], - ?QC_FMT(",z~w", [?LINE]), + ?V(",z~w", [?LINE]), ProxiesDict = ?FLU_PC:start_proxies(Dict), - Res = {PSimPid, SupPid, ProxiesDict, All_listE}, + Res = {PSimPid, 'machi_flu_sup', ProxiesDict, All_listE}, put(manager_pids_hack, Res), - ?QC_FMT("),", []), + ?V("),", []), Res. -change_partitions(OldThreshold, NoPartitionThreshold) -> +change_partitions(OldThreshold, NoPartitionThreshold) + when is_integer(OldThreshold) -> machi_partition_simulator:reset_thresholds(OldThreshold, - NoPartitionThreshold). + NoPartitionThreshold); +change_partitions({keep}, _NoPartitionThreshold) -> + ok; +change_partitions({island1}, _NoPartitionThreshold) -> + AB = [a,b], + NotAB = all_list() -- AB, + Partitions = lists:usort([{X, Y} || X <- AB, Y <- NotAB] ++ + [{X, Y} || X <- NotAB, Y <- AB]), + machi_partition_simulator:always_these_partitions(Partitions); +change_partitions({asymm1}, _NoPartitionThreshold) -> + Partitions = [{a,b}], + machi_partition_simulator:always_these_partitions(Partitions); +change_partitions({asymm2}, _NoPartitionThreshold) -> + Partitions = [{a,b},{a,c},{a,d},{a,e},{b,a},{b,c},{b,e},{c,a},{c,b},{c,d},{c,e},{d,a},{d,c},{d,e},{e,a},{e,b},{e,c},{e,d}], + machi_partition_simulator:always_these_partitions(Partitions); +change_partitions({asymm3}, _NoPartitionThreshold) -> + Partitions = [{a,b},{a,c},{a,d},{a,e},{b,a},{b,d},{b,e},{c,d},{d,a},{d,c},{d,e},{e,a},{e,b},{e,d}], + machi_partition_simulator:always_these_partitions(Partitions). always_last_partitions() -> machi_partition_simulator:always_last_partitions(). @@ -175,29 +254,28 @@ private_stable_check() -> {_PSimPid, _SupPid, ProxiesDict, All_listE} = get(manager_pids_hack), Res = private_projections_are_stable_check(ProxiesDict, All_listE), if not Res -> - io:format(user, "BUMMER: private stable check failed!\n", []); + ?QC_FMT("BUMMER: private stable check failed!\n", []); true -> ok end, Res. 
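%% A sketch of what the {island1} clause above computes when all_list()
%% is [a,b,c,d,e]: a symmetric cut that isolates the island [a,b] from
%% [c,d,e] in both messaging directions, i.e. 12 one-way pairs.
%% island1_partitions_sketch/0 is illustrative only.

island1_partitions_sketch() ->
    AB = [a,b],
    NotAB = [c,d,e],
    Partitions = lists:usort([{X, Y} || X <- AB, Y <- NotAB] ++
                             [{X, Y} || X <- NotAB, Y <- AB]),
    12 = length(Partitions),
    true = lists:member({a,c}, Partitions),   %% a cannot send to c ...
    true = lists:member({c,a}, Partitions),   %% ... nor c to a
    Partitions.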
do_ticks(Num, PidsMaybe, OldThreshold, NoPartitionThreshold) -> - io:format(user, "~p,~p,~p|", [Num, OldThreshold, NoPartitionThreshold]), + ?V("~p,~p,~p|", [Num, OldThreshold, NoPartitionThreshold]), {_PSimPid, _SupPid, ProxiesDict, All_listE} = case PidsMaybe of undefined -> get(manager_pids_hack); _ -> PidsMaybe end, - if is_integer(OldThreshold) -> - machi_partition_simulator:reset_thresholds(OldThreshold, - NoPartitionThreshold); + if is_atom(OldThreshold) -> + ?V("{e=~w},", [get_biggest_private_epoch_number(ProxiesDict)]), + machi_partition_simulator:no_partitions(); true -> - ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number(ProxiesDict)]), - machi_partition_simulator:no_partitions() + change_partitions(OldThreshold, NoPartitionThreshold) end, Res = exec_ticks(Num, All_listE), if not is_integer(OldThreshold) -> - ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number(ProxiesDict)]); + ?V("{e=~w},", [get_biggest_private_epoch_number(ProxiesDict)]); true -> ok end, @@ -214,73 +292,34 @@ get_biggest_private_epoch_number(ProxiesDict) -> dump_state() -> try - ?QC_FMT("dump_state(", []), + ?V("dump_state(", []), {_PSimPid, _SupPid, ProxiesDict, _AlE} = get(manager_pids_hack), Report = ?MGRTEST:unanimous_report(ProxiesDict), Namez = ProxiesDict, - %% ?QC_FMT("Report ~p\n", [Report]), - - %% Diag1 = [begin - %% {ok, Ps} = ?FLU_PC:get_all_projections(Proxy, Type), - %% [io_lib:format("~p ~p ~p: ~w\n", [FLUName, Type, P#projection_v1.epoch_number, machi_projection:make_summary(P)]) || P <- Ps] - %% end || {FLUName, Proxy} <- orddict:to_list(ProxiesDict), - %% Type <- [public] ], - - UniquePrivateEs = - lists:usort(lists:flatten( - [element(2,?FLU_PC:list_all_projections(Proxy,private)) || - {_FLUName, Proxy} <- orddict:to_list(ProxiesDict)])), - P_lists0 = [{FLUName, Type, - element(2,?FLU_PC:get_all_projections(Proxy, Type))} || - {FLUName, Proxy} <- orddict:to_list(ProxiesDict), - Type <- [public,private]], - P_lists = [{FLUName, Type, P} || {FLUName, Type, Ps} <- P_lists0, - P <- Ps], - AllDict = lists:foldl(fun({FLU, Type, P}, D) -> - K = {FLU, Type, P#projection_v1.epoch_number}, - dict:store(K, P, D) - end, dict:new(), lists:flatten(P_lists)), - DumbFinderBackward = - fun(FLUName) -> - fun(E, error_unwritten) -> - case dict:find({FLUName, private, E}, AllDict) of - {ok, T} -> T; - error -> error_unwritten - end; - (_E, Acc) -> - Acc - end - end, - %% Diag2 = [[ - %% io_lib:format("~p private: ~w\n", - %% [FLUName, - %% machi_projection:make_summary( - %% lists:foldl(DumbFinderBackward(FLUName), - %% error_unwritten, - %% lists:seq(Epoch, 0, -1)))]) - %% || {FLUName, _FLU} <- Namez] - %% || Epoch <- UniquePrivateEs], - PrivProjs = [{Name, begin {ok, Ps} = ?FLU_PC:get_all_projections(Proxy, private), [P || P <- Ps, P#projection_v1.epoch_number /= 0] end} || {Name, Proxy} <- ProxiesDict], - - ?QC_FMT(")", []), + ?V("~w", [catch application:stop(machi)]), + ?V(")", []), Diag1 = Diag2 = "skip_diags", {Report, PrivProjs, lists:flatten([Diag1, Diag2])} catch XX:YY -> - ?QC_FMT("OUCH: ~p ~p @ ~p\n", [XX, YY, erlang:get_stacktrace()]), - ?QC_FMT("Exiting now to move to manual post-mortem....\n", []), - erlang:halt(0), + ?V("OUCH: ~p ~p @ ~p\n", [XX, YY, erlang:get_stacktrace()]), + ?V("Exiting now to move to manual post-mortem....\n", []), + erlang:halt(66), false end. prop_pulse() -> - ?FORALL({Cmds0, Seed}, {non_empty(commands(?MODULE)), pulse:seed()}, - ?IMPLIES(1 < length(Cmds0) andalso length(Cmds0) < 10, + prop_pulse(new). 
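%% A sketch of the do_ticks/4 threshold dispatch above (illustrative
%% call sites only): an atom such as 'no' heals all partitions before
%% ticking, an integer re-randomizes the simulator thresholds, and a
%% tagged tuple selects one of the canned partition shapes.

do_ticks_dispatch_sketch(Pids) ->
    _ = do_ticks(5, Pids, 50, 50),        %% random partitions, 50/50
    _ = do_ticks(5, Pids, {island1}, 0),  %% fixed island partition
    _ = do_ticks(5, Pids, no, no),        %% heal everything, then tick
    ok.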
+ +prop_pulse(Style) when Style == new; Style == regression -> + _ = application:start(crypto), + ?FORALL({Cmds0, Seed}, {gen_commands(Style), pulse:seed()}, + ?IMPLIES(1 < length(Cmds0) andalso length(Cmds0) < 11, begin ok = shutdown_hard(), %% PULSE can be really unfair, of course, including having exec_ticks @@ -294,32 +333,32 @@ prop_pulse() -> {call, ?MODULE, private_stable_check, []}}], LastTriggerTicks = {set,{var,99999997}, {call, ?MODULE, do_ticks, [123, undefined, no, no]}}, - Cmds1 = lists:duplicate(2, LastTriggerTicks), - %% Cmds1 = lists:duplicate(length(all_list())*2, LastTriggerTicks), + Cmds1 = lists:duplicate(4, LastTriggerTicks), Cmds = Cmds0 ++ Stabilize1 ++ Cmds1 ++ Stabilize2 ++ [{set,{var,99999999}, {call, ?MODULE, dump_state, []}}], + + error_logger:tty(false), + pulse:verbose([format]), {_H2, S2, Res} = pulse:run( fun() -> {_H, _S, _R} = run_commands(?MODULE, Cmds) end, [{seed, Seed}, {strategy, unfair}]), - %% ?QC_FMT("S2 ~p\n", [S2]), - case S2#state.dump_state of - undefined -> - ?QC_FMT("BUMMER Cmds = ~p\n", [Cmds]); - _ -> - ok - end, + ok = shutdown_hard(), {Report, PrivProjs, Diag} = S2#state.dump_state, %% Report is ordered by Epoch. For each private projection %% written during any given epoch, confirm that all chain %% members appear in only one unique chain, i.e., the sets of %% unique chains are disjoint. - AllDisjointP = ?MGRTEST:all_reports_are_disjoint(Report), + {AllDisjointP, AllDisjointDetail} = + case ?MGRTEST:all_reports_are_disjoint(Report) of + true -> {true, true}; + Else -> {false, Else} + end, %% For each chain transition experienced by a particular FLU, %% confirm that each state transition is OK. @@ -328,40 +367,82 @@ prop_pulse() -> Ps, FLU)} || {FLU, Ps} <- PrivProjs], SaneP = lists:all(fun({_FLU, SaneRes}) -> SaneRes == true end, Sane), - - %% The final report item should say that all are agreed_membership. + %% On a really bad day, this could trigger a badmatch exception.... {_LastEpoch, {ok_disjoint, LastRepXs}} = lists:last(Report), - AgreedOrNot = lists:usort([element(1, X) || X <- LastRepXs]), %% TODO: Check that we've converged to a single chain with no repairs. - SingleChainNoRepair = case LastRepXs of - [{agreed_membership,{_UPI,[]}}] -> - true; - _ -> - LastRepXs - end, + {SingleChainNoRepair_p, SingleChainNoRepairDetail} = + case LastRepXs of + [LastUPI] when length(LastUPI) == S2#state.num_pids -> + {true, true}; + _ -> + {false, LastRepXs} + end, - ok = shutdown_hard(), ?WHENFAIL( begin + %% ?QC_FMT("PrivProjs = ~P\n", [PrivProjs, 50]), + ?QC_FMT("Report = ~p\n", [Report]), ?QC_FMT("Cmds = ~p\n", [Cmds]), ?QC_FMT("Res = ~p\n", [Res]), ?QC_FMT("Diag = ~s\n", [Diag]), - ?QC_FMT("Report = ~p\n", [Report]), - ?QC_FMT("PrivProjs = ~p\n", [PrivProjs]), ?QC_FMT("Sane = ~p\n", [Sane]), - ?QC_FMT("SingleChainNoRepair failure =\n ~p\n", [SingleChainNoRepair]) -,erlang:halt(0) + ?QC_FMT("AllDisjointDetail = ~p\n", [AllDisjointDetail]), + ?QC_FMT("SingleChainNoRepair failure = ~p\n", [SingleChainNoRepairDetail]) +,?QC_FMT("\n\nHalting now!!!!!!!!!!\n\n", []),timer:sleep(500),erlang:halt(1) end, conjunction([{res, Res == true orelse Res == ok}, {all_disjoint, AllDisjointP}, {sane, SaneP}, - {all_agreed_at_end, AgreedOrNot == [agreed_membership]}, - {single_chain_no_repair, SingleChainNoRepair} + {single_chain_no_repair, SingleChainNoRepair_p} ])) end)). -prop_pulse_test_() -> +-define(FIXTURE(TIMEOUT, EXTRATO, FUN), {timeout, (Timeout+ExtraTO+600), FUN}). 
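%% A hedged sketch: running the property by hand from an Erlang shell,
%% assuming Quviq EQC and PULSE are installed and this module was
%% compiled with the PULSE parse transforms.  It mirrors what the EUnit
%% fixtures below do, minus the timeout and skip plumbing.

manual_prop_run_sketch() ->
    eqc:quickcheck(eqc:testing_time(60, ?QC_OUT(prop_pulse(regression)))).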
+
+prop_pulse_new_test_() ->
+    {Timeout, ExtraTO} = get_timeouts(),
+    DoShrink = get_do_shrink(),
+    F = fun() ->
+                ?assert(do_quickcheck(DoShrink, Timeout, new))
+        end,
+    case os:getenv("PULSE_SKIP_NEW") of
+        false ->
+            ?FIXTURE(Timeout, ExtraTO, F);
+        _ ->
+            {timeout, 5,
+             fun() -> timer:sleep(200),
+                      io:format(user, " (skip new style) ", []) end}
+    end.
+
+%% See gen_commands() for more detail on the regression tests.
+
+prop_pulse_regression_test_() ->
+    {Timeout, ExtraTO} = get_timeouts(),
+    DoShrink = get_do_shrink(),
+    F = fun() ->
+                ?assert(do_quickcheck(DoShrink, Timeout, regression))
+        end,
+    case os:getenv("PULSE_SKIP_REGRESSION") of
+        false ->
+            ?FIXTURE(Timeout, ExtraTO, F);
+        _ ->
+            {timeout, 5,
+             fun() -> timer:sleep(200),
+                      io:format(user, " (skip regression style) ", []) end}
+    end.
+
+do_quickcheck(Timeout, Style) ->
+    do_quickcheck(true, Timeout, Style).
+
+do_quickcheck(true, Timeout, Style) ->
+    eqc:quickcheck(eqc:testing_time(Timeout,
+                                    ?QC_OUT(prop_pulse(Style))));
+do_quickcheck(false, Timeout, Style) ->
+    eqc:quickcheck(eqc:testing_time(Timeout,
+                                    ?QC_OUT(noshrink(prop_pulse(Style))))).
+
+get_timeouts() ->
     Timeout = case os:getenv("PULSE_TIME") of
                   false -> 60;
                   Val   -> list_to_integer(Val)
@@ -370,36 +451,31 @@ prop_pulse_test_() ->
                   false -> 0;
                   Val2  -> list_to_integer(Val2)
               end,
-    {timeout, (Timeout+ExtraTO+600),     % 600 = a bit more fudge time
-     fun() ->
-             ?assert(eqc:quickcheck(eqc:testing_time(Timeout,
-                                            ?QC_OUT(prop_pulse()))))
-     end}.
+    {Timeout, ExtraTO}.
+
+get_do_shrink() ->
+    case os:getenv("PULSE_NOSHRINK") of
+        false ->
+            true;
+        _ ->
+            false
+    end.

 shutdown_hard() ->
-    ?QC_FMT("shutdown(", []),
-    (catch unlink(whereis(machi_partition_simulator))),
     [begin
-         Pid = whereis(X),
-         spawn(fun() -> (catch X:stop()) end),
-         timer:sleep(50),
-         (catch unlink(Pid)),
-         timer:sleep(10),
-         (catch exit(Pid, shutdown)),
-         timer:sleep(1),
-         (catch exit(Pid, kill))
-     end || X <- [machi_partition_simulator, machi_flu_sup] ],
-    timer:sleep(1),
-    ?QC_FMT(")", []),
+         _STOP = application:stop(App)
+     end || App <- [machi] ],
+    timer:sleep(100),
+    ok.

 exec_ticks(Num, All_listE) ->
     Parent = self(),
     Pids = [spawn_link(fun() ->
                                [begin
-                                    erlang:yield(),
                                     M_name = P#p_srvr.name,
-                                    Max = 10,
+                                    %% Max = 10,
+                                    Max = 25,
                                     Elapsed =
                                         ?MGR:sleep_ranked_order(1, Max, M_name,
                                                                 all_list()),
                                     Res = ?MGR:trigger_react_to_env(get_chmgr(P)),
@@ -438,4 +514,20 @@ private_projections_are_stable_check(ProxiesDict, All_listE) ->

 get_chmgr(#p_srvr{props=Ps}) ->
     proplists:get_value(chmgr, Ps).

+%% {PortBase, DirBase} = get_port_dir_base(),
+get_port_dir_base() ->
+    I = case os:getenv("PULSE_BASE_PORT") of
+            false ->
+                0;
+            II ->
+                list_to_integer(II)
+        end,
+    D = case os:getenv("PULSE_BASE_DIR") of
+            false ->
+                "/tmp/c/";
+            DD ->
+                DD
+        end,
+    {7400 + (I * 100), D ++ "/" ++ integer_to_list(I)}.
+
 -endif. % PULSE
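
The knobs above are all plain OS environment variables (PULSE_TIME, PULSE_NOSHRINK, PULSE_SKIP_NEW, PULSE_SKIP_REGRESSION, PULSE_BASE_PORT, PULSE_BASE_DIR), so a run can be reconfigured without recompiling. A sketch of setting them from inside the node before the tests start; os:putenv/2 affects every later os:getenv/1 call in the same node:

    %% 5-minute budget, shrinking disabled, regression flavor skipped,
    %% FLU ports based at 7400 + 1*100 = 7500, data under "/tmp/c/1".
    os:putenv("PULSE_TIME", "300").
    os:putenv("PULSE_NOSHRINK", "yes").
    os:putenv("PULSE_SKIP_REGRESSION", "yes").
    os:putenv("PULSE_BASE_PORT", "1").
    os:putenv("PULSE_BASE_DIR", "/tmp/c").
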
diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl
index 8f58994..c93fe5a 100644
--- a/test/machi_chain_manager1_test.erl
+++ b/test/machi_chain_manager1_test.erl
@@ -45,93 +45,234 @@
 -include_lib("eunit/include/eunit.hrl").

 -compile(export_all).

+%% @doc Create a summary report of all of the *private* projections of
+%% each of the FLUs in the chain, and create a summary for each
+%% epoch number.
+%%
+%% Report format: list({EpochNumber:non_neg_integer(), Report:rpt()})
+%%    rpt(): {'ok_disjoint', unique_upi_repair_lists()} |
+%%           {'bummer_NOT_DISJOINT', {flat(), summaries()}}
+%%    unique_upi_repair_lists(): list(upi_and_repair_lists_concatenated())
+%%    flat(): debugging term; any duplicate in this list is an invalid FLU.
+%%    summaries(): list({FLU, ProjectionSummary:string() | 'not_in_this_epoch'})
+%%
 %% Example:
-%% [{1,{ok_disjoint,[{agreed_membership,{[a],[b,c]}}]}},
-%%  {3,{ok_disjoint,[{agreed_membership,{[a],[b,c]}}]}},
-%%  {8,
-%%   {ok_disjoint,[{not_agreed,{[a],[b,c]},
-%%                             [{b,not_in_this_epoch},
-%%                              <<65,159,66,113,232,15,156,244,197,
-%%                                210,39,82,229,84,192,19,27,45,161,38>>]}]}},
-%%  {10,{ok_disjoint,[{agreed_membership,{[c],[]}}]}},
-%% ...]
+%%
+%% [{1,{ok_disjoint,[[a,b,c]]}},
+%%  {4,{ok_disjoint,[[a,b,c]]}},
+%%  {6,{ok_disjoint,[[a,b,c]]}},
+%%  {16,{ok_disjoint,[[a,b,c]]}},
+%%  {22,{ok_disjoint,[[b]]}},
+%%  {1174,
+%%   {bummer_NOT_DISJOINT,{[a,a,b],
+%%                         [{a,"[{epoch,1174},{author,a},{upi,[a]},{repair,[]},{down,[b]},{d,[{ps,[{a,b},{b,a}]},{nodes_up,[a]}]},{d2,[]}]"},
+%%                          {b,"[{epoch,1174},{author,b},{upi,[b]},{repair,[a]},{down,[]},{d,[{ps,[]},{nodes_up,[a,b]}]},{d2,[]}]"}]}}},
+%% ...]

 unanimous_report(Namez) ->
     UniquePrivateEs =
         lists:usort(lists:flatten(
                       [element(2, ?FLU_PC:list_all_projections(FLU, private)) ||
                           {_FLUName, FLU} <- Namez])),
-    [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs,
-                                       Epoch /= 0].
+    [{Epoch, unanimous_report(Epoch, Namez)} || Epoch <- UniquePrivateEs,
+                                                Epoch /= 0].

 unanimous_report(Epoch, Namez) ->
-    Projs = [{FLUName,
-              case ?FLU_PC:read_projection(FLU, private, Epoch) of
-                  {ok, T} ->
-                      machi_chain_manager1:inner_projection_or_self(T);
-                  _Else ->
-                      {FLUName, not_in_this_epoch}
-              end} || {FLUName, FLU} <- Namez],
+    FLU_Projs = [{FLUName,
+                  case ?FLU_PC:read_projection(FLU, private, Epoch) of
+                      {ok, T} ->
+                          machi_chain_manager1:inner_projection_or_self(T);
+                      _Else ->
+                          not_in_this_epoch
+                  end} || {FLUName, FLU} <- Namez],
+    unanimous_report2(FLU_Projs).
+
+unanimous_report2(FLU_Projs) ->
+    ProjsSumms = [{FLU, if is_tuple(P) ->
+                                Summ = machi_projection:make_summary(P),
+                                lists:flatten(io_lib:format("~w", [Summ]));
+                           is_atom(P) ->
+                                P
+                        end} || {FLU, P} <- FLU_Projs],
     UPI_R_Sums = [{Proj#projection_v1.upi, Proj#projection_v1.repairing,
                    Proj#projection_v1.epoch_csum} ||
-                     {_FLUname, Proj} <- Projs,
+                     {_FLUname, Proj} <- FLU_Projs,
                      is_record(Proj, projection_v1)],
     UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
-    Res =
-        [begin
-             case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
-                                       U == UPI]) of
-                 [_1CSum] ->
-                     %% Yay, there's only 1 checksum.  Let's check
-                     %% that all FLUs are in agreement.
-                     {UPI, Repairing, _CSum} =
-                         lists:keyfind(UPI, 1, UPI_R_Sums),
-                     Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
-                                      P when is_record(P, projection_v1) ->
-                                          P#projection_v1.epoch_csum;
-                                      Else ->
-                                          Else
-                                  end} || FLU <- UPI ++ Repairing],
-                     case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
-                         [_] ->
-                             {agreed_membership, {UPI, Repairing}};
-                         Else2 ->
-                             {not_agreed, {UPI, Repairing}, Else2}
-                     end;
-                 _Else ->
-                     {not_agreed, {undefined, undefined}, Projs}
-             end
-         end || UPI <- UniqueUPIs],
-    AgreedResUPI_Rs = [UPI++Repairing ||
-                          {agreed_membership, {UPI, Repairing}} <- Res],
-    Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
-               lists:sort(lists:flatten(AgreedResUPI_Rs)) of
-              true ->
-                  ok_disjoint;
-              false ->
-                  bummer_NOT_DISJOINT
-          end,
-    {Epoch, {Tag, Res}}.
+    if length(UniqueUPIs) =< 1 ->
+            {ok_disjoint, UniqueUPIs};
+       true ->
+            Flat = lists:flatten(UniqueUPIs),
+            case lists:usort(Flat) == lists:sort(Flat) of
+                true ->
+                    {ok_disjoint, UniqueUPIs};
+                false ->
+                    {bummer_NOT_DISJOINT, {lists:sort(Flat), ProjsSumms}}
+            end
+    end.
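
Given the report format documented above, the offending epochs can be pulled out of a report with a plain list comprehension. A small sketch, where Report is assumed to be the result of unanimous_report/1:

    %% Every epoch whose private projections describe overlapping
    %% chains; any duplicate element of Flat names a FLU that appears
    %% in more than one chain during that epoch.
    BadEpochs = [{Epoch, Flat, Summs} ||
                    {Epoch, {bummer_NOT_DISJOINT, {Flat, Summs}}} <- Report].
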

 all_reports_are_disjoint(Report) ->
-    [] == [X || {_Epoch, Tuple}=X <- Report,
-                element(1, Tuple) /= ok_disjoint].
-
-extract_chains_relative_to_flu(FLU, Report) ->
-    {FLU, [{Epoch, UPI, Repairing} ||
-              {Epoch, {ok_disjoint, Es}} <- Report,
-              {agreed_membership, {UPI, Repairing}} <- Es,
-              lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
-
-chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
-    MemberDict = orddict:from_list([{FLU, #p_srvr{name=FLU}} ||
-                                       FLU <- All_list]),
-    machi_projection:new(Epoch, MyName, MemberDict,
-                         All_list -- (UPI_list ++ Repairing_list),
-                         UPI_list, Repairing_list, [{artificial_by, ?MODULE}]).
+    case [X || {_Epoch, Tuple}=X <- Report,
+               element(1, Tuple) /= ok_disjoint] of
+        [] ->
+            true;
+        Else ->
+            Else
+    end.

 -ifndef(PULSE).

+simple_chain_state_transition_is_sane_test_() ->
+    {timeout, 60, fun() -> simple_chain_state_transition_is_sane_test2() end}.
+
+simple_chain_state_transition_is_sane_test2() ->
+    %% All: A list of all FLUs for a particular test
+    %% UPI1: some combination of All that serves as the UPI list
+    %% Repair1: Some combination of (All -- UPI1) that represents Repairing1
+    %% ... then we test check_simple_chain_state_transition_is_sane() with all
+    %% possible UPI1 and Repair1.
+    [true = check_simple_chain_state_transition_is_sane(UPI1, Repair1) ||
+        %% The five-element list below runs on my MacBook Pro in about
+        %% 4.8 seconds:
+        %% All <- [ [a], [a,b], [a,b,c], [a,b,c,d], [a,b,c,d,e] ],
+        %% Four elements on the same MBP take about 0.15 seconds.
+        All <- [ [a], [a,b], [a,b,c], [a,b,c,d] ],
+        UPI1 <- machi_util:combinations(All),
+        Repair1 <- machi_util:combinations(All -- UPI1)].
+
+%% Given a UPI1 and Repair1 list, we calculate all possible good UPI2
+%% lists.  For every good {UPI1, Repair1} -> UPI2 transition, the
+%% simple_chain_state_transition_is_sane() function must return true;
+%% for every other UPI2 transition, simple_chain_state_transition_is_sane()
+%% must return false.
+%%
+%% We add an extra possible participant, 'bogus', to the list of all
+%% possible UPI2 transitions, just to demonstrate that adding an extra
+%% element/participant/thingie is never sane.
+
+check_simple_chain_state_transition_is_sane([], []) ->
+    true;
+check_simple_chain_state_transition_is_sane(UPI1, Repair1) ->
+    Good_UPI2s = [ X ++ Y || X <- machi_util:ordered_combinations(UPI1),
+                             Y <- machi_util:ordered_combinations(Repair1)],
+    All_UPI2s = machi_util:combinations(lists:usort(UPI1 ++ Repair1) ++
+                                        [bogus]),
+
+    [true = ?MGR:simple_chain_state_transition_is_sane(UPI1, Repair1, UPI2) ||
+        UPI2 <- Good_UPI2s],
+    [false = ?MGR:simple_chain_state_transition_is_sane(UPI1, Repair1, UPI2) ||
+        UPI2 <- (All_UPI2s -- Good_UPI2s)],
+
+    true.
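
The exhaustive sweep above grows quickly with chain length; a single configuration can also be spot-checked from the shell. A minimal sketch using the helper defined above:

    %% Checks every good and bad UPI2 transition for UPI1 = [a,b] and
    %% Repair1 = [c] in one call; returns true on success and crashes
    %% with a badmatch on the first unsound verdict.
    true = machi_chain_manager1_test:check_simple_chain_state_transition_is_sane(
             [a,b], [c]).
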
+
+-ifdef(EQC).
+
+%% This QuickCheck property is crippled: because the old chain state
+%% transition check, chain_mgr_legacy:projection_transition_is_sane(),
+%% is so buggy and the new check is (apparently) so much better, I
+%% have changed the ?WHENFAIL() criteria to accept either agreement
+%% _or_ a case where the legacy check says true but the new check says
+%% false.
+%%
+%% On my MacBook Pro, fewer than 1,000 tests are required to find at
+%% least one case where the legacy check is wrong and the new check is
+%% correct.  Running for two seconds covers about 3,500 test cases.

+compare_eqc_setup_test_() ->
+    %% Silly QuickCheck can take a long time to start up, check its
+    %% license, etc.
+    %% machi_chain_manager1_test: compare_eqc_setup_test...[1.788 s] ok
+    {timeout, 30,
+     fun() -> eqc:quickcheck(eqc:testing_time(0.1, true)) end}.
+
+-define(COMPARE_TIMEOUT, 1.2).
+%% -define(COMPARE_TIMEOUT, 4.8).
+
+compare_legacy_with_v2_chain_transition_check1_test() ->
+    eqc:quickcheck(
+      ?QC_OUT(
+      eqc:testing_time(
+        ?COMPARE_TIMEOUT,
+        prop_compare_legacy_with_v2_chain_transition_check(primitive)))).
+
+compare_legacy_with_v2_chain_transition_check2_test() ->
+    eqc:quickcheck(
+      ?QC_OUT(
+      eqc:testing_time(
+        ?COMPARE_TIMEOUT,
+        prop_compare_legacy_with_v2_chain_transition_check(whole)))).
+
+prop_compare_legacy_with_v2_chain_transition_check() ->
+    prop_compare_legacy_with_v2_chain_transition_check(primitive).
+
+prop_compare_legacy_with_v2_chain_transition_check(Style) ->
+    %% ?FORALL(All, nonempty(list([a,b,c,d,e])),
+    ?FORALL(All, non_empty(some([a,b,c,d])),
+    ?FORALL({Author1, UPI1, Repair1x, Author2, UPI2, Repair2x},
+            {elements(All),some(All),some(All),elements(All),some(All),some(All)},
+    ?IMPLIES(length(lists:usort(UPI1 ++ Repair1x)) > 0 andalso
+             length(lists:usort(UPI2 ++ Repair2x)) > 0,
+    begin
+        MembersDict = orddict:from_list([{X, #p_srvr{name=X}} || X <- All]),
+        Repair1 = Repair1x -- UPI1,
+        Down1 = All -- (UPI1 ++ Repair1),
+        Repair2 = Repair2x -- UPI2,
+        Down2 = All -- (UPI2 ++ Repair2),
+        P1 = machi_projection:new(1, Author1, MembersDict,
+                                  Down1, UPI1, Repair1, []),
+        P2 = machi_projection:new(2, Author2, MembersDict,
+                                  Down2, UPI2, Repair2, []),
+        Old_res = chain_mgr_legacy:projection_transition_is_sane(
+                    P1, P2, Author1, false),
+        Old_p = case Old_res of true -> true;
+                                _    -> false
+                end,
+        case Style of
+            primitive ->
+                New_res = ?MGR:chain_state_transition_is_sane(
+                             Author1, UPI1, Repair1, Author2, UPI2),
+                New_p = case New_res of true -> true;
+                                        _    -> false
+                        end;
+            whole ->
+                New_res = machi_chain_manager1:projection_transition_is_sane(
+                            P1, P2, Author1, false),
+                New_p = case New_res of true -> true;
+                                        _    -> false
+                        end
+        end,
+        (catch ets:insert(count,
+                          {{Author1, UPI1, Repair1, Author2, UPI2, Repair2}, true})),
+        ?WHENFAIL(io:format(user,
+                            "Old_res: ~p/~p (~p)\nNew_res: ~p/~p (why line ~P)\n",
+                            [Old_p, Old_res, catch get(why1),
+                             New_p, New_res, catch get(why2), 30]),
+                  %% Old_p == New_p)
+                  Old_p == New_p orelse (Old_p == true andalso New_p == false))
+    end))).
+
+some(L) ->
+    ?LET(L2, list(oneof(L)),
+         dedupe(L2)).
+
+dedupe(L) ->
+    dedupe(L, []).
+
+dedupe([H|T], Seen) ->
+    case lists:member(H, Seen) of
+        false ->
+            [H|dedupe(T, [H|Seen])];
+        true ->
+            dedupe(T, Seen)
+    end;
+dedupe([], _) ->
+    [].
+
+make_prop_ets() ->
+    ets:new(count, [named_table, set, public]).
+
+-endif. % EQC
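
The (catch ets:insert(count, ...)) call inside the property is a silent no-op unless the count table exists, so case-coverage collection is strictly opt-in. A sketch of gathering it for one short run:

    %% Create the public named table, run the property briefly, then
    %% count how many distinct transition 6-tuples were exercised.
    count = machi_chain_manager1_test:make_prop_ets(),
    eqc:quickcheck(
      eqc:testing_time(
        2, machi_chain_manager1_test:prop_compare_legacy_with_v2_chain_transition_check())),
    io:format("distinct cases: ~w\n", [ets:info(count, size)]).
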
+
 smoke0_test() ->
     {ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
     Host = "localhost",
@@ -185,16 +326,6 @@ smoke1_test() ->
     end.

 nonunanimous_setup_and_fix_test() ->
-    %% TODO attack list:
-    %% __ Add start option to chain manager to be "passive" only, i.e.,
-    %%    not immediately go to work on
-    %% 1. Start FLUs with full complement of FLU+proj+chmgr.
-    %% 2. Put each of them under a supervisor?
-    %%    - Sup proc could be a created-specifically-for-test thing, perhaps?
-    %%      Rather than relying on a supervisor with reg name + OTP app started
-    %%      plus plus more more yaddayadda?
-    %% 3. Add projection catalog/orddict of #p_srvr records??
-    %% 4. Fix this test, etc etc.
     machi_partition_simulator:start_link({1,2,3}, 100, 0),
     TcpPort = 62877,
     FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}],
@@ -224,13 +355,16 @@
     ok = ?FLU_PC:write_projection(Proxy_b, public, P1b),

     %% ?D(x),
-    {not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
+    {not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(
+                                 Ma, false),
     %% ?Dw(_XX),
-    {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+    {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(
+                                 Ma, true),
     %% The read repair here doesn't automatically trigger the creation of
     %% a new projection (to try to create a unanimous projection).  So
     %% we expect nothing to change when called again.
-    {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+    {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(
+                                 Ma, true),

     {now_using, _, EpochNum_a} = ?MGR:trigger_react_to_env(Ma),
     {no_change, _, EpochNum_a} = ?MGR:trigger_react_to_env(Ma),
@@ -258,5 +392,54 @@ timer:sleep(3000),
         ok = machi_partition_simulator:stop()
     end.

+unanimous_report_test() ->
+    TcpPort = 63877,
+    FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}],
+    P_s = [#p_srvr{name=Name, address="localhost", port=Port} ||
+              {Name,Port,_Dir} <- FluInfo],
+    MembersDict = machi_projection:make_members_dict(P_s),
+
+    E5 = 5,
+    UPI5 = [a,b],
+    Rep5 = [],
+    Report5 = [UPI5],
+    P5 = machi_projection:new(E5, a, MembersDict, [], UPI5, Rep5, []),
+    {ok_disjoint, Report5} =
+        unanimous_report2([{a, P5}, {b, P5}]),
+    {ok_disjoint, Report5} =
+        unanimous_report2([{a, not_in_this_epoch}, {b, P5}]),
+    {ok_disjoint, Report5} =
+        unanimous_report2([{a, P5}, {b, not_in_this_epoch}]),
+
+    UPI5_b = [a],
+    Rep5_b = [],
+    P5_b = machi_projection:new(E5, b, MembersDict, [b], UPI5_b, Rep5_b, []),
+    {bummer_NOT_DISJOINT, _} = unanimous_report2([{a, P5}, {b, P5_b}]),
+
+    UPI5_c = [b],
+    Rep5_c = [a],
+    P5_c = machi_projection:new(E5, b, MembersDict, [], UPI5_c, Rep5_c, []),
+    {bummer_NOT_DISJOINT, _} =
+        unanimous_report2([{a, P5}, {b, P5_c}]),
+
+    P_s3 = [#p_srvr{name=Name, address="localhost", port=Port} ||
+               {Name,Port,_Dir} <- FluInfo ++ [{c,TcpPort+0,"./data.c"}]],
+    MembersDict3 = machi_projection:make_members_dict(P_s3),
+
+    UPI5_d = [c],
+    Rep5_d = [a],
+    Report5d = [UPI5, UPI5_d],
+    P5_d = machi_projection:new(E5, b, MembersDict3, [b], UPI5_d, Rep5_d, []),
+    {ok_disjoint, Report5d} = unanimous_report2([{a, P5}, {b, P5_d}]),
+
+    UPI5_e = [b],
+    Rep5_e = [c],
+    Report5be = [UPI5_b, UPI5_e],
+    P5_e = machi_projection:new(E5, b, MembersDict3, [a], UPI5_e, Rep5_e, []),
+    {bummer_NOT_DISJOINT, _} = unanimous_report2([{a, P5}, {b, P5_e}]),
+    {ok_disjoint, Report5be} = unanimous_report2([{a, P5_b}, {b, P5_e}]),
+
+    ok.
+
 -endif. % !PULSE
 -endif. % TEST
diff --git a/test/machi_cr_client_test.erl b/test/machi_cr_client_test.erl
index 0bdaafe..4f27f57 100644
--- a/test/machi_cr_client_test.erl
+++ b/test/machi_cr_client_test.erl
@@ -66,7 +66,7 @@ smoke_test2() ->
                   end || Pid <- [a_chmgr,b_chmgr,c_chmgr] ]
          end,
     _ = lists:foldl(
-          fun(_, [{c,[a,b,c]}]=Acc) -> Acc;
+          fun(_, [{a,[a,b,c]}]=Acc) -> Acc;
             (_, _Acc) ->
                   TickAll(),                % has some sleep time inside
                   Xs = [begin
diff --git a/test/machi_partition_simulator.erl b/test/machi_partition_simulator.erl
index fbbdcb9..763b4e3 100644
--- a/test/machi_partition_simulator.erl
+++ b/test/machi_partition_simulator.erl
@@ -33,7 +33,7 @@
 -endif.
 -export([start_link/3, stop/0,
-         get/1, reset_thresholds/2,
+         get/1, reset_thresholds/2, set_seed/1,
          no_partitions/0, always_last_partitions/0, always_these_partitions/1]).
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
@@ -67,6 +67,9 @@ get(Nodes) ->
 reset_thresholds(OldThreshold, NoPartitionThreshold) ->
     gen_server:call(?MODULE, {reset_thresholds, OldThreshold,
                               NoPartitionThreshold}, infinity).

+set_seed(Seed) ->
+    gen_server:call(?MODULE, {set_seed, Seed}, infinity).
+
 no_partitions() ->
     reset_thresholds(-999, 999).

@@ -98,6 +101,8 @@ handle_call({get, Nodes}, _From, S) ->
 handle_call({reset_thresholds, OldThreshold, NoPartitionThreshold}, _From, S) ->
     {reply, ok, S#state{old_threshold=OldThreshold,
                         no_partition_threshold=NoPartitionThreshold}};
+handle_call({set_seed, Seed}, _From, S) ->
+    {reply, ok, S#state{seed=Seed}};
 handle_call({always_these_partitions, Parts}, _From, S) ->
     {reply, ok, S#state{old_partitions={Parts,[na_reset_by_always]}}};
 handle_call({stop}, _From, S) ->
diff --git a/test/machi_projection_test.erl b/test/machi_projection_test.erl
index e2ed93e..b961e3b 100644
--- a/test/machi_projection_test.erl
+++ b/test/machi_projection_test.erl
@@ -78,9 +78,12 @@ compare_test() ->
 try_it(MyName, All_list, UPI_list, Down_list, Repairing_list, Ps) ->
     try
-        P = machi_projection:new(MyName, All_list, UPI_list, Down_list,
+        P = machi_projection:new(MyName, All_list, Down_list, UPI_list,
                                  Repairing_list, Ps),
-        is_record(P, projection_v1)
+        Down_list = P#projection_v1.down,
+        UPI_list = P#projection_v1.upi,
+        Repairing_list = P#projection_v1.repairing,
+        true
     catch _:_ ->
             false
     end.
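
The try_it/6 change above is typical of this whole diff: machi_projection:new/6 takes the down list before the UPI list, and the added pattern matches pin each argument to the record field it must land in. A minimal sketch of the same assertion style, assuming (as the test above does) that a plain list of member names is acceptable for the second argument:

    %% Member a authors a projection: a,b in the UPI, c down, none repairing.
    P = machi_projection:new(a, [a,b,c], [c], [a,b], [], []),
    [c]   = P#projection_v1.down,
    [a,b] = P#projection_v1.upi,
    []    = P#projection_v1.repairing.
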