Clean up cruft, add more comments

2014-11-05 15:00:32 +09:00 · 2014-11-05 15:00:32 +09:00 · 30f5a84cea
commit 30f5a84cea
parent 32cfcccf34
1 changed files with 90 additions and 82 deletions
--- a/prototype/poc-machi/src/machi_chain_manager1.erl
+++ b/prototype/poc-machi/src/machi_chain_manager1.erl
@ -33,6 +33,9 @@
 -define(D(X), io:format(user, "~s ~p\n", [??X, X])).
 -define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).

+%% Keep a history of our flowchart execution in the process dictionary.
+-define(REACT(T), put(react, [T|get(react)])).
+
 %% API
 -export([start_link/3, stop/1, ping/1,
         calculate_projection_internal_old/1]).
@ -92,7 +95,8 @@ test_calc_proposed_projection(Pid) ->
    gen_server:cast(Pid, {test_calc_proposed_projection}).

 test_read_latest_public_projection(Pid, ReadRepairP) ->
-    gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP}, infinity).
+    gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP},
+                    infinity).

 test_react_to_env(Pid) ->
    gen_server:call(Pid, {test_react_to_env}, infinity).
@ -133,7 +137,8 @@ init({MyName, All_list, MyFLUPid}) ->

 handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
    {reply, not_initialized, S};
-handle_call({calculate_projection_internal_old}, _From, #ch_mgr{myflu=MyFLU}=S) ->
+handle_call({calculate_projection_internal_old}, _From,
+            #ch_mgr{myflu=MyFLU}=S) ->
    RelativeToServer = MyFLU,
    {Reply, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
    {reply, Reply, S2};
@ -148,7 +153,8 @@ handle_call({ping}, _From, S) ->
    {reply, pong, S};
 handle_call({stop}, _From, S) ->
    {stop, normal, ok, S};
-handle_call({test_calc_projection, KeepRunenvP}, _From, #ch_mgr{myflu=MyFLU}=S) ->
+handle_call({test_calc_projection, KeepRunenvP}, _From,
+            #ch_mgr{myflu=MyFLU}=S) ->
    RelativeToServer = MyFLU,
    {P, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
    {reply, {ok, P}, if KeepRunenvP -> S2;
@ -253,7 +259,7 @@ cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP,
    end.

 cl_write_public_proj_remote(FLUs, Partitions, Epoch, Proj, S) ->
-    %% We're going to be very cavalier about this write because we'll rely
+    %% We're going to be very care-free about this write because we'll rely
    %% on the read side to do any read repair.
    DoIt = fun(X) -> machi_flu0:proj_write(X, Epoch, public, Proj) end,
    Rs = [{FLU, perhaps_call_t(S, Partitions, FLU, fun() -> DoIt(FLU) end)} ||
@ -306,17 +312,6 @@ cl_read_latest_public_projection(#ch_mgr{proj=CurrentProj}=S) ->
            {UnanimousTag, BestProj, Extra2, S2}
    end.

-%% 1. Do the results contain a projection?
-%%    perhaps figure that in cl_read_latest_public_projection()?
-%% 2. Were there any error_unwritten?
-%% 3. Repair the unwritten FLUs.
-%% 4. Nothing to do with timeouts, right?  They're
-%%    hopeless for the moment, need to wait.
-%%
-%% For read-repair, just choose the best and then brute-
-%% force write, don't care about write status, then
-%% repeat do_cl_read_latest_public_projection() ??
-
 do_read_repair(FLUsRs, _Extra, #ch_mgr{proj=CurrentProj} = S) ->
    Unwrittens = [x || {_FLU, error_unwritten} <- FLUsRs],
    Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection)],
@ -379,7 +374,7 @@ calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, RelativeToServer,
                    RelativeToServer, Dbg, S).

 %% OldThreshold: Percent chance of using the old/previous network partition list
-%% NoPartitionThreshold: If the network partition changes, what are the odds
+%% NoPartitionThreshold: If the network partition changes, what percent chance
 %%                       that there are no partitions at all?

 calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
@ -446,7 +441,6 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
    P = make_projection(OldEpochNum + 1,
                        MyName, All_list, Down, NewUPI, NewRepairing,
                        Dbg ++ [{nodes_up, Up}]),
-    if P#projection.upi == [] -> io:format(user, "old: ~w\n", [make_projection_summary(LastProj)]), io:format(user, "new: ~w\n", [make_projection_summary(P)]), exit(zoozoo); true -> ok end,
    {P, S#ch_mgr{runenv=RunEnv3}}.

 calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
@ -456,9 +450,9 @@ calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
    {UpNodes, Partitions, S#ch_mgr{runenv=RunEnv2}}.

 calc_up_nodes(MyName, AllMembers, RunEnv1) ->
-    %% Seed1 = proplists:get_value(seed, RunEnv1),
    {Partitions2, Islands2} = machi_partition_simulator:get(AllMembers),
-    catch put(react, [{partitions,Partitions2},{islands,Islands2}|get(react)]),
+    catch ?REACT({partitions,Partitions2}),
+    catch ?REACT({islands,Islands2}),
    UpNodes = lists:sort(
                [Node || Node <- AllMembers,
                         not lists:member({MyName, Node}, Partitions2),
@ -474,11 +468,6 @@ replace(PropList, Items) ->
                        lists:keyreplace(Key, 1, Ps, {Key,Val})
                end, PropList, Items).

-ifdef(TEST).
-mps(P) ->
-    make_projection_summary(P).
-endif. % TEST
-
 make_projection_summary(#projection{epoch_number=EpochNum,
                                    all_members=_All_list,
                                    down=Down_list,
@ -522,22 +511,24 @@ do_react_to_env(S) ->
    react_to_env_A10(S).

 react_to_env_A10(S) ->
-    put(react, [a10|get(react)]),
+    ?REACT(a10),
    react_to_env_A20(0, S).

 react_to_env_A20(Retries, #ch_mgr{myflu=MyFLU} = S) ->
-    put(react, [a20|get(react)]),
-    %% io:format(user, "current:  ~w\n", [make_projection_summary(S#ch_mgr.proj)]),
+    ?REACT(a20),
    RelativeToServer = MyFLU,
    {P_newprop, S2} = calc_projection(S, RelativeToServer,
                                      [{author_proc, react}]),
-    %% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end,
    react_to_env_A30(Retries, P_newprop, S2).

 react_to_env_A30(Retries, P_newprop, S) ->
-    put(react, [a30|get(react)]),
+    ?REACT(a30),
    {UnanimousTag, P_latest, ReadExtra, S2} =
        do_cl_read_latest_public_projection(true, S),
+
+    %% The UnanimousTag isn't quite sufficient for our needs.  We need
+    %% to determine if *all* of the UPI+Repairing FLUs are members of
+    %% the unanimous server replies.
    UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)),
    UPI_Repairing_FLUs = lists:sort(P_latest#projection.upi ++
                                    P_latest#projection.repairing),
@ -545,35 +536,34 @@ react_to_env_A30(Retries, P_newprop, S) ->
    LatestUnanimousP =
        if UnanimousTag == unanimous
           andalso
-           All_UPI_Repairing_were_unanimous -> put(react, [{a30,?LINE}|get(react)]),true;
-           UnanimousTag == unanimous        -> put(react, [{a30,?LINE,UPI_Repairing_FLUs, UnanimousFLUs}|get(react)]),false;
-           UnanimousTag == not_unanimous    -> put(react, [{a30,?LINE}|get(react)]),false;
-           true -> exit({badbad, UnanimousTag})
+           All_UPI_Repairing_were_unanimous ->
+                ?REACT({a30,?LINE}),
+                true;
+           UnanimousTag == unanimous ->
+                ?REACT({a30,?LINE,UPI_Repairing_FLUs,UnanimousFLUs}),
+                false;
+           UnanimousTag == not_unanimous ->
+                ?REACT({a30,?LINE}),
+                false;
+           true ->
+                exit({badbad, UnanimousTag})
        end,
    react_to_env_A40(Retries, P_newprop, P_latest,
                     LatestUnanimousP, S2).

 react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
                 #ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
-    put(react, [a40|get(react)]),
+    ?REACT(a40),
    [{Rank_newprop, _}] = rank_projections([P_newprop], P_current),
    [{Rank_latest, _}] = rank_projections([P_latest], P_current),
    LatestAuthorDownP = lists:member(P_latest#projection.author_server,
                                     P_newprop#projection.down),

-    if P_newprop#projection.epoch_number == 666
-       orelse P_latest#projection.epoch_number == 666 ->
-            io:format(user, "\nBUMMER\nLatest  ~p\nNewprop ~p\nRunenv ~p\nTsns ~w", [make_projection_summary(P_newprop), make_projection_summary(P_latest), S#ch_mgr.runenv, lists:reverse(get(react))]),
-            exit(bummer);
-       true ->
-            ok
-    end,
-
    if
        P_latest#projection.epoch_number > P_current#projection.epoch_number
        orelse
        not LatestUnanimousP ->
-            put(react, [{a40, ?LINE, P_latest#projection.epoch_number > P_current#projection.epoch_number, not LatestUnanimousP}|get(react)]),
+            ?REACT({a40, ?LINE,P_latest#projection.epoch_number > P_current#projection.epoch_number, not LatestUnanimousP}),

            %% 1st clause: someone else has written a newer projection
            %% 2nd clause: a network partition has healed, revealing a
@ -584,7 +574,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
        P_latest#projection.epoch_number < P_current#projection.epoch_number
        orelse
        P_latest /= P_current ->
-            put(react, [{a40, ?LINE}|get(react)]),
+            ?REACT({a40, ?LINE}),

            %% Both of these cases are rare.  Elsewhere, the code
            %% assumes that the local FLU's projection store is always
@ -609,7 +599,16 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,

        %% A40a (see flowchart)
        Rank_newprop > Rank_latest ->
-            put(react, [{a40, ?LINE}|get(react)]),
+            ?REACT({a40, ?LINE}),
+
+            %% TODO: There may be an "improvement" here.  If we're the
+            %% highest-ranking FLU in the all_members list, then if we make a
+            %% projection where our UPI list is the same as P_latest's, and
+            %% our repairing list is the same as P_latest's, then it may not
+            %% be necessary to write our projection: it doesn't "improve"
+            %% anything UPI-wise or repairing-wise.  But it isn't clear to me
+            %% if it's 100% correct to "improve" here and skip writing
+            %% P_newprop, yet.
            react_to_env_C300(P_newprop, P_latest, S);

        %% A40b (see flowchart)
@ -618,12 +617,13 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
        (P_newprop#projection.upi /= P_latest#projection.upi
         orelse
         P_newprop#projection.repairing /= P_latest#projection.repairing) ->
-            put(react, [{a40, ?LINE}|get(react)]),
+            ?REACT({a40, ?LINE}),
+
            react_to_env_C300(P_newprop, P_latest, S);

        %% A40c (see flowchart)
        LatestAuthorDownP ->
-            put(react, [{a40, ?LINE}|get(react)]),
+            ?REACT({a40, ?LINE}),

            %% TODO: I believe that membership in the
            %% P_newprop#projection.down is not sufficient for long
@ -642,25 +642,25 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
            %%  * if we accept ring1's proj: two functioning chains
            %%    ([ring1,ring2] and [ring4,ring5] indepependently)
            %%    but unstable: we're probably going to flap back & forth?!
-
-            ?D({{{{{yoyoyo_A40c}}}}}),
            react_to_env_C300(P_newprop, P_latest, S);

        true ->
-            put(react, [{a40, ?LINE}|get(react)]),
+            ?REACT({a40, ?LINE}),
+
            {{no_change, P_latest#projection.epoch_number}, S}
    end.

 react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
                 Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) ->
-    put(react, [b10|get(react)]),
+    ?REACT(b10),
    if
        LatestUnanimousP ->
-            put(react, [{b10, ?LINE}|get(react)]),
+            ?REACT({b10, ?LINE}),
+
            react_to_env_C100(P_newprop, P_latest, S);

        Retries > 2 ->
-            put(react, [{b10, ?LINE}|get(react)]),
+            ?REACT({b10, ?LINE}),

            %% The author of P_latest is too slow or crashed.
            %% Let's try to write P_newprop and see what happens!
@ -669,7 +669,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
        Rank_latest >= Rank_newprop
        andalso
        P_latest#projection.author_server /= MyName ->
-            put(react, [{b10, ?LINE}|get(react)]),
+            ?REACT({b10, ?LINE}),

            %% Give the author of P_latest an opportunite to write a
            %% new projection in a new epoch to resolve this mixed
@ -677,7 +677,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
            react_to_env_C200(Retries, P_latest, S);

        true ->
-            put(react, [{b10, ?LINE}|get(react)]),
+            ?REACT({b10, ?LINE}),

            %% P_newprop is best, so let's write it.
            react_to_env_C300(P_newprop, P_latest, S)
@ -685,9 +685,10 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,

 react_to_env_C100(P_newprop, P_latest,
                  #ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
-    put(react, [c100|get(react)]),
+    ?REACT(c100),
    I_am_UPI_in_newprop_p = lists:member(MyFLU, P_newprop#projection.upi),
-    I_am_Repairing_in_latest_p = lists:member(MyFLU, P_latest#projection.repairing),
+    I_am_Repairing_in_latest_p = lists:member(MyFLU,
+                                              P_latest#projection.repairing),
    ShortCircuit_p =
        P_latest#projection.epoch_number > P_current#projection.epoch_number
        andalso
@ -698,26 +699,25 @@ react_to_env_C100(P_newprop, P_latest,
    case {ShortCircuit_p, projection_transition_is_sane(P_current, P_latest,
                                                        MyFLU)} of
        {true, _} ->
+            ?REACT({c100, repairing_short_circuit}),
            %% Someone else believes that I am repairing.  We assume
-            %% that nobody is being Byzantine, so we'll believe it.
-            %% We ignore our proposal and try to go with the latest.
+            %% that nobody is being Byzantine, so we'll believe that I
+            %% am/should be repairing.  We ignore our proposal and try
+            %% to go with the latest.
            react_to_env_C110(P_latest, S);
        {_, true} ->
+            ?REACT({c100, sane}),
            react_to_env_C110(P_latest, S);
        {_, _AnyOtherReturnValue} ->
-            %% %% P_latest is known to be crap.
-            %% %% By process of elimination, P_newprop is best,
-            %% %% so let's write it.
-            %% MaxEpoch = erlang:max(P_newprop#projection.epoch_number,
-            %%                       P_latest#projection.epoch_number) + 1,
-            %% P_newprop2 = update_projection_checksum(
-            %%                P_newprop#projection{epoch_number=MaxEpoch}),
-            %% react_to_env_C300(P_newprop2, P_latest, S)
+            ?REACT({c100, not_sane}),
+            %% P_latest is not sane.
+            %% By process of elimination, P_newprop is best,
+            %% so let's write it.
            react_to_env_C300(P_newprop, P_latest, S)
    end.

 react_to_env_C110(P_latest, #ch_mgr{myflu=MyFLU} = S) ->
-    put(react, [c110|get(react)]),
+    ?REACT(c110),
    %% TOOD: Should we carry along any extra info that that would be useful
    %%       in the dbg2 list?
    Extra_todo = [],
@ -725,18 +725,19 @@ react_to_env_C110(P_latest, #ch_mgr{myflu=MyFLU} = S) ->
    Islands = proplists:get_value(network_islands, RunEnv),
    P_latest2 = update_projection_dbg2(
                  P_latest,
-                  [{network_islands, Islands},{hooray, {v2, date(), time()}}|Extra_todo]),
+                  [{network_islands, Islands},
+                   {hooray, {v2, date(), time()}}|Extra_todo]),
    Epoch = P_latest2#projection.epoch_number,
    ok = machi_flu0:proj_write(MyFLU, Epoch, private, P_latest2),
    react_to_env_C120(P_latest, S).

 react_to_env_C120(P_latest, S) ->
-    put(react, [c120|get(react)]),
+    ?REACT(c120),
    {{now_using, P_latest#projection.epoch_number},
     S#ch_mgr{proj=P_latest, proj_proposed=none}}.

 react_to_env_C200(Retries, P_latest, S) ->
-    put(react, [c200|get(react)]),
+    ?REACT(c200),
    try
        yo:tell_author_yo(P_latest#projection.author_server)
    catch Type:Err ->
@ -746,30 +747,27 @@ react_to_env_C200(Retries, P_latest, S) ->
    react_to_env_C210(Retries, S).

 react_to_env_C210(Retries, #ch_mgr{myflu=MyFLU, proj=Proj} = S) ->
-    put(react, [c210|get(react)]),
-    %% TODO: implement the ranked sleep thingie?
+    ?REACT(c210),
    sleep_ranked_order(10, 100, MyFLU, Proj#projection.all_members),
    react_to_env_C220(Retries, S).

 react_to_env_C220(Retries, S) ->
-    put(react, [c220|get(react)]),
+    ?REACT(c220),
    react_to_env_A20(Retries + 1, S).

 react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop,
-                  #projection{epoch_number=_Epoch_latest}=_P_latest, S) ->
-    put(react, [c300|get(react)]),
-    NewEpoch = erlang:max(Epoch_newprop, _Epoch_latest) + 1,
+                  #projection{epoch_number=Epoch_latest}=_P_latest, S) ->
+    ?REACT(c300),
+    NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
    P_newprop2 = P_newprop#projection{epoch_number=NewEpoch},
-    %% %% Let's return to the old epoch thingie and see what happens.........
-    %% Epoch = Epoch_newprop,
-    %% P_newprop2 = P_newprop#projection{epoch_number=Epoch + 1},
    react_to_env_C310(update_projection_checksum(P_newprop2), S).

 react_to_env_C310(P_newprop, S) ->
-    put(react, [{c310,make_projection_summary(P_newprop)}|get(react)]),
+    ?REACT(c310),
    Epoch = P_newprop#projection.epoch_number,
    {_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
-    put(react, [{c310,_Res}|get(react)]),
+    ?REACT({c310,make_projection_summary(P_newprop)}),
+    ?REACT({c310,_Res}),
    
    react_to_env_A10(S2).

@ -794,6 +792,16 @@ projection_transition_is_sane(
              dbg=Dbg2} = P2,
  RelativeToServer) ->
 try
+    %% General notes:
+    %%
+    %% I'm making no attempt to be "efficient" here.  All of these data
+    %% structures are small, and they're not called zillions of times per
+    %% second.
+    %%
+    %% The chain sequence/order checks at the bottom of this function aren't
+    %% as easy-to-read as they ought to be.  However, I'm moderately confident
+    %% that it isn't buggy.  TODO: refactor them for clarity.
+
    true = is_integer(Epoch1) andalso is_integer(Epoch2),
    true = is_binary(CSum1) andalso is_binary(CSum2),
    {_,_,_} = CreationTime1,