Clean up cruft, add more comments
This commit is contained in:
parent
32cfcccf34
commit
30f5a84cea
1 changed files with 90 additions and 82 deletions
|
@ -33,6 +33,9 @@
|
|||
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
|
||||
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
|
||||
|
||||
%% Keep a history of our flowchart execution in the process dictionary.
|
||||
-define(REACT(T), put(react, [T|get(react)])).
|
||||
|
||||
%% API
|
||||
-export([start_link/3, stop/1, ping/1,
|
||||
calculate_projection_internal_old/1]).
|
||||
|
@ -92,7 +95,8 @@ test_calc_proposed_projection(Pid) ->
|
|||
gen_server:cast(Pid, {test_calc_proposed_projection}).
|
||||
|
||||
test_read_latest_public_projection(Pid, ReadRepairP) ->
|
||||
gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP}, infinity).
|
||||
gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP},
|
||||
infinity).
|
||||
|
||||
test_react_to_env(Pid) ->
|
||||
gen_server:call(Pid, {test_react_to_env}, infinity).
|
||||
|
@ -133,7 +137,8 @@ init({MyName, All_list, MyFLUPid}) ->
|
|||
|
||||
handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
|
||||
{reply, not_initialized, S};
|
||||
handle_call({calculate_projection_internal_old}, _From, #ch_mgr{myflu=MyFLU}=S) ->
|
||||
handle_call({calculate_projection_internal_old}, _From,
|
||||
#ch_mgr{myflu=MyFLU}=S) ->
|
||||
RelativeToServer = MyFLU,
|
||||
{Reply, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
|
||||
{reply, Reply, S2};
|
||||
|
@ -148,7 +153,8 @@ handle_call({ping}, _From, S) ->
|
|||
{reply, pong, S};
|
||||
handle_call({stop}, _From, S) ->
|
||||
{stop, normal, ok, S};
|
||||
handle_call({test_calc_projection, KeepRunenvP}, _From, #ch_mgr{myflu=MyFLU}=S) ->
|
||||
handle_call({test_calc_projection, KeepRunenvP}, _From,
|
||||
#ch_mgr{myflu=MyFLU}=S) ->
|
||||
RelativeToServer = MyFLU,
|
||||
{P, S2} = calc_projection(S, RelativeToServer, [{author_proc, call}]),
|
||||
{reply, {ok, P}, if KeepRunenvP -> S2;
|
||||
|
@ -253,7 +259,7 @@ cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP,
|
|||
end.
|
||||
|
||||
cl_write_public_proj_remote(FLUs, Partitions, Epoch, Proj, S) ->
|
||||
%% We're going to be very cavalier about this write because we'll rely
|
||||
%% We're going to be very care-free about this write because we'll rely
|
||||
%% on the read side to do any read repair.
|
||||
DoIt = fun(X) -> machi_flu0:proj_write(X, Epoch, public, Proj) end,
|
||||
Rs = [{FLU, perhaps_call_t(S, Partitions, FLU, fun() -> DoIt(FLU) end)} ||
|
||||
|
@ -306,17 +312,6 @@ cl_read_latest_public_projection(#ch_mgr{proj=CurrentProj}=S) ->
|
|||
{UnanimousTag, BestProj, Extra2, S2}
|
||||
end.
|
||||
|
||||
%% 1. Do the results contain a projection?
|
||||
%% perhaps figure that in cl_read_latest_public_projection()?
|
||||
%% 2. Were there any error_unwritten?
|
||||
%% 3. Repair the unwritten FLUs.
|
||||
%% 4. Nothing to do with timeouts, right? They're
|
||||
%% hopeless for the moment, need to wait.
|
||||
%%
|
||||
%% For read-repair, just choose the best and then brute-
|
||||
%% force write, don't care about write status, then
|
||||
%% repeat do_cl_read_latest_public_projection() ??
|
||||
|
||||
do_read_repair(FLUsRs, _Extra, #ch_mgr{proj=CurrentProj} = S) ->
|
||||
Unwrittens = [x || {_FLU, error_unwritten} <- FLUsRs],
|
||||
Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection)],
|
||||
|
@ -379,7 +374,7 @@ calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S, RelativeToServer,
|
|||
RelativeToServer, Dbg, S).
|
||||
|
||||
%% OldThreshold: Percent chance of using the old/previous network partition list
|
||||
%% NoPartitionThreshold: If the network partition changes, what are the odds
|
||||
%% NoPartitionThreshold: If the network partition changes, what percent chance
|
||||
%% that there are no partitions at all?
|
||||
|
||||
calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
|
||||
|
@ -446,7 +441,6 @@ calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
|
|||
P = make_projection(OldEpochNum + 1,
|
||||
MyName, All_list, Down, NewUPI, NewRepairing,
|
||||
Dbg ++ [{nodes_up, Up}]),
|
||||
if P#projection.upi == [] -> io:format(user, "old: ~w\n", [make_projection_summary(LastProj)]), io:format(user, "new: ~w\n", [make_projection_summary(P)]), exit(zoozoo); true -> ok end,
|
||||
{P, S#ch_mgr{runenv=RunEnv3}}.
|
||||
|
||||
calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
|
||||
|
@ -456,9 +450,9 @@ calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
|
|||
{UpNodes, Partitions, S#ch_mgr{runenv=RunEnv2}}.
|
||||
|
||||
calc_up_nodes(MyName, AllMembers, RunEnv1) ->
|
||||
%% Seed1 = proplists:get_value(seed, RunEnv1),
|
||||
{Partitions2, Islands2} = machi_partition_simulator:get(AllMembers),
|
||||
catch put(react, [{partitions,Partitions2},{islands,Islands2}|get(react)]),
|
||||
catch ?REACT({partitions,Partitions2}),
|
||||
catch ?REACT({islands,Islands2}),
|
||||
UpNodes = lists:sort(
|
||||
[Node || Node <- AllMembers,
|
||||
not lists:member({MyName, Node}, Partitions2),
|
||||
|
@ -474,11 +468,6 @@ replace(PropList, Items) ->
|
|||
lists:keyreplace(Key, 1, Ps, {Key,Val})
|
||||
end, PropList, Items).
|
||||
|
||||
-ifdef(TEST).
|
||||
mps(P) ->
|
||||
make_projection_summary(P).
|
||||
-endif. % TEST
|
||||
|
||||
make_projection_summary(#projection{epoch_number=EpochNum,
|
||||
all_members=_All_list,
|
||||
down=Down_list,
|
||||
|
@ -522,22 +511,24 @@ do_react_to_env(S) ->
|
|||
react_to_env_A10(S).
|
||||
|
||||
react_to_env_A10(S) ->
|
||||
put(react, [a10|get(react)]),
|
||||
?REACT(a10),
|
||||
react_to_env_A20(0, S).
|
||||
|
||||
react_to_env_A20(Retries, #ch_mgr{myflu=MyFLU} = S) ->
|
||||
put(react, [a20|get(react)]),
|
||||
%% io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),
|
||||
?REACT(a20),
|
||||
RelativeToServer = MyFLU,
|
||||
{P_newprop, S2} = calc_projection(S, RelativeToServer,
|
||||
[{author_proc, react}]),
|
||||
%% if P_newprop#projection.upi == [] -> io:format(user, "current: ~w\n", [make_projection_summary(S#ch_mgr.proj)]),io:format(user, "proposed: ~w\n", [make_projection_summary(P_newprop)]), timer:sleep(100); true -> ok end,
|
||||
react_to_env_A30(Retries, P_newprop, S2).
|
||||
|
||||
react_to_env_A30(Retries, P_newprop, S) ->
|
||||
put(react, [a30|get(react)]),
|
||||
?REACT(a30),
|
||||
{UnanimousTag, P_latest, ReadExtra, S2} =
|
||||
do_cl_read_latest_public_projection(true, S),
|
||||
|
||||
%% The UnanimousTag isn't quite sufficient for our needs. We need
|
||||
%% to determine if *all* of the UPI+Repairing FLUs are members of
|
||||
%% the unanimous server replies.
|
||||
UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)),
|
||||
UPI_Repairing_FLUs = lists:sort(P_latest#projection.upi ++
|
||||
P_latest#projection.repairing),
|
||||
|
@ -545,35 +536,34 @@ react_to_env_A30(Retries, P_newprop, S) ->
|
|||
LatestUnanimousP =
|
||||
if UnanimousTag == unanimous
|
||||
andalso
|
||||
All_UPI_Repairing_were_unanimous -> put(react, [{a30,?LINE}|get(react)]),true;
|
||||
UnanimousTag == unanimous -> put(react, [{a30,?LINE,UPI_Repairing_FLUs, UnanimousFLUs}|get(react)]),false;
|
||||
UnanimousTag == not_unanimous -> put(react, [{a30,?LINE}|get(react)]),false;
|
||||
true -> exit({badbad, UnanimousTag})
|
||||
All_UPI_Repairing_were_unanimous ->
|
||||
?REACT({a30,?LINE}),
|
||||
true;
|
||||
UnanimousTag == unanimous ->
|
||||
?REACT({a30,?LINE,UPI_Repairing_FLUs,UnanimousFLUs}),
|
||||
false;
|
||||
UnanimousTag == not_unanimous ->
|
||||
?REACT({a30,?LINE}),
|
||||
false;
|
||||
true ->
|
||||
exit({badbad, UnanimousTag})
|
||||
end,
|
||||
react_to_env_A40(Retries, P_newprop, P_latest,
|
||||
LatestUnanimousP, S2).
|
||||
|
||||
react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||
#ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
|
||||
put(react, [a40|get(react)]),
|
||||
?REACT(a40),
|
||||
[{Rank_newprop, _}] = rank_projections([P_newprop], P_current),
|
||||
[{Rank_latest, _}] = rank_projections([P_latest], P_current),
|
||||
LatestAuthorDownP = lists:member(P_latest#projection.author_server,
|
||||
P_newprop#projection.down),
|
||||
|
||||
if P_newprop#projection.epoch_number == 666
|
||||
orelse P_latest#projection.epoch_number == 666 ->
|
||||
io:format(user, "\nBUMMER\nLatest ~p\nNewprop ~p\nRunenv ~p\nTsns ~w", [make_projection_summary(P_newprop), make_projection_summary(P_latest), S#ch_mgr.runenv, lists:reverse(get(react))]),
|
||||
exit(bummer);
|
||||
true ->
|
||||
ok
|
||||
end,
|
||||
|
||||
if
|
||||
P_latest#projection.epoch_number > P_current#projection.epoch_number
|
||||
orelse
|
||||
not LatestUnanimousP ->
|
||||
put(react, [{a40, ?LINE, P_latest#projection.epoch_number > P_current#projection.epoch_number, not LatestUnanimousP}|get(react)]),
|
||||
?REACT({a40, ?LINE,P_latest#projection.epoch_number > P_current#projection.epoch_number, not LatestUnanimousP}),
|
||||
|
||||
%% 1st clause: someone else has written a newer projection
|
||||
%% 2nd clause: a network partition has healed, revealing a
|
||||
|
@ -584,7 +574,7 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
P_latest#projection.epoch_number < P_current#projection.epoch_number
|
||||
orelse
|
||||
P_latest /= P_current ->
|
||||
put(react, [{a40, ?LINE}|get(react)]),
|
||||
?REACT({a40, ?LINE}),
|
||||
|
||||
%% Both of these cases are rare. Elsewhere, the code
|
||||
%% assumes that the local FLU's projection store is always
|
||||
|
@ -609,7 +599,16 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
|
||||
%% A40a (see flowchart)
|
||||
Rank_newprop > Rank_latest ->
|
||||
put(react, [{a40, ?LINE}|get(react)]),
|
||||
?REACT({a40, ?LINE}),
|
||||
|
||||
%% TODO: There may be an "improvement" here. If we're the
|
||||
%% highest-ranking FLU in the all_members list, then if we make a
|
||||
%% projection where our UPI list is the same as P_latest's, and
|
||||
%% our repairing list is the same as P_latest's, then it may not
|
||||
%% be necessary to write our projection: it doesn't "improve"
|
||||
%% anything UPI-wise or repairing-wise. But it isn't clear to me
|
||||
%% if it's 100% correct to "improve" here and skip writing
|
||||
%% P_newprop, yet.
|
||||
react_to_env_C300(P_newprop, P_latest, S);
|
||||
|
||||
%% A40b (see flowchart)
|
||||
|
@ -618,12 +617,13 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
(P_newprop#projection.upi /= P_latest#projection.upi
|
||||
orelse
|
||||
P_newprop#projection.repairing /= P_latest#projection.repairing) ->
|
||||
put(react, [{a40, ?LINE}|get(react)]),
|
||||
?REACT({a40, ?LINE}),
|
||||
|
||||
react_to_env_C300(P_newprop, P_latest, S);
|
||||
|
||||
%% A40c (see flowchart)
|
||||
LatestAuthorDownP ->
|
||||
put(react, [{a40, ?LINE}|get(react)]),
|
||||
?REACT({a40, ?LINE}),
|
||||
|
||||
%% TODO: I believe that membership in the
|
||||
%% P_newprop#projection.down is not sufficient for long
|
||||
|
@ -642,25 +642,25 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
%% * if we accept ring1's proj: two functioning chains
|
||||
%% ([ring1,ring2] and [ring4,ring5] independently)
|
||||
%% but unstable: we're probably going to flap back & forth?!
|
||||
|
||||
?D({{{{{yoyoyo_A40c}}}}}),
|
||||
react_to_env_C300(P_newprop, P_latest, S);
|
||||
|
||||
true ->
|
||||
put(react, [{a40, ?LINE}|get(react)]),
|
||||
?REACT({a40, ?LINE}),
|
||||
|
||||
{{no_change, P_latest#projection.epoch_number}, S}
|
||||
end.
|
||||
|
||||
react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
||||
Rank_newprop, Rank_latest, #ch_mgr{name=MyName}=S) ->
|
||||
put(react, [b10|get(react)]),
|
||||
?REACT(b10),
|
||||
if
|
||||
LatestUnanimousP ->
|
||||
put(react, [{b10, ?LINE}|get(react)]),
|
||||
?REACT({b10, ?LINE}),
|
||||
|
||||
react_to_env_C100(P_newprop, P_latest, S);
|
||||
|
||||
Retries > 2 ->
|
||||
put(react, [{b10, ?LINE}|get(react)]),
|
||||
?REACT({b10, ?LINE}),
|
||||
|
||||
%% The author of P_latest is too slow or crashed.
|
||||
%% Let's try to write P_newprop and see what happens!
|
||||
|
@ -669,7 +669,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
Rank_latest >= Rank_newprop
|
||||
andalso
|
||||
P_latest#projection.author_server /= MyName ->
|
||||
put(react, [{b10, ?LINE}|get(react)]),
|
||||
?REACT({b10, ?LINE}),
|
||||
|
||||
%% Give the author of P_latest an opportunity to write a
|
||||
%% new projection in a new epoch to resolve this mixed
|
||||
|
@ -677,7 +677,7 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
react_to_env_C200(Retries, P_latest, S);
|
||||
|
||||
true ->
|
||||
put(react, [{b10, ?LINE}|get(react)]),
|
||||
?REACT({b10, ?LINE}),
|
||||
|
||||
%% P_newprop is best, so let's write it.
|
||||
react_to_env_C300(P_newprop, P_latest, S)
|
||||
|
@ -685,9 +685,10 @@ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
|
|||
|
||||
react_to_env_C100(P_newprop, P_latest,
|
||||
#ch_mgr{myflu=MyFLU, proj=P_current}=S) ->
|
||||
put(react, [c100|get(react)]),
|
||||
?REACT(c100),
|
||||
I_am_UPI_in_newprop_p = lists:member(MyFLU, P_newprop#projection.upi),
|
||||
I_am_Repairing_in_latest_p = lists:member(MyFLU, P_latest#projection.repairing),
|
||||
I_am_Repairing_in_latest_p = lists:member(MyFLU,
|
||||
P_latest#projection.repairing),
|
||||
ShortCircuit_p =
|
||||
P_latest#projection.epoch_number > P_current#projection.epoch_number
|
||||
andalso
|
||||
|
@ -698,26 +699,25 @@ react_to_env_C100(P_newprop, P_latest,
|
|||
case {ShortCircuit_p, projection_transition_is_sane(P_current, P_latest,
|
||||
MyFLU)} of
|
||||
{true, _} ->
|
||||
?REACT({c100, repairing_short_circuit}),
|
||||
%% Someone else believes that I am repairing. We assume
|
||||
%% that nobody is being Byzantine, so we'll believe it.
|
||||
%% We ignore our proposal and try to go with the latest.
|
||||
%% that nobody is being Byzantine, so we'll believe that I
|
||||
%% am/should be repairing. We ignore our proposal and try
|
||||
%% to go with the latest.
|
||||
react_to_env_C110(P_latest, S);
|
||||
{_, true} ->
|
||||
?REACT({c100, sane}),
|
||||
react_to_env_C110(P_latest, S);
|
||||
{_, _AnyOtherReturnValue} ->
|
||||
%% %% P_latest is known to be crap.
|
||||
%% %% By process of elimination, P_newprop is best,
|
||||
%% %% so let's write it.
|
||||
%% MaxEpoch = erlang:max(P_newprop#projection.epoch_number,
|
||||
%% P_latest#projection.epoch_number) + 1,
|
||||
%% P_newprop2 = update_projection_checksum(
|
||||
%% P_newprop#projection{epoch_number=MaxEpoch}),
|
||||
%% react_to_env_C300(P_newprop2, P_latest, S)
|
||||
?REACT({c100, not_sane}),
|
||||
%% P_latest is not sane.
|
||||
%% By process of elimination, P_newprop is best,
|
||||
%% so let's write it.
|
||||
react_to_env_C300(P_newprop, P_latest, S)
|
||||
end.
|
||||
|
||||
react_to_env_C110(P_latest, #ch_mgr{myflu=MyFLU} = S) ->
|
||||
put(react, [c110|get(react)]),
|
||||
?REACT(c110),
|
||||
%% TODO: Should we carry along any extra info that would be useful
|
||||
%% in the dbg2 list?
|
||||
Extra_todo = [],
|
||||
|
@ -725,18 +725,19 @@ react_to_env_C110(P_latest, #ch_mgr{myflu=MyFLU} = S) ->
|
|||
Islands = proplists:get_value(network_islands, RunEnv),
|
||||
P_latest2 = update_projection_dbg2(
|
||||
P_latest,
|
||||
[{network_islands, Islands},{hooray, {v2, date(), time()}}|Extra_todo]),
|
||||
[{network_islands, Islands},
|
||||
{hooray, {v2, date(), time()}}|Extra_todo]),
|
||||
Epoch = P_latest2#projection.epoch_number,
|
||||
ok = machi_flu0:proj_write(MyFLU, Epoch, private, P_latest2),
|
||||
react_to_env_C120(P_latest, S).
|
||||
|
||||
react_to_env_C120(P_latest, S) ->
|
||||
put(react, [c120|get(react)]),
|
||||
?REACT(c120),
|
||||
{{now_using, P_latest#projection.epoch_number},
|
||||
S#ch_mgr{proj=P_latest, proj_proposed=none}}.
|
||||
|
||||
react_to_env_C200(Retries, P_latest, S) ->
|
||||
put(react, [c200|get(react)]),
|
||||
?REACT(c200),
|
||||
try
|
||||
yo:tell_author_yo(P_latest#projection.author_server)
|
||||
catch Type:Err ->
|
||||
|
@ -746,30 +747,27 @@ react_to_env_C200(Retries, P_latest, S) ->
|
|||
react_to_env_C210(Retries, S).
|
||||
|
||||
react_to_env_C210(Retries, #ch_mgr{myflu=MyFLU, proj=Proj} = S) ->
|
||||
put(react, [c210|get(react)]),
|
||||
%% TODO: implement the ranked sleep thingie?
|
||||
?REACT(c210),
|
||||
sleep_ranked_order(10, 100, MyFLU, Proj#projection.all_members),
|
||||
react_to_env_C220(Retries, S).
|
||||
|
||||
react_to_env_C220(Retries, S) ->
|
||||
put(react, [c220|get(react)]),
|
||||
?REACT(c220),
|
||||
react_to_env_A20(Retries + 1, S).
|
||||
|
||||
react_to_env_C300(#projection{epoch_number=Epoch_newprop}=P_newprop,
|
||||
#projection{epoch_number=_Epoch_latest}=_P_latest, S) ->
|
||||
put(react, [c300|get(react)]),
|
||||
NewEpoch = erlang:max(Epoch_newprop, _Epoch_latest) + 1,
|
||||
#projection{epoch_number=Epoch_latest}=_P_latest, S) ->
|
||||
?REACT(c300),
|
||||
NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
|
||||
P_newprop2 = P_newprop#projection{epoch_number=NewEpoch},
|
||||
%% %% Let's return to the old epoch thingie and see what happens.........
|
||||
%% Epoch = Epoch_newprop,
|
||||
%% P_newprop2 = P_newprop#projection{epoch_number=Epoch + 1},
|
||||
react_to_env_C310(update_projection_checksum(P_newprop2), S).
|
||||
|
||||
react_to_env_C310(P_newprop, S) ->
|
||||
put(react, [{c310,make_projection_summary(P_newprop)}|get(react)]),
|
||||
?REACT(c310),
|
||||
Epoch = P_newprop#projection.epoch_number,
|
||||
{_Res, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
|
||||
put(react, [{c310,_Res}|get(react)]),
|
||||
?REACT({c310,make_projection_summary(P_newprop)}),
|
||||
?REACT({c310,_Res}),
|
||||
|
||||
react_to_env_A10(S2).
|
||||
|
||||
|
@ -794,6 +792,16 @@ projection_transition_is_sane(
|
|||
dbg=Dbg2} = P2,
|
||||
RelativeToServer) ->
|
||||
try
|
||||
%% General notes:
|
||||
%%
|
||||
%% I'm making no attempt to be "efficient" here. All of these data
|
||||
%% structures are small, and they're not called zillions of times per
|
||||
%% second.
|
||||
%%
|
||||
%% The chain sequence/order checks at the bottom of this function aren't
|
||||
%% as easy-to-read as they ought to be. However, I'm moderately confident
|
||||
%% that they aren't buggy. TODO: refactor them for clarity.
|
||||
|
||||
true = is_integer(Epoch1) andalso is_integer(Epoch2),
|
||||
true = is_binary(CSum1) andalso is_binary(CSum2),
|
||||
{_,_,_} = CreationTime1,
|
||||
|
|
Loading…
Reference in a new issue