WIP, tests pass again, including the newest one
This commit is contained in:
parent
65993dfcb6
commit
1675020150
5 changed files with 105 additions and 54 deletions
|
@ -37,6 +37,7 @@ func, and pattern match Erlang style in that func.
|
|||
*** TODO Fixes the atom table leak
|
||||
*** TODO Fixes the problem of having active sequencer for the same prefix
|
||||
on two FLUS in the same VM
|
||||
** TODO Eliminate the timeout exception for the client: just {error,timeout} ret
|
||||
** TODO Change all protocol ops to enforce the epoch ID
|
||||
** TODO Add projection wedging logic to each FLU.
|
||||
|
||||
|
|
|
@ -65,7 +65,8 @@
|
|||
-define(REACT(T), put(react, [T|get(react)])).
|
||||
|
||||
%% API
|
||||
-export([start_link/2, start_link/3, stop/1, ping/1]).
|
||||
-export([start_link/2, start_link/3, stop/1, ping/1,
|
||||
set_chain_members/2]).
|
||||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
|
||||
terminate/2, code_change/3]).
|
||||
|
||||
|
@ -104,6 +105,9 @@ stop(Pid) ->
|
|||
%% Synchronous liveness probe for the chain manager process Pid.
%% Sends {ping} via gen_server:call/3 with an infinite timeout, so the
%% caller blocks until the server answers; the {ping} clause of
%% handle_call/3 in this module replies with the atom pong.
ping(Pid) ->
|
||||
gen_server:call(Pid, {ping}, infinity).
|
||||
|
||||
%% Ask the chain manager process Pid to adopt MembersDict as its chain
%% membership.  The request is a gen_server:call/3 with an infinite
%% timeout, so the caller blocks until the server has handled the
%% {set_chain_members, MembersDict} request and returns whatever that
%% handler replies.
set_chain_members(Pid, MembersDict) ->
|
||||
gen_server:call(Pid, {set_chain_members, MembersDict}, infinity).
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Test/debugging code only.
|
||||
|
@ -150,6 +154,7 @@ test_react_to_env(Pid) ->
|
|||
%% local projection store.
|
||||
|
||||
init({MyName, InitMembersDict, MgrOpts}) ->
|
||||
init_remember_partition_hack(),
|
||||
ZeroAll_list = [P#p_srvr.name || {_,P} <- orddict:to_list(InitMembersDict)],
|
||||
ZeroProj = make_none_projection(MyName, ZeroAll_list, InitMembersDict),
|
||||
ok = store_zeroth_projection_maybe(ZeroProj, MgrOpts),
|
||||
|
@ -163,35 +168,30 @@ init({MyName, InitMembersDict, MgrOpts}) ->
|
|||
{network_islands, Opt(network_islands, [])},
|
||||
{flapping_i, Opt(flapping, [])},
|
||||
{up_nodes, Opt(up_nodes, not_init_yet)}],
|
||||
ActiveP = Opt(active_mode, true),
|
||||
NoneProj = make_none_projection(MyName, All_list, MembersDict),
|
||||
Proxies = orddict:fold(
|
||||
fun(K, P, Acc) ->
|
||||
{ok, Pid} = ?FLU_PC:start_link(P),
|
||||
[{K, Pid}|Acc]
|
||||
end, [], MembersDict),
|
||||
ActiveP = Opt(active_mode, false),
|
||||
S = #ch_mgr{name=MyName,
|
||||
%% TODO 2015-03-04: revisit, should this constant be bigger?
|
||||
%% Yes, this should be bigger, but it's a hack. There is
|
||||
%% no guarantee that all parties will advance to a minimum
|
||||
%% flap awareness in the amount of time that this mgr will.
|
||||
flap_limit=length(All_list) + 50,
|
||||
proj=NoneProj,
|
||||
timer='undefined',
|
||||
proj_history=queue:new(),
|
||||
runenv=RunEnv,
|
||||
opts=MgrOpts,
|
||||
members_dict=MembersDict,
|
||||
proxies_dict=orddict:from_list(Proxies)},
|
||||
S2 = if ActiveP == false ->
|
||||
S;
|
||||
opts=MgrOpts},
|
||||
{_, S2} = do_set_chain_members(MembersDict, S),
|
||||
S3 = if ActiveP == false ->
|
||||
S2;
|
||||
ActiveP == true ->
|
||||
set_active_timer(S)
|
||||
set_active_timer(S2)
|
||||
end,
|
||||
{ok, S2}.
|
||||
{ok, S3}.
|
||||
|
||||
handle_call({ping}, _From, S) ->
|
||||
{reply, pong, S};
|
||||
handle_call({set_chain_members, MembersDict}, _From, S) ->
|
||||
{Reply, S2} = do_set_chain_members(MembersDict, S),
|
||||
{reply, Reply, S2};
|
||||
handle_call({stop}, _From, S) ->
|
||||
{stop, normal, ok, S};
|
||||
handle_call({test_calc_projection, KeepRunenvP}, _From,
|
||||
|
@ -311,7 +311,7 @@ cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP,
|
|||
Else when SkipLocalWriteErrorP ->
|
||||
{XX, SS} = Continue(),
|
||||
{{local_write_result, Else, XX}, SS};
|
||||
Else when Else == error_written; Else == timeout; Else == t_timeout ->
|
||||
Else ->
|
||||
{Else, S2}
|
||||
end.
|
||||
|
||||
|
@ -350,13 +350,16 @@ read_latest_projection_call_only(ProjectionType, AllHosed,
|
|||
|
||||
{_UpNodes, Partitions, S2} = calc_up_nodes(S),
|
||||
DoIt = fun(Pid) ->
|
||||
case ?FLU_PC:read_latest_projection(Pid, ProjectionType, ?TO) of
|
||||
case (?FLU_PC:read_latest_projection(Pid, ProjectionType, ?TO)) of
|
||||
{ok, P} -> P;
|
||||
Else -> Else
|
||||
end
|
||||
end,
|
||||
%% io:format(user, "All_queried_list ~p\n", [All_queried_list]),
|
||||
Rs = [perhaps_call_t(S, Partitions, FLU, fun(Pid) -> DoIt(Pid) end) ||
|
||||
FLU <- All_queried_list],
|
||||
%% Rs = [perhaps_call_t(S, Partitions, FLU, fun(Pid) -> DoIt(Pid) end) ||
|
||||
%% FLU <- All_queried_list],
|
||||
FLUsRs = lists:zip(All_queried_list, Rs),
|
||||
{All_queried_list, FLUsRs, S2}.
|
||||
|
||||
|
@ -555,15 +558,17 @@ calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
|
|||
{UpNodes, Partitions, S#ch_mgr{runenv=RunEnv2}}.
|
||||
|
||||
calc_up_nodes(MyName, AllMembers, RunEnv1) ->
|
||||
{Partitions2, Islands2} =
|
||||
case proplists:get_value(use_partition_simulator, RunEnv1) of
|
||||
true ->
|
||||
machi_partition_simulator:get(AllMembers);
|
||||
false ->
|
||||
{[], [AllMembers]}
|
||||
end,
|
||||
catch ?REACT({partitions,Partitions2}),
|
||||
catch ?REACT({islands,Islands2}),
|
||||
case proplists:get_value(use_partition_simulator, RunEnv1) of
|
||||
true ->
|
||||
calc_up_nodes_sim(MyName, AllMembers, RunEnv1);
|
||||
false ->
|
||||
{AllMembers -- get(remember_partition_hack), [], RunEnv1}
|
||||
end.
|
||||
|
||||
calc_up_nodes_sim(MyName, AllMembers, RunEnv1) ->
|
||||
{Partitions2, Islands2} = machi_partition_simulator:get(AllMembers),
|
||||
catch ?REACT({calc_up_nodes,?LINE,[{partitions,Partitions2},
|
||||
{islands,Islands2}]}),
|
||||
UpNodes = lists:sort(
|
||||
[Node || Node <- AllMembers,
|
||||
not lists:member({MyName, Node}, Partitions2),
|
||||
|
@ -633,6 +638,23 @@ rank_projection(#projection_v1{author_server=Author,
|
|||
( N * length(Repairing_list)) +
|
||||
(N*N * length(UPI_list)).
|
||||
|
||||
%% Install a new chain membership in the manager state S.
%%
%% MembersDict is traversed with orddict functions and each value is
%% read as a #p_srvr{} record (only its name field is used here).
%%
%% Steps:
%%   1. ask every proxy in the old proxies_dict to quit (best effort);
%%   2. build the list of member names and a projection via
%%      make_none_projection/3;
%%   3. start one ?FLU_PC proxy per member;
%%   4. return {ok, NewState} with proj, members_dict and proxies_dict
%%      replaced.
%%
%% Crashes with badmatch if any ?FLU_PC:start_link/1 call returns
%% something other than {ok, Pid}.
do_set_chain_members(MembersDict,
|
||||
#ch_mgr{name=MyName, proxies_dict=OldProxiesDict}=S) ->
|
||||
%% NOTE(review): the outer old-style catch also hides any failure of the
%% fold itself (e.g. a malformed OldProxiesDict), not just failures of
%% the individual quit calls.
catch orddict:fold(
|
||||
fun(_K, Pid, _Acc) ->
|
||||
_ = (catch ?FLU_PC:quit(Pid))
|
||||
end, [], OldProxiesDict),
|
||||
All_list = [P#p_srvr.name || {_, P} <- orddict:to_list(MembersDict)],
|
||||
NoneProj = make_none_projection(MyName, All_list, MembersDict),
|
||||
%% The fold accumulates {Key, ProxyPid} pairs by prepending; the result
%% is normalised by orddict:from_list/1 below.
Proxies = orddict:fold(
|
||||
fun(K, P, Acc) ->
|
||||
{ok, Pid} = ?FLU_PC:start_link(P),
|
||||
[{K, Pid}|Acc]
|
||||
end, [], MembersDict),
|
||||
{ok, S#ch_mgr{proj=NoneProj,
|
||||
members_dict=MembersDict,
|
||||
proxies_dict=orddict:from_list(Proxies)}}.
|
||||
|
||||
do_react_to_env(#ch_mgr{proj=#projection_v1{members_dict=[]}}=S) ->
|
||||
{empty_members_dict, S};
|
||||
do_react_to_env(S) ->
|
||||
|
@ -645,15 +667,15 @@ react_to_env_A10(S) ->
|
|||
|
||||
react_to_env_A20(Retries, S) ->
|
||||
?REACT(a20),
|
||||
init_remember_partition_hack(),
|
||||
{UnanimousTag, P_latest, ReadExtra, S2} =
|
||||
do_cl_read_latest_public_projection(true, S),
|
||||
|
||||
%% The UnanimousTag isn't quite sufficient for our needs. We need
|
||||
%% to determine if *all* of the UPI+Repairing FLUs are members of
|
||||
%% the unanimous server replies.
|
||||
io:format(user, "\nReact ~P\n", [lists:reverse(get(react)), 10]),
|
||||
io:format(user, "\nReadExtra ~p\n", [ReadExtra]),
|
||||
io:format(user, "\nP_latest ~p\n", [P_latest]),
|
||||
%% the unanimous server replies. All Repairing FLUs should be up
|
||||
%% now (because if they aren't then they cannot be repairing), so
|
||||
%% all Repairing FLUs have no non-race excuse not to be in UnanimousFLUs.
|
||||
UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)),
|
||||
UPI_Repairing_FLUs = lists:sort(P_latest#projection_v1.upi ++
|
||||
P_latest#projection_v1.repairing),
|
||||
|
@ -687,9 +709,6 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
|
|||
?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}),
|
||||
|
||||
%% Are we flapping yet?
|
||||
io:format(user, "React 2 ~P\n", [lists:reverse(get(react)), 109999]),
|
||||
io:format(user, "NewProp1 ~p\n", [P_newprop1]),
|
||||
io:format(user, "Current ~p\n", [P_current]),
|
||||
{P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2),
|
||||
|
||||
%% Move the epoch number up ... originally done in C300.
|
||||
|
@ -1166,7 +1185,10 @@ react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) ->
|
|||
P_latest2 = machi_projection:update_dbg2(P_latest, Extra_todo),
|
||||
|
||||
MyNamePid = proxy_pid(MyName, S),
|
||||
ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO),
|
||||
%% This is the local projection store. Use a larger timeout, so
|
||||
%% that things locally are pretty horrible if we're killed by a
|
||||
%% timeout exception.
|
||||
ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO*30),
|
||||
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
||||
true ->
|
||||
{_,_,C} = os:timestamp(),
|
||||
|
@ -1262,7 +1284,7 @@ calculate_flaps(P_newprop, _P_current, _FlapLimit,
|
|||
P#projection_v1.repairing,
|
||||
P#projection_v1.down} || P <- Ps]),
|
||||
|
||||
QQQ =
|
||||
_QQQ =
|
||||
{_WhateverUnanimous, BestP, Props, _S} =
|
||||
cl_read_latest_projection(private, S),
|
||||
NotBestPs = proplists:get_value(not_unanimous_answers, Props, []),
|
||||
|
@ -1708,8 +1730,6 @@ merge_flap_counts([FlapCount|Rest], D1) ->
|
|||
end, D1, D2),
|
||||
merge_flap_counts(Rest, D3).
|
||||
|
||||
%% proxy_pid(Name, #ch_mgr{proxies_dict=[]}) ->
|
||||
%% throw(empty_proxies_dict);
|
||||
%% Return the value stored under Name in the manager's proxies_dict.
%% orddict:fetch/2 assumes the key is present and raises an exception
%% otherwise, so calling this with an unknown Name crashes the caller.
proxy_pid(Name, #ch_mgr{proxies_dict=ProxiesDict}) ->
|
||||
orddict:fetch(Name, ProxiesDict).
|
||||
|
||||
|
@ -1748,16 +1768,23 @@ perhaps_call_t(S, Partitions, FLU, DoIt) ->
|
|||
perhaps_call(S, Partitions, FLU, DoIt)
|
||||
catch
|
||||
exit:timeout ->
|
||||
t_timeout
|
||||
{error, partition};
|
||||
exit:{timeout,_} ->
|
||||
{error, partition}
|
||||
end.
|
||||
|
||||
perhaps_call(#ch_mgr{name=MyName}=S, Partitions, FLU, DoIt) ->
|
||||
ProxyPid = proxy_pid(FLU, S),
|
||||
RemoteFLU_p = FLU /= MyName,
|
||||
try
|
||||
erase(bad_sock),
|
||||
case RemoteFLU_p andalso lists:member({MyName, FLU}, Partitions) of
|
||||
false ->
|
||||
Res = DoIt(ProxyPid),
|
||||
if Res == {error, partition} ->
|
||||
remember_partition_hack(FLU);
|
||||
true ->
|
||||
ok
|
||||
end,
|
||||
case RemoteFLU_p andalso lists:member({FLU, MyName}, Partitions) of
|
||||
false ->
|
||||
Res;
|
||||
|
@ -1768,10 +1795,12 @@ perhaps_call(#ch_mgr{name=MyName}=S, Partitions, FLU, DoIt) ->
|
|||
_ ->
|
||||
(catch put(react, [{timeout1,me,MyName,to,FLU,RemoteFLU_p,Partitions}|get(react)])),
|
||||
exit(timeout)
|
||||
end
|
||||
catch throw:empty_proxies_dict ->
|
||||
asdflkjweoiasd
|
||||
end.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% Reset the calling process's process-dictionary entry
%% remember_partition_hack to the empty list.  Returns whatever put/2
%% returns (the previous value, or undefined if there was none).
init_remember_partition_hack() ->
|
||||
put(remember_partition_hack, []).
|
||||
|
||||
%% Prepend FLU to the list stored under remember_partition_hack in the
%% calling process's process dictionary.
%% NOTE(review): if init_remember_partition_hack/0 has not run in this
%% process, get/1 returns undefined and the stored value becomes the
%% improper list [FLU|undefined].
remember_partition_hack(FLU) ->
|
||||
put(remember_partition_hack, [FLU|get(remember_partition_hack)]).
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
|
@ -250,7 +250,7 @@ do_req(Req, S) ->
|
|||
end
|
||||
end;
|
||||
false ->
|
||||
{{error, not_connected}, S2}
|
||||
{{error, partition}, S2}
|
||||
end.
|
||||
|
||||
make_req_fun({append_chunk, EpochID, Prefix, Chunk}, #state{sock=Sock}) ->
|
||||
|
|
|
@ -24,17 +24,37 @@
|
|||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
smoke_test() ->
|
||||
[os:cmd("rm -rf " ++ X) || X <- ["./data.a", "./data.b", "/data.c"] ],
|
||||
-include("machi_projection.hrl").
|
||||
|
||||
%% smoke_test2() will try repeatedly to make a TCP connection to ports
|
||||
%% on localhost that have no listener.
|
||||
%% If you use 'sysctl -w net.inet.icmp.icmplim=3' before running this
|
||||
%% test, you'll get to exercise some timeout handling in
|
||||
%% machi_chain_manager1:perhaps_call_t().
|
||||
%% The default for net.inet.icmp.icmplim is 50.
|
||||
|
||||
%% EUnit test generator: wraps smoke_test2/0 in a {timeout, Seconds, Test}
%% tuple so the test may run for up to 5*60 seconds instead of EUnit's
%% default per-test timeout.
smoke_test_() ->
|
||||
{timeout, 5*60, fun() -> smoke_test2() end}.
|
||||
|
||||
smoke_test2() ->
|
||||
Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}},
|
||||
{b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}},
|
||||
{c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}}
|
||||
],
|
||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
||||
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||
try
|
||||
{ok, _} = machi_flu_psup:start_flu_package(a, 5555, "./data.a",
|
||||
[{active_mode,false}]),
|
||||
{ok, _} = machi_flu_psup:start_flu_package(b, 5556, "./data.b",
|
||||
[{active_mode,false}]),
|
||||
{ok, _} = machi_flu_psup:start_flu_package(c, 5557, "./data.c",
|
||||
[{active_mode,false}]),
|
||||
[begin
|
||||
#p_srvr{name=Name, port=Port, props=Dir} = P,
|
||||
{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, [])
|
||||
end || {_,P} <- [hd(Ps)]],
|
||||
%% end || {_,P} <- Ps],
|
||||
|
||||
[begin
|
||||
_QQ = machi_chain_manager1:test_react_to_env(a_chmgr),
|
||||
ok
|
||||
end || _ <- lists:seq(1,5)],
|
||||
machi_chain_manager1:set_chain_members(a_chmgr, orddict:from_list(Ps)),
|
||||
[begin
|
||||
_QQ = machi_chain_manager1:test_react_to_env(a_chmgr),
|
||||
ok
|
||||
|
@ -42,6 +62,7 @@ smoke_test() ->
|
|||
ok
|
||||
after
|
||||
exit(SupPid, normal),
|
||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
||||
machi_util:wait_for_death(SupPid, 100),
|
||||
ok
|
||||
end.
|
||||
|
|
|
@ -49,7 +49,7 @@ api_smoke_test() ->
|
|||
{error,_} = ?MUT:append_chunk(Prox1,
|
||||
FakeEpoch, <<"prefix">>, <<"data">>,
|
||||
infinity),
|
||||
{error,not_connected} = ?MUT:append_chunk(Prox1,
|
||||
{error,partition} = ?MUT:append_chunk(Prox1,
|
||||
FakeEpoch, <<"prefix">>, <<"data">>,
|
||||
infinity),
|
||||
%% Start the FLU again, we should be able to do stuff immediately
|
||||
|
|
Loading…
Reference in a new issue