diff --git a/test/machi_ap_repair_eqc.erl b/test/machi_ap_repair_eqc.erl index 85bb697..e9d576e 100644 --- a/test/machi_ap_repair_eqc.erl +++ b/test/machi_ap_repair_eqc.erl @@ -47,6 +47,8 @@ %% - Operations other than append, write, trim %% - Use checksum instead of binary to save memory %% - More variety for partitioning pattern: non-constant failure +%% - Stop and restart +%% - Suspend and resume of some erlang processes -module(machi_ap_repair_eqc). @@ -60,6 +62,16 @@ -include_lib("eqc/include/eqc_statem.hrl"). -include_lib("eunit/include/eunit.hrl"). +-record(target, {verbose=false, + flu_names, + mgr_names}). + +-record(state, {num, + verbose=false, + flu_names, + mgr_names, + cr_count}). + %% ETS table names -define(WRITTEN_TAB, written). % Successfully written data -define(ACCPT_TAB, accpt). % Errors with no harm, e.g. timeout @@ -91,29 +103,19 @@ prop_repair_par_test_() -> eqc:quickcheck(eqc:testing_time( PropTO, ?QC_OUT(noshrink(prop_repair_par(Verbose))))))}]}. -%% SHELL HELPERS -test() -> test(100). -test(N) -> test(N, true). -test(N, Verbose) -> quickcheck(numtests(N, noshrink(prop_repair_par(Verbose)))). -check() -> check(prop_repair_par(true), current_counterexample()). - --record(state, {num, % Number of FLU servers - seed, % Seed for partition simulator - verbose=false, % Verbose output for debugging - flu_names, % List of FLU names - mgr_names, % List of chain manager names - fc_list, % List of FLU1 proxy clients - cr_list}). % List of CR clients +%% Model weight(_S, change_partition) -> 20; weight(_S, _) -> 100. -%% append +%% Append -append_args(#state{cr_list=CRList}=S) -> - [elements(CRList), chunk(), S]. +append_args(#state{cr_count=CRCount}=S) -> + [choose(1, CRCount), chunk(), S]. -append({_SimSelfName, C}, Bin, #state{verbose=V}=S) -> +append(CRIndex, Bin, #state{verbose=V}=S) -> + CRList = cr_list(), + {_SimSelfName, C} = lists:nth(CRIndex, CRList), Prefix = <<"pre">>, Len = byte_size(Bin), Res = (catch machi_cr_client:append_chunk(C, Prefix, Bin, {sec(1), sec(1)})), @@ -171,7 +173,7 @@ append({_SimSelfName, C}, Bin, #state{verbose=V}=S) -> {other_error, Other} end. -%% change partition +%% Change partition change_partition_args(#state{flu_names=FLUNames}=S) -> %% [partition(FLUNames), S]. @@ -194,7 +196,22 @@ change_partition(Partition, num() -> choose(2, 5). - %% return(3). + +cr_count(Num) -> + Num * 3. + +%% Returns a list like +%% `[{#p_srvr{name=a, port=7501, ..}, "./eqc/data.eqc.a/"}, ...]' +all_list_extra(Num) -> + {PortBase, DirBase} = get_port_dir_base(), + [begin + FLUNameStr = [$a + I - 1], + FLUName = list_to_atom(FLUNameStr), + MgrName = machi_flu_psup:make_mgr_supname(FLUName), + {#p_srvr{name=FLUName, address="localhost", port=PortBase+I, + props=[{chmgr, MgrName}]}, + DirBase ++ "/data.eqc." ++ FLUNameStr} + end || I <- lists:seq(1, Num)]. %% Generator for possibly assymmetric partition information partition(FLUNames) -> @@ -216,7 +233,7 @@ flu_pairs(FLUNames) -> chunk() -> non_empty(binary(10)). -%% Property +%% Properties prop_repair(Verbose) -> error_logger:tty(false), @@ -225,17 +242,17 @@ prop_repair(Verbose) -> Seed = {1445,935441,287549}, ?FORALL(Num, num(), - ?FORALL(Cmds, commands(?MODULE, initial_state(Num, Seed, Verbose)), + ?FORALL(Cmds, commands(?MODULE, initial_state(Num, Verbose)), begin - SetupState = setup_chain(Num, Seed, Verbose), - {H, S1, Res} = run_commands(?MODULE, Cmds), + Target = setup_target(Num, Seed, Verbose), + {H, S1, Res0} = run_commands(?MODULE, Cmds), %% ?V("S1=~w~n", [S1]), ?V("==== Start post operations, stabilize and confirm results~n", []), - {_Res2, S2} = stabilize(commands_len(Cmds), SetupState), - {Dataloss, Critical} = confirm_result(S2), - _ = cleanup(SetupState), + _ = stabilize(commands_len(Cmds), Target), + {Dataloss, Critical} = confirm_result(Target), + _ = cleanup(Target), pretty_commands( - ?MODULE, Cmds, {H, S1, Res}, + ?MODULE, Cmds, {H, S1, Res0}, aggregate(with_title(cmds), command_names(Cmds), collect(with_title(length5), (length(Cmds) div 5) * 5, {Dataloss, Critical} =:= {0, 0}))) @@ -251,35 +268,35 @@ prop_repair_par(Verbose) -> ?FORALL(Cmds, %% Now try-and-err'ing, how to control command length and concurrency? ?SUCHTHAT(Cmds0, ?SIZED(Size, resize(Size, - parallel_commands(?MODULE, initial_state(Num, Seed, Verbose)))), + parallel_commands(?MODULE, initial_state(Num, Verbose)))), commands_len(Cmds0) > 20 andalso concurrency(Cmds0) > 2), begin CmdsLen= commands_len(Cmds), - SetupState = setup_chain(Num, Seed, Verbose), - {Seq, Par, Res} = run_parallel_commands(?MODULE, Cmds), + Target = setup_target(Num, Seed, Verbose), + {Seq, Par, Res0} = run_parallel_commands(?MODULE, Cmds), %% ?V("Seq=~w~n", [Seq]), %% ?V("Par=~w~n", [Par]), ?V("==== Start post operations, stabilize and confirm results~n", []), - {Dataloss, Critical} = - case Res of + {FinalRes, {Dataloss, Critical}} = + case Res0 of ok -> - {_Res2, S2} = stabilize(CmdsLen, SetupState), - confirm_result(S2); + Res1 = stabilize(CmdsLen, Target), + {Res1, confirm_result(Target)}; _ -> - ?V("Res=~w~n", [Res]), - {undefined, undefined} + ?V("Res0=~w~n", [Res0]), + {Res0, {undefined, undefined}} end, - _ = cleanup(SetupState), + _ = cleanup(Target), %% Process is leaking? This log line can be removed after fix. - ?V("process_count=~w~n", [erlang:system_info(process_count)]), + [?V("process_count=~w~n", [erlang:system_info(process_count)]) || Verbose], pretty_commands( - ?MODULE, Cmds, {Seq, Par, Res}, + ?MODULE, Cmds, {Seq, Par, Res0}, aggregate(with_title(cmds), command_names(Cmds), collect(with_title(length5), (CmdsLen div 5) * 5, collect(with_title(conc), concurrency(Cmds), - {Dataloss, Critical} =:= {0, 0}))) + {FinalRes, {Dataloss, Critical}} =:= {ok, {0, 0}}))) ) end)). @@ -289,41 +306,35 @@ prop_repair_par(Verbose) -> %% > eqc_gen:sample(eqc_statem:commands(machi_ap_repair_eqc)). %% but not so helpful. initial_state() -> - #state{cr_list=[a,b,c]}. + #state{cr_count=3}. -initial_state(Num, Seed, Verbose) -> +initial_state(Num, Verbose) -> AllListE = all_list_extra(Num), FLUNames = [P#p_srvr.name || {P, _Dir} <- AllListE], MgrNames = [{Name, machi_flu_psup:make_mgr_supname(Name)} || Name <- FLUNames], - Dict = orddict:from_list([{P#p_srvr.name, P} || {P, _Dir} <- AllListE]), - - FCList = [begin - {ok, PCPid} = machi_proxy_flu1_client:start_link(P), - {Name, PCPid} - end || {_, #p_srvr{name=Name}=P} <- Dict], - %% CR clients are pooled, each has "name" which is interpreted "From" - %% side of simulated partition. - CRListCount = 10, % ad-hoc - SimSelfNames = lists:append(lists:duplicate(CRListCount div Num +1, FLUNames)), - CRList = [begin - {ok, C} = machi_cr_client:start_link( - [P || {_, P} <- Dict], - [{use_partition_simulator, true}, - {simulator_self_name, SimSelfName}, - {simulator_members, FLUNames}]), - {SimSelfName, C} - end || SimSelfName <- SimSelfNames], - #state{num=Num, seed=Seed, verbose=Verbose, + #state{num=Num, verbose=Verbose, flu_names=FLUNames, mgr_names=MgrNames, - cr_list=CRList, fc_list=FCList}. + cr_count=cr_count(Num)}. -setup_chain(Num, Seed, Verbose) -> - %% ?V("setup_chain(Num=~w, Seed=~w~nn", [Num, Seed]), +setup_target(Num, Seed, Verbose) -> + %% ?V("setup_target(Num=~w, Seed=~w~nn", [Num, Seed]), AllListE = all_list_extra(Num), FLUNames = [P#p_srvr.name || {P, _Dir} <- AllListE], MgrNames = [{Name, machi_flu_psup:make_mgr_supname(Name)} || Name <- FLUNames], Dict = orddict:from_list([{P#p_srvr.name, P} || {P, _Dir} <- AllListE]), + setup_chain(Seed, AllListE, FLUNames, MgrNames, Dict), + _ = setup_cpool(AllListE, FLUNames, Dict), + + Target = #target{flu_names=FLUNames, mgr_names=MgrNames, + verbose=Verbose}, + %% Don't wait for complete chain. Even partialy completed, the chain + %% should work fine. Right? + wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames, + 20, Verbose), + Target. + +setup_chain(Seed, AllListE, FLUNames, MgrNames, Dict) -> ok = shutdown_hard(), [begin machi_flu1_test:clean_up_data_dir(Dir), @@ -351,29 +362,55 @@ setup_chain(Num, Seed, Verbose) -> [{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, FLUOpts) || {#p_srvr{name=Name, port=Port}, Dir} <- AllListE], [machi_chain_manager1:set_chain_members(MgrName, Dict) || {_, MgrName} <- MgrNames], + ok. - State = initial_state(Num, Seed, Verbose), - %% Don't wait for complete chain. Even partialy completed, the chain - %% should work fine. Right? - wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames, - State#state.fc_list, 20, Verbose), - State. +setup_cpool(AllListE, FLUNames, Dict) -> + Num = length(AllListE), + FCList = [begin + {ok, PCPid} = machi_proxy_flu1_client:start_link(P), + {Name, PCPid} + end || {_, #p_srvr{name=Name}=P} <- Dict], + %% CR clients are pooled, each has "name" which is interpreted "From" + %% side of simulated partition. + SimSelfNames = lists:append(lists:duplicate(cr_count(Num), FLUNames)), + CRList = [begin + {ok, C} = machi_cr_client:start_link( + [P || {_, P} <- Dict], + [{use_partition_simulator, true}, + {simulator_self_name, SimSelfName}, + {simulator_members, FLUNames}]), + {SimSelfName, C} + end || SimSelfName <- SimSelfNames], + catch ets:delete(cpool), + ets:new(cpool, [set, protected, named_table, {read_concurrency, true}]), + ets:insert(cpool, {fc_list, FCList}), + ets:insert(cpool, {cr_list, CRList}), + {CRList, FCList}. -%% Post commands +fc_list() -> + [{fc_list, FCList}] = ets:lookup(cpool, fc_list), + FCList. -stabilize(0, S) -> - {ok, S}; -stabilize(_CmdsLen, #state{flu_names=FLUNames, mgr_names=MgrNames, - fc_list=FCList, verbose=Verbose}=S) -> +cr_list() -> + [{cr_list, CRList}] = ets:lookup(cpool, cr_list), + CRList. + +%% Post run_commands + +stabilize(0, _T) -> + ok; +stabilize(_CmdsLen, #target{flu_names=FLUNames, mgr_names=MgrNames, + verbose=Verbose}) -> machi_partition_simulator:no_partitions(), wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames, - FCList, 100, Verbose), - {ok, S}. + 100, Verbose), + ok. chain_state_all_ok(FLUNames) -> [{FLUName, {FLUNames, [], []}} || FLUName <- FLUNames]. -confirm_result(#state{cr_list=[{_, C}|_]}=_S) -> +confirm_result(_T) -> + [{_, C} | _] = cr_list(), [{written, _Written}, {accpt, Accpt}, {failed, Failed}, {critical, Critical}] = tab_counts(), {OK, Dataloss} = confirm_written(C), @@ -426,12 +463,12 @@ assert_chunk(C, {Off, Len, FileName}=Key, Bin) -> {error, Other} end. -cleanup(#state{fc_list=FCList, cr_list=CRList}=_S) -> - [catch machi_proxy_flu1_client:quit(FC) || FC <- FCList], - [catch machi_cr_client:quit(CR) || CR <- CRList], +cleanup(_Target) -> + [begin unlink(FC), catch exit(FC, kill) end || {_, FC} <- fc_list()], + [begin unlink(CR), catch exit(CR, kill) end || {_, CR} <- cr_list()], _ = shutdown_hard(). -%% Internal utilities +%% Internal misc utilities eqc_verbose() -> os:getenv("EQC_VERBOSE") =:= "true". @@ -443,19 +480,6 @@ eqc_timeout(Default) -> end, {PropTimeout, PropTimeout * 300}. -%% Returns a list like -%% `[{#p_srvr{name=a, port=7501, ..}, "./eqc/data.eqc.a/"}, ...]' -all_list_extra(Num) -> - {PortBase, DirBase} = get_port_dir_base(), - [begin - FLUNameStr = [$a + I - 1], - FLUName = list_to_atom(FLUNameStr), - MgrName = machi_flu_psup:make_mgr_supname(FLUName), - {#p_srvr{name=FLUName, address="localhost", port=PortBase+I, - props=[{chmgr, MgrName}]}, - DirBase ++ "/data.eqc." ++ FLUNameStr} - end || I <- lists:seq(1, Num)]. - get_port_dir_base() -> I = case os:getenv("EQC_BASE_PORT") of false -> 0; @@ -475,15 +499,16 @@ shutdown_hard() -> timer:sleep(100). tick(#state{flu_names=FLUNames, mgr_names=MgrNames, - fc_list=FCList, verbose=Verbose}) -> - tick(FLUNames, MgrNames, FCList, Verbose). + verbose=Verbose}) -> + tick(FLUNames, MgrNames, Verbose). -tick(FLUNames, MgrNames, FCList, Verbose) -> - tick(FLUNames, MgrNames, 2, 100, FCList, Verbose). +tick(FLUNames, MgrNames, Verbose) -> + tick(FLUNames, MgrNames, 2, 100, Verbose). -tick(FLUNames, MgrNames, Iter, SleepMax, FCList, Verbose) -> +tick(FLUNames, MgrNames, Iter, SleepMax, Verbose) -> TickFun = tick_fun(FLUNames, MgrNames, self()), TickFun(Iter, 0, SleepMax), + FCList = fc_list(), [?V("## Chain state after tick()=~w~n", [chain_state(FCList)]) || Verbose]. tick_fun(FLUNames, MgrNames, Parent) -> @@ -516,11 +541,12 @@ tick_fun(FLUNames, MgrNames, Parent) -> end || {ThePid, M_name} <- Pids] end. -wait_until_stable(ExpectedChainState, FLUNames, MgrNames, FCList, Verbose) -> - wait_until_stable(ExpectedChainState, FLUNames, MgrNames, FCList, 20, Verbose). +wait_until_stable(ExpectedChainState, FLUNames, MgrNames, Verbose) -> + wait_until_stable(ExpectedChainState, FLUNames, MgrNames, 20, Verbose). -wait_until_stable(ExpectedChainState, FLUNames, MgrNames, FCList, Retries, Verbose) -> +wait_until_stable(ExpectedChainState, FLUNames, MgrNames, Retries, Verbose) -> TickFun = tick_fun(FLUNames, MgrNames, self()), + FCList = fc_list(), wait_until_stable1(ExpectedChainState, TickFun, FCList, Retries, Verbose). wait_until_stable1(_ExpectedChainState, _TickFun, FCList, 0, _Verbose) ->