From 32cfcccf34f1d89159dc027b322642ff225242a7 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 4 Nov 2014 21:25:35 +0900 Subject: [PATCH] First part of larger sanity test is now prototyped. This is some brute-force-and-not-subtle hackery, but it looks like I've got the basis for a test that a model checker (QuickCheck or Concuerror or something else) can use for a good/bad check. The following properties are examined (but not enforced): * At each epoch, are each of the chains disjoint? I.e. no single FLU is a member of different chains at the same epoch. This is a safety/sanity check. * For each unique chain UPI list at each epoch, are all of the FLUs in that chain unanimous in their agreement: agreed_membership: all UPI FLUs agree about the UPI list not_agreed: the membership algorithm has not yet agreed on the UPI list This is not a safety/sanity check per se, but it can be useful input into a good safety check. Some examples: * At epoch 0, there is no agreement on UPI membership of the one [a,b,c] chain. * At epoch 1, there is full agreement, * At epoch 4, we're back to no agreement. * At epoch 17, there's agreement on a small chain with UPI list=[a]. (This agreement continues until epoch 216, but that history is omitted here.) [{0, {ok_disjoint,[{[a,b,c], not_unique,0, [<<159,215,105,140,29,151,142,2,162,90,225,209,10,102,119, 193,110,72,75,245>>, <<213,46,129,248,23,50,210,247,145,68,65,112,232,101,28,56, 239,12,78,227>>, <<230,146,66,183,10,218,57,29,233,166,108,176,118,109, 226,186,190,56,174,108>>]}]}}, {1,{ok_disjoint,[{agreed_membership,[a,b,c]}]}}, {4, {ok_disjoint,[{not_unique,[a,b,c], [not_in_this_epoch, <<208,227,221,233,254,160,36,134,252,106, 124,192,101,171,168,68,169,55,2,54>>]}]}}, {6, {ok_disjoint,[{not_unique,[a,b,c], [not_in_this_epoch, <<191,47,203,143,195,230,71,162,39,132,188, 128,64,39,18,9,73,148,207,220>>]}]}}, {17,{ok_disjoint,[{agreed_membership,[a]}]}}, {24,{ok_disjoint,[{agreed_membership,[a]}]}}, [...] Starting at epoch 419, the network stabilized, but not fully, into two "islands" of servers, a alone and b&c together. At epoch 486, the network is fully stabilized with the same network partition. We see rapid convergence to two chains, [a] and [b,c]. {419,{ok_disjoint,[{agreed_membership,[a]}]}}, {425,{ok_disjoint,[{agreed_membership,[b]}]}}, {436,{ok_disjoint,[{agreed_membership,[b]}]}}, {442,{ok_disjoint,[{agreed_membership,[b]}]}}, {444,{ok_disjoint,[{agreed_membership,[b]}]}}, {454,{ok_disjoint,[{agreed_membership,[b]}]}}, {456,{ok_disjoint,[{agreed_membership,[b]}]}}, {458,{ok_disjoint,[{agreed_membership,[b]}]}}, {463,{ok_disjoint,[{agreed_membership,[b]}]}}, {468,{ok_disjoint,[{agreed_membership,[b]}]}}, {479,{ok_disjoint,[{agreed_membership,[b]}]}}, {482,{ok_disjoint,[{agreed_membership,[b]}]}}, {486,{ok_disjoint,[{agreed_membership,[a]}]}}, {488,{ok_disjoint,[{agreed_membership,[b]}]}}, {490,{ok_disjoint,[{agreed_membership,[b,c]}]}}, {492,{ok_disjoint,[{agreed_membership,[b,c]}]}}] foo --- .../test/machi_chain_manager1_test.erl | 73 +++++++++++++++++-- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/prototype/poc-machi/test/machi_chain_manager1_test.erl b/prototype/poc-machi/test/machi_chain_manager1_test.erl index 77a5e06..ebf5d7b 100644 --- a/prototype/poc-machi/test/machi_chain_manager1_test.erl +++ b/prototype/poc-machi/test/machi_chain_manager1_test.erl @@ -189,14 +189,14 @@ zoof_test() -> DoIt(), machi_partition_simulator:reset_thresholds(999, 0), + [DoIt() || _ <- [1,2,3]], + %% TODO: We should be stable now ... analyze it. + + machi_partition_simulator:reset_thresholds(10, 50), DoIt(), - - - %% [begin - %% La = machi_flu0:proj_list_all(FLU, Type), - %% [io:format(user, "~p ~p ~p: ~w\n", [FLUName, Type, Epoch, make_projection_summary(catch element(2,machi_flu0:proj_read(FLU, Epoch, Type)))]) || Epoch <- La] - %% end || {FLUName, FLU} <- [{a, FLUa}, {b, FLUb}], - %% Type <- [public, private] ], + machi_partition_simulator:reset_thresholds(999, 0), + [DoIt() || _ <- [1,2,3]], + %% TODO: We should be stable now ... analyze it. %% Dump the public [begin @@ -232,6 +232,9 @@ zoof_test() -> if FLUName == c -> io:format(user, "\n", []); true -> ok end end || Epoch <- UniquePrivateEs, {FLUName, FLU} <- Namez], + R = unanimous_report(Namez), + ?D(R), + ok after ok = ?MGR:stop(Ma), @@ -241,4 +244,60 @@ zoof_test() -> ok = machi_partition_simulator:stop() end. +unanimous_report(Namez) -> + UniquePrivateEs = + lists:usort(lists:flatten( + [machi_flu0:proj_list_all(FLU, private) || + {_FLUName, FLU} <- Namez])), + [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs]. + +unanimous_report(Epoch, Namez) -> + %% AllFLUNames = lists:sort([FLU || {FLUName, _FLU} <- Namez} + + %% Projs = lists:append([case machi_flu0:proj_read(FLU, Epoch, private) of + %% {ok, T} -> [T]; + %% _Else -> [] + %% end || {_FLUName, FLU} <- Namez]), + %% UPI_Sums = [{Proj#projection.upi, Proj#projection.epoch_csum} || + %% Proj <- Projs], + Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of + {ok, T} -> T; + _Else -> not_in_this_epoch + end} || {FLUName, FLU} <- Namez], + UPI_Sums = [{Proj#projection.upi, Proj#projection.epoch_csum} || + {_FLUname, Proj} <- Projs, + is_record(Proj, projection)], + UniqueUPIs = lists:usort([UPI || {UPI, _CSum} <- UPI_Sums]), + Res = + [begin + case lists:usort([CSum || {U, CSum} <- UPI_Sums, U == UPI]) of + [_] -> + %% Yay, there's only 1 checksum. Let's check + %% that all FLUs are in agreement. + Tmp = [{FLU, case proplists:get_value(FLU, Projs) of + P when is_record(P, projection) -> + P#projection.epoch_csum; + Else -> + Else + end} || FLU <- UPI], + case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of + [_] -> + {agreed_membership, UPI}; + Else2 -> + {not_agreed, UPI, Else2} + end; + _Else -> + {UPI, not_unique, Epoch, _Else} + end + end || UPI <- UniqueUPIs], + UniqueResUPIs = [UPI || {unique, UPI} <- Res], + Tag = case lists:usort(lists:flatten(UniqueResUPIs)) == + lists:sort(lists:flatten(UniqueResUPIs)) of + true -> + ok_disjoint; + false -> + bummer_NOT_DISJOINT + end, + {Epoch, {Tag, Res}}. + -endif.