First part of larger sanity test is now prototyped.

This is some brute-force-and-not-subtle hackery, but it looks like I've
got the basis for a test that a model checker (QuickCheck or Concuerror
or something else) can use for a good/bad check.

The following properties are examined (but not enforced):

* At each epoch, are each of the chains disjoint?  I.e. no single FLU
  is a member of different chains at the same epoch.

  This is a safety/sanity check.

* For each unique chain UPI list at each epoch, are all of the FLUs in that
  chain unanimous in their agreement:
    agreed_membership: all UPI FLUs agree about the UPI list
    not_agreed: the membership algorithm has not yet agreed on
                the UPI list

  This is not a safety/sanity check per se, but it can be useful input
  into a good safety check.

Some examples:

* At epoch 0, there is no agreement on UPI membership of the one [a,b,c]
  chain.
* At epoch 1, there is full agreement,
* At epoch 4, we're back to no agreement.
* At epoch 17, there's agreement on a small chain with UPI list=[a].
  (This agreement continues until epoch 216, but that history is omitted
  here.)

   [{0,
     {ok_disjoint,[{[a,b,c],
                    not_unique,0,
                    [<<159,215,105,140,29,151,142,2,162,90,225,209,10,102,119,
                       193,110,72,75,245>>,
                     <<213,46,129,248,23,50,210,247,145,68,65,112,232,101,28,56,
                       239,12,78,227>>,
                     <<230,146,66,183,10,218,57,29,233,166,108,176,118,109,
                       226,186,190,56,174,108>>]}]}},
    {1,{ok_disjoint,[{agreed_membership,[a,b,c]}]}},
    {4,
     {ok_disjoint,[{not_unique,[a,b,c],
                               [not_in_this_epoch,
                                <<208,227,221,233,254,160,36,134,252,106,
                                  124,192,101,171,168,68,169,55,2,54>>]}]}},
    {6,
     {ok_disjoint,[{not_unique,[a,b,c],
                               [not_in_this_epoch,
                                <<191,47,203,143,195,230,71,162,39,132,188,
                                  128,64,39,18,9,73,148,207,220>>]}]}},
    {17,{ok_disjoint,[{agreed_membership,[a]}]}},
    {24,{ok_disjoint,[{agreed_membership,[a]}]}},
    [...]

Starting at epoch 419, the network stabilized, but not fully,
into two "islands" of servers, a alone and b&c together.
At epoch 486, the network is fully stabilized with the same network
partition.  We see rapid convergence to two chains, [a] and [b,c].

    {419,{ok_disjoint,[{agreed_membership,[a]}]}},
    {425,{ok_disjoint,[{agreed_membership,[b]}]}},
    {436,{ok_disjoint,[{agreed_membership,[b]}]}},
    {442,{ok_disjoint,[{agreed_membership,[b]}]}},
    {444,{ok_disjoint,[{agreed_membership,[b]}]}},
    {454,{ok_disjoint,[{agreed_membership,[b]}]}},
    {456,{ok_disjoint,[{agreed_membership,[b]}]}},
    {458,{ok_disjoint,[{agreed_membership,[b]}]}},
    {463,{ok_disjoint,[{agreed_membership,[b]}]}},
    {468,{ok_disjoint,[{agreed_membership,[b]}]}},
    {479,{ok_disjoint,[{agreed_membership,[b]}]}},
    {482,{ok_disjoint,[{agreed_membership,[b]}]}},
    {486,{ok_disjoint,[{agreed_membership,[a]}]}},
    {488,{ok_disjoint,[{agreed_membership,[b]}]}},
    {490,{ok_disjoint,[{agreed_membership,[b,c]}]}},
    {492,{ok_disjoint,[{agreed_membership,[b,c]}]}}]

foo
This commit is contained in:
Scott Lystig Fritchie 2014-11-04 21:25:35 +09:00
parent 4def1ad026
commit 32cfcccf34

View file

@ -189,14 +189,14 @@ zoof_test() ->
DoIt(),
machi_partition_simulator:reset_thresholds(999, 0),
[DoIt() || _ <- [1,2,3]],
%% TODO: We should be stable now ... analyze it.
machi_partition_simulator:reset_thresholds(10, 50),
DoIt(),
%% [begin
%% La = machi_flu0:proj_list_all(FLU, Type),
%% [io:format(user, "~p ~p ~p: ~w\n", [FLUName, Type, Epoch, make_projection_summary(catch element(2,machi_flu0:proj_read(FLU, Epoch, Type)))]) || Epoch <- La]
%% end || {FLUName, FLU} <- [{a, FLUa}, {b, FLUb}],
%% Type <- [public, private] ],
machi_partition_simulator:reset_thresholds(999, 0),
[DoIt() || _ <- [1,2,3]],
%% TODO: We should be stable now ... analyze it.
%% Dump the public
[begin
@ -232,6 +232,9 @@ zoof_test() ->
if FLUName == c -> io:format(user, "\n", []); true -> ok end
end || Epoch <- UniquePrivateEs, {FLUName, FLU} <- Namez],
R = unanimous_report(Namez),
?D(R),
ok
after
ok = ?MGR:stop(Ma),
@ -241,4 +244,60 @@ zoof_test() ->
ok = machi_partition_simulator:stop()
end.
unanimous_report(Namez) ->
UniquePrivateEs =
lists:usort(lists:flatten(
[machi_flu0:proj_list_all(FLU, private) ||
{_FLUName, FLU} <- Namez])),
[unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
unanimous_report(Epoch, Namez) ->
%% AllFLUNames = lists:sort([FLU || {FLUName, _FLU} <- Namez}
%% Projs = lists:append([case machi_flu0:proj_read(FLU, Epoch, private) of
%% {ok, T} -> [T];
%% _Else -> []
%% end || {_FLUName, FLU} <- Namez]),
%% UPI_Sums = [{Proj#projection.upi, Proj#projection.epoch_csum} ||
%% Proj <- Projs],
Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
{ok, T} -> T;
_Else -> not_in_this_epoch
end} || {FLUName, FLU} <- Namez],
UPI_Sums = [{Proj#projection.upi, Proj#projection.epoch_csum} ||
{_FLUname, Proj} <- Projs,
is_record(Proj, projection)],
UniqueUPIs = lists:usort([UPI || {UPI, _CSum} <- UPI_Sums]),
Res =
[begin
case lists:usort([CSum || {U, CSum} <- UPI_Sums, U == UPI]) of
[_] ->
%% Yay, there's only 1 checksum. Let's check
%% that all FLUs are in agreement.
Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
P when is_record(P, projection) ->
P#projection.epoch_csum;
Else ->
Else
end} || FLU <- UPI],
case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
[_] ->
{agreed_membership, UPI};
Else2 ->
{not_agreed, UPI, Else2}
end;
_Else ->
{UPI, not_unique, Epoch, _Else}
end
end || UPI <- UniqueUPIs],
UniqueResUPIs = [UPI || {unique, UPI} <- Res],
Tag = case lists:usort(lists:flatten(UniqueResUPIs)) ==
lists:sort(lists:flatten(UniqueResUPIs)) of
true ->
ok_disjoint;
false ->
bummer_NOT_DISJOINT
end,
{Epoch, {Tag, Res}}.
-endif.