From 99bfa2a3b8df8607a7849c05419855ed441589f3 Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Mon, 6 Apr 2015 14:16:20 +0900
Subject: [PATCH 01/22] Import of machi_chain_manager1.erl and friends; tests
broken
---
include/machi_chain_manager.hrl | 41 +
src/machi_chain_manager1.erl | 1547 +++++++++++++++++++++++++++
test/machi_chain_manager1_pulse.erl | 379 +++++++
test/machi_chain_manager1_test.erl | 589 ++++++++++
test/machi_partition_simulator.erl | 239 +++++
5 files changed, 2795 insertions(+)
create mode 100644 include/machi_chain_manager.hrl
create mode 100644 src/machi_chain_manager1.erl
create mode 100644 test/machi_chain_manager1_pulse.erl
create mode 100644 test/machi_chain_manager1_test.erl
create mode 100644 test/machi_partition_simulator.erl
diff --git a/include/machi_chain_manager.hrl b/include/machi_chain_manager.hrl
new file mode 100644
index 0000000..7a100b1
--- /dev/null
+++ b/include/machi_chain_manager.hrl
@@ -0,0 +1,41 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-define(NOT_FLAPPING, {0,0,0}).
+
+-type projection() :: #projection_v1{}.
+
+-record(ch_mgr, {
+ init_finished :: boolean(),
+ name :: pv1_server(),
+ proj :: projection(),
+ proj_history :: queue(),
+ myflu :: pid() | atom(),
+ flap_limit :: non_neg_integer(),
+ %%
+ runenv :: list(), %proplist()
+ opts :: list(), %proplist()
+ flaps=0 :: integer(),
+ flap_start = ?NOT_FLAPPING
+ :: erlang:timestamp(),
+
+ %% Deprecated ... TODO: remove when the old unit test code is removed
+ proj_proposed :: 'none' | projection()
+ }).
diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl
new file mode 100644
index 0000000..5f662f6
--- /dev/null
+++ b/src/machi_chain_manager1.erl
@@ -0,0 +1,1547 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1).
+
+%% TODO: I am going to sever the connection between the flowchart and the
+%% code. That diagram is really valuable, but it also takes a long time
+%% to make any kind of edit; the process is too slow. This TODO item is
+%% a reminder that the flowchart is important documentation and must be
+%% brought back into sync with the code soon.
+
+-behaviour(gen_server).
+
+-include("machi_projection.hrl").
+-include("machi_chain_manager.hrl").
+
+-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
+-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+
+%% Keep a history of our flowchart execution in the process dictionary.
+-define(REACT(T), put(react, [T|get(react)])).
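+
+%% A minimal sketch of how the trace is consumed (illustrative): each
+%% state function records a tag with ?REACT/1, and because new tags are
+%% prepended, lists:reverse/1 yields the events in chronological order.
+%%
+%%     put(react, []),
+%%     ?REACT(a10),
+%%     ?REACT({a20, ?LINE}),
+%%     History = lists:reverse(get(react)).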
+
+%% API
+-export([start_link/3, start_link/4, stop/1, ping/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([make_projection_summary/1, projection_transitions_are_sane/2]).
+
+-ifdef(TEST).
+
+-export([test_calc_projection/2,
+ test_calc_proposed_projection/1,
+ test_write_proposed_projection/1,
+ test_read_latest_public_projection/2,
+ test_react_to_env/1,
+ get_all_hosed/1]).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-endif.
+-ifdef(PULSE).
+-compile({parse_transform, pulse_instrument}).
+-endif.
+
+-include_lib("eunit/include/eunit.hrl").
+-compile(export_all).
+-endif. %TEST
+
+start_link(MyName, All_list, MyFLUPid) ->
+ start_link(MyName, All_list, MyFLUPid, []).
+
+start_link(MyName, All_list, MyFLUPid, MgrOpts) ->
+ gen_server:start_link(?MODULE, {MyName, All_list, MyFLUPid, MgrOpts}, []).
+
+stop(Pid) ->
+ gen_server:call(Pid, {stop}, infinity).
+
+ping(Pid) ->
+ gen_server:call(Pid, {ping}, infinity).
+
+-ifdef(TEST).
+
+%% Test/debugging code only.
+
+test_write_proposed_projection(Pid) ->
+ gen_server:call(Pid, {test_write_proposed_projection}, infinity).
+
+%% Calculate a projection and return it to us.
+%% If KeepRunenvP is true, the server will retain any changes made to its
+%% runtime environment, e.g., changes to simulated network partitions.
+%% The server's internal proposed projection is not altered.
+test_calc_projection(Pid, KeepRunenvP) ->
+ gen_server:call(Pid, {test_calc_projection, KeepRunenvP}, infinity).
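+
+%% Example usage (illustrative; Pid as returned by start_link/3,4):
+%%
+%%     {ok, P} = test_calc_projection(Pid, false).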
+
+%% Async!
+%% The server's internal proposed projection *is* altered.
+test_calc_proposed_projection(Pid) ->
+ gen_server:cast(Pid, {test_calc_proposed_projection}).
+
+test_read_latest_public_projection(Pid, ReadRepairP) ->
+ gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP},
+ infinity).
+
+test_react_to_env(Pid) ->
+ gen_server:call(Pid, {test_react_to_env}, infinity).
+
+-endif. % TEST
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init({MyName, All_list, MyFLUPid, MgrOpts}) ->
+ RunEnv = [%% {seed, Seed},
+ {seed, now()},
+ {network_partitions, []},
+ {network_islands, []},
+ {flapping_i, []},
+ {up_nodes, not_init_yet}],
+ BestProj = make_initial_projection(MyName, All_list, All_list,
+ [], []),
+ NoneProj = make_initial_projection(MyName, All_list, [],
+ [], []),
+ S = #ch_mgr{init_finished=false,
+ name=MyName,
+ proj=NoneProj,
+ proj_history=queue:new(),
+ myflu=MyFLUPid, % pid or atom local name
+ %% TODO 2015-03-04: revisit, should this constant be bigger?
+ %% Yes, this should be bigger, but it's a hack. There is
+ %% no guarantee that all parties will advance to a minimum
+ %% flap awareness in the amount of time that this mgr will.
+ flap_limit=length(All_list) + 50,
+ runenv=RunEnv,
+ opts=MgrOpts},
+
+ %% TODO: There is a bootstrapping problem here that needs to be
+ %% solved eventually: someone/something needs to set the initial
+ %% state for the chain.
+ %%
+ %% The PoC hack here will set the chain to all members. That may
+ %% be fine for testing purposes, but it won't work for real life.
+ %% For example, if chain C has been running with [a,b] for a
+ %% while, then we start c. We don't want c to say, hey, let's do
+ %% [a,b,c] immediately ... the UPI invariant requires repair,
+ %% etc. etc.
+
+ self() ! {finish_init, BestProj},
+ {ok, S}.
+
+handle_call(_Call, _From, #ch_mgr{init_finished=false} = S) ->
+ {reply, not_initialized, S};
+handle_call({test_write_proposed_projection}, _From, S) ->
+ if S#ch_mgr.proj_proposed == none ->
+ {reply, none, S};
+ true ->
+ {Res, S2} = do_cl_write_proposed_proj(S),
+ {reply, Res, S2}
+ end;
+handle_call({ping}, _From, S) ->
+ {reply, pong, S};
+handle_call({stop}, _From, S) ->
+ {stop, normal, ok, S};
+handle_call({test_calc_projection, KeepRunenvP}, _From,
+ #ch_mgr{name=MyName}=S) ->
+ RelativeToServer = MyName,
+ {P, S2} = calc_projection(S, RelativeToServer),
+ {reply, {ok, P}, if KeepRunenvP -> S2;
+ true -> S
+ end};
+handle_call({test_read_latest_public_projection, ReadRepairP}, _From, S) ->
+ {Perhaps, Val, ExtraInfo, S2} =
+ do_cl_read_latest_public_projection(ReadRepairP, S),
+ Res = {Perhaps, Val, ExtraInfo},
+ {reply, Res, S2};
+handle_call({test_react_to_env}, _From, S) ->
+ {TODOtodo, S2} = do_react_to_env(S),
+ {reply, TODOtodo, S2};
+handle_call(_Call, _From, S) ->
+ {reply, whaaaaaaaaaa, S}.
+
+handle_cast(_Cast, #ch_mgr{init_finished=false} = S) ->
+ {noreply, S};
+handle_cast({test_calc_proposed_projection}, #ch_mgr{name=MyName}=S) ->
+ RelativeToServer = MyName,
+ {Proj, S2} = calc_projection(S, RelativeToServer),
+ {noreply, S2#ch_mgr{proj_proposed=Proj}};
+handle_cast(_Cast, S) ->
+ ?D({cast_whaaaaaaaaaaa, _Cast}),
+ {noreply, S}.
+
+handle_info({finish_init, BestProj}, S) ->
+ S2 = finish_init(BestProj, S),
+ {noreply, S2};
+handle_info(Msg, S) ->
+ exit({bummer, Msg}),
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+finish_init(BestProj, #ch_mgr{init_finished=false, myflu=MyFLU} = S) ->
+ case machi_flu0:proj_read_latest(MyFLU, private) of
+ error_unwritten ->
+ Epoch = BestProj#projection_v1.epoch_number,
+ case machi_flu0:proj_write(MyFLU, Epoch, private, BestProj) of
+ ok ->
+ S#ch_mgr{init_finished=true, proj=BestProj};
+ error_written ->
+ exit({yo_impossible, ?LINE});
+ Else ->
+ ?D({retry,Else}),
+ timer:sleep(100),
+ finish_init(BestProj, S)
+ end;
+ {ok, Proj} ->
+ S#ch_mgr{init_finished=true, proj=Proj};
+ Else ->
+ ?D({todo, fix_up_eventually, Else}),
+ exit({yo_weird, Else})
+ end.
+
+do_cl_write_proposed_proj(#ch_mgr{proj_proposed=Proj} = S) ->
+ #projection_v1{epoch_number=Epoch} = Proj,
+ case cl_write_public_proj(Epoch, Proj, S) of
+ {ok, _S2}=Res ->
+ Res;
+ {_Other2, _S2}=Else2 ->
+ Else2
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+cl_write_public_proj(Epoch, Proj, S) ->
+ cl_write_public_proj(Epoch, Proj, false, S).
+
+cl_write_public_proj_skip_local_error(Epoch, Proj, S) ->
+ cl_write_public_proj(Epoch, Proj, true, S).
+
+cl_write_public_proj(Epoch, Proj, SkipLocalWriteErrorP, S) ->
+ %% Write to local public projection store first, and if it succeeds,
+ %% then write to all remote public projection stores.
+ cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP, S).
+
+cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP,
+ #ch_mgr{myflu=MyFLU}=S) ->
+ {_UpNodes, Partitions, S2} = calc_up_nodes(S),
+ Res0 = perhaps_call_t(
+ S, Partitions, MyFLU,
+ fun() -> machi_flu0:proj_write(MyFLU, Epoch, public, Proj) end),
+ Continue = fun() ->
+ FLUs = Proj#projection_v1.all_members -- [MyFLU],
+ cl_write_public_proj_remote(FLUs, Partitions, Epoch, Proj, S)
+ end,
+ case Res0 of
+ ok ->
+ {XX, SS} = Continue(),
+ {{local_write_result, ok, XX}, SS};
+ Else when SkipLocalWriteErrorP ->
+ {XX, SS} = Continue(),
+ {{local_write_result, Else, XX}, SS};
+ Else when Else == error_written; Else == timeout; Else == t_timeout ->
+ {Else, S2}
+ end.
+
+cl_write_public_proj_remote(FLUs, Partitions, Epoch, Proj, S) ->
+ %% We're going to be very care-free about this write because we'll rely
+ %% on the read side to do any read repair.
+ DoIt = fun(X) -> machi_flu0:proj_write(X, Epoch, public, Proj) end,
+ Rs = [{FLU, perhaps_call_t(S, Partitions, FLU, fun() -> DoIt(FLU) end)} ||
+ FLU <- FLUs],
+ {{remote_write_results, Rs}, S}.
+
+do_cl_read_latest_public_projection(ReadRepairP,
+ #ch_mgr{proj=Proj1, myflu=_MyFLU} = S) ->
+ _Epoch1 = Proj1#projection_v1.epoch_number,
+ case cl_read_latest_projection(public, S) of
+ {needs_repair, FLUsRs, Extra, S3} ->
+ if not ReadRepairP ->
+ {not_unanimous, todoxyz, [{results, FLUsRs}|Extra], S3};
+ true ->
+ {_Status, S4} = do_read_repair(FLUsRs, Extra, S3),
+ do_cl_read_latest_public_projection(ReadRepairP, S4)
+ end;
+ {UnanimousTag, Proj2, Extra, S3}=_Else ->
+ {UnanimousTag, Proj2, Extra, S3}
+ end.
+
+read_latest_projection_call_only(ProjectionType, AllHosed,
+ #ch_mgr{proj=CurrentProj}=S) ->
+ #projection_v1{all_members=All_list} = CurrentProj,
+ All_queried_list = All_list -- AllHosed,
+
+ {_UpNodes, Partitions, S2} = calc_up_nodes(S),
+ DoIt = fun(X) ->
+ case machi_flu0:proj_read_latest(X, ProjectionType) of
+ {ok, P} -> P;
+ Else -> Else
+ end
+ end,
+ Rs = [perhaps_call_t(S, Partitions, FLU, fun() -> DoIt(FLU) end) ||
+ FLU <- All_queried_list],
+ FLUsRs = lists:zip(All_queried_list, Rs),
+ {All_queried_list, FLUsRs, S2}.
+
+cl_read_latest_projection(ProjectionType, S) ->
+ AllHosed = [],
+ cl_read_latest_projection(ProjectionType, AllHosed, S).
+
+cl_read_latest_projection(ProjectionType, AllHosed, S) ->
+ {All_queried_list, FLUsRs, S2} =
+ read_latest_projection_call_only(ProjectionType, AllHosed, S),
+
+ rank_and_sort_projections_with_extra(All_queried_list, FLUsRs, S2).
+
+rank_and_sort_projections_with_extra(All_queried_list, FLUsRs,
+ #ch_mgr{proj=CurrentProj}=S) ->
+ UnwrittenRs = [x || {_, error_unwritten} <- FLUsRs],
+ Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection_v1)],
+ BadAnswerFLUs = [FLU || {FLU, Answer} <- FLUsRs,
+ not is_record(Answer, projection_v1)],
+
+ if All_queried_list == []
+ orelse
+ length(UnwrittenRs) == length(FLUsRs) ->
+ {error_unwritten, FLUsRs, [todo_fix_caller_perhaps], S};
+ UnwrittenRs /= [] ->
+ {needs_repair, FLUsRs, [flarfus], S};
+ true ->
+ [{_Rank, BestProj}|_] = rank_and_sort_projections(Ps, CurrentProj),
+ NotBestPs = [Proj || Proj <- Ps, Proj /= BestProj],
+ UnanimousTag = if NotBestPs == [] -> unanimous;
+ true -> not_unanimous
+ end,
+ Extra = [{all_members_replied, length(FLUsRs) == length(All_queried_list)}],
+ Best_FLUs = [FLU || {FLU, Projx} <- FLUsRs, Projx == BestProj],
+ TransAllHosed = lists:usort(
+ lists:flatten([get_all_hosed(P) || P <- Ps])),
+ AllFlapCounts = merge_flap_counts([get_all_flap_counts(P) ||
+ P <- Ps]),
+ Extra2 = [{all_queried_list, All_queried_list},
+ {flus_rs, FLUsRs},
+ {unanimous_flus,Best_FLUs},
+ {not_unanimous_flus, All_queried_list --
+ (Best_FLUs ++ BadAnswerFLUs)},
+ {bad_answer_flus, BadAnswerFLUs},
+ {not_unanimous_answers, NotBestPs},
+ {trans_all_hosed, TransAllHosed},
+ {trans_all_flap_counts, AllFlapCounts}|Extra],
+ {UnanimousTag, BestProj, Extra2, S}
+ end.
+
+do_read_repair(FLUsRs, _Extra, #ch_mgr{proj=CurrentProj} = S) ->
+ Unwrittens = [x || {_FLU, error_unwritten} <- FLUsRs],
+ Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection_v1)],
+ if Unwrittens == [] orelse Ps == [] ->
+ {nothing_to_do, S};
+ true ->
+ %% We have at least one unwritten and also at least one proj.
+ %% Pick the best one, then spam it everywhere.
+
+ [{_Rank, BestProj}|_] = rank_and_sort_projections(Ps, CurrentProj),
+ Epoch = BestProj#projection_v1.epoch_number,
+
+ %% We're doing repair, so use the flavor that will
+ %% continue to all others even if there is an
+ %% error_written on the local FLU.
+ {_DontCare, _S2}=Res = cl_write_public_proj_skip_local_error(
+ Epoch, BestProj, S),
+ Res
+ end.
+
+make_initial_projection(MyName, All_list, UPI_list, Repairing_list, Ps) ->
+ make_projection(0, MyName, All_list, [], UPI_list, Repairing_list, Ps).
+
+make_projection(EpochNum,
+ MyName, All_list, Down_list, UPI_list, Repairing_list,
+ Dbg) ->
+ make_projection(EpochNum,
+ MyName, All_list, Down_list, UPI_list, Repairing_list,
+ Dbg, []).
+
+make_projection(EpochNum,
+ MyName, All_list, Down_list, UPI_list, Repairing_list,
+ Dbg, Dbg2) ->
+ P = #projection_v1{epoch_number=EpochNum,
+ epoch_csum= <<>>, % always checksums as <<>>
+ creation_time=now(),
+ author_server=MyName,
+ all_members=All_list,
+ down=Down_list,
+ upi=UPI_list,
+ repairing=Repairing_list,
+ dbg=Dbg,
+ dbg2=[] % always checksums as []
+ },
+ P2 = update_projection_checksum(P),
+ P2#projection_v1{dbg2=Dbg2}.
+
+update_projection_checksum(#projection_v1{dbg2=Dbg2} = P) ->
+ CSum = crypto:hash(sha, term_to_binary(P#projection_v1{dbg2=[]})),
+ P#projection_v1{epoch_csum=CSum, dbg2=Dbg2}.
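+
+%% A sketch of how a reader could verify a projection's checksum
+%% (hypothetical helper, not part of this module; assumes the record
+%% came from make_projection/8, where update_projection_checksum/1
+%% hashes with epoch_csum = <<>> and dbg2 = []):
+%%
+%%     csum_ok(#projection_v1{epoch_csum=CSum} = P) ->
+%%         P2 = P#projection_v1{epoch_csum= <<>>, dbg2=[]},
+%%         CSum == crypto:hash(sha, term_to_binary(P2)).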
+
+update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
+ P#projection_v1{dbg2=Dbg2}.
+
+calc_projection(S, RelativeToServer) ->
+ calc_projection(S, RelativeToServer, []).
+
+calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S,
+ RelativeToServer, AllHosed) ->
+ Dbg = [],
+ OldThreshold = proplists:get_value(old_threshold, RunEnv),
+ NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv),
+ calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
+ RelativeToServer, AllHosed, Dbg, S).
+
+%% OldThreshold: Percent chance of using the old/previous network partition list
+%% NoPartitionThreshold: If the network partition changes, what percent chance
+%% that there are no partitions at all?
+
+calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
+ RelativeToServer, AllHosed, Dbg,
+ #ch_mgr{name=MyName, runenv=RunEnv1}=S) ->
+ #projection_v1{epoch_number=OldEpochNum,
+ all_members=All_list,
+ upi=OldUPI_list,
+ repairing=OldRepairing_list
+ } = LastProj,
+ LastUp = lists:usort(OldUPI_list ++ OldRepairing_list),
+ AllMembers = (S#ch_mgr.proj)#projection_v1.all_members,
+ {Up0, Partitions, RunEnv2} = calc_up_nodes(MyName,
+ AllMembers, RunEnv1),
+ Up = Up0 -- AllHosed,
+
+ NewUp = Up -- LastUp,
+ Down = AllMembers -- Up,
+
+ NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)],
+ Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)],
+ {NewUPI_list3, Repairing_list3, RunEnv3} =
+ case {NewUp, Repairing_list2} of
+ {[], []} ->
+D_foo=[],
+ {NewUPI_list, [], RunEnv2};
+ {[], [H|T]} when RelativeToServer == hd(NewUPI_list) ->
+ %% The author is the head of the UPI list. Let's see if
+ %% *everyone* in the UPI+repairing lists is using our
+ %% projection. This simulates a requirement that a real
+ %% repair process cannot take place until the chain is
+ %% stable, i.e., everyone is in the same epoch.
+
+ %% TODO create a real API call for fetching this info.
+ SameEpoch_p = check_latest_private_projections(
+ tl(NewUPI_list) ++ Repairing_list2,
+ S#ch_mgr.proj, Partitions, S),
+ if not SameEpoch_p ->
+D_foo=[],
+ {NewUPI_list, OldRepairing_list, RunEnv2};
+ true ->
+D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}],
+ {NewUPI_list ++ [H], T, RunEnv2}
+ end;
+ {_, _} ->
+D_foo=[],
+ {NewUPI_list, OldRepairing_list, RunEnv2}
+ end,
+ Repairing_list4 = case NewUp of
+ [] -> Repairing_list3;
+ NewUp -> Repairing_list3 ++ NewUp
+ end,
+ Repairing_list5 = Repairing_list4 -- Down,
+
+ TentativeUPI = NewUPI_list3,
+ TentativeRepairing = Repairing_list5,
+
+ {NewUPI, NewRepairing} =
+ if TentativeUPI == [] andalso TentativeRepairing /= [] ->
+ [FirstRepairing|TailRepairing] = TentativeRepairing,
+ {[FirstRepairing], TailRepairing};
+ true ->
+ {TentativeUPI, TentativeRepairing}
+ end,
+
+ P = make_projection(OldEpochNum + 1,
+ MyName, All_list, Down, NewUPI, NewRepairing,
+ D_foo ++
+ Dbg ++ [{ps, Partitions},{nodes_up, Up}]),
+ {P, S#ch_mgr{runenv=RunEnv3}}.
+
+check_latest_private_projections(FLUs, MyProj, Partitions, S) ->
+ FoldFun = fun(_FLU, false) ->
+ false;
+ (FLU, true) ->
+ F = fun() ->
+ machi_flu0:proj_read_latest(FLU, private)
+ end,
+ case perhaps_call_t(S, Partitions, FLU, F) of
+ {ok, RemotePrivateProj} ->
+ %% TODO: For use inside the simulator, this
+ %% function needs to check if RemotePrivateProj
+ %% contains a nested inner projection and, if
+ %% so, compare epoch# and upi & repairing lists.
+ %% If the nested inner proj is not checked here,
+ %% then a FLU in asymmetric partition flapping
+ %% case will appear in the simulator to be stuck
+ %% in repairing state.
+ if MyProj#projection_v1.epoch_number ==
+ RemotePrivateProj#projection_v1.epoch_number
+ andalso
+ MyProj#projection_v1.epoch_csum ==
+ RemotePrivateProj#projection_v1.epoch_csum ->
+ true;
+ true ->
+ false
+ end;
+ _ ->
+ false
+ end
+ end,
+ lists:foldl(FoldFun, true, FLUs).
+
+calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
+ AllMembers = Proj#projection_v1.all_members,
+ {UpNodes, Partitions, RunEnv2} =
+ calc_up_nodes(MyName, AllMembers, RunEnv1),
+ {UpNodes, Partitions, S#ch_mgr{runenv=RunEnv2}}.
+
+calc_up_nodes(MyName, AllMembers, RunEnv1) ->
+ {Partitions2, Islands2} = machi_partition_simulator:get(AllMembers),
+ catch ?REACT({partitions,Partitions2}),
+ catch ?REACT({islands,Islands2}),
+ UpNodes = lists:sort(
+ [Node || Node <- AllMembers,
+ not lists:member({MyName, Node}, Partitions2),
+ not lists:member({Node, MyName}, Partitions2)]),
+ RunEnv2 = replace(RunEnv1,
+ [{network_partitions, Partitions2},
+ {network_islands, Islands2},
+ {up_nodes, UpNodes}]),
+ {UpNodes, Partitions2, RunEnv2}.
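+
+%% In calc_up_nodes/3 above, for example (illustrative): with MyName=a,
+%% AllMembers=[a,b,c], and Partitions2=[{a,c}], node c is filtered out
+%% and UpNodes=[a,b]. Note that a simulated partition in either
+%% direction, {a,c} or {c,a}, is enough to consider the peer down here.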
+
+replace(PropList, Items) ->
+ proplists:compact(Items ++ PropList).
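+
+%% "Replacement" in replace/2 relies on proplists:get_value/2 returning
+%% the first match, so new items simply shadow older ones, e.g.
+%% (illustrative):
+%%
+%%     replace([{x,1},{y,2}], [{x,9}]) %% -> [{x,9},{x,1},{y,2}]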
+
+make_projection_summary(#projection_v1{epoch_number=EpochNum,
+ all_members=_All_list,
+ down=Down_list,
+ author_server=Author,
+ upi=UPI_list,
+ repairing=Repairing_list,
+ dbg=Dbg, dbg2=Dbg2}) ->
+ [{epoch,EpochNum},{author,Author},
+ {upi,UPI_list},{repair,Repairing_list},{down,Down_list},
+ {d,Dbg}, {d2,Dbg2}].
+
+rank_and_sort_projections(Ps, CurrentProj) ->
+ Epoch = lists:max([Proj#projection_v1.epoch_number || Proj <- Ps]),
+ MaxPs = [Proj || Proj <- Ps,
+ Proj#projection_v1.epoch_number == Epoch],
+ %% Sort with highest rank first (custom sort)
+ lists:sort(fun({RankA,_}, {RankB,_}) -> RankA > RankB end,
+ rank_projections(MaxPs, CurrentProj)).
+
+%% Caller must ensure all Projs are of the same epoch number.
+%% If the caller gives us projections with different epochs, we assume
+%% that the caller is doing an OK thing.
+
+rank_projections(Projs, CurrentProj) ->
+ #projection_v1{all_members=All_list} = CurrentProj,
+ MemberRank = orddict:from_list(
+ lists:zip(All_list, lists:seq(1, length(All_list)))),
+ N = length(All_list),
+ [{rank_projection(Proj, MemberRank, N), Proj} || Proj <- Projs].
+
+rank_projection(#projection_v1{upi=[]}, _MemberRank, _N) ->
+ -100;
+rank_projection(#projection_v1{author_server=Author,
+ upi=UPI_list,
+ repairing=Repairing_list}, MemberRank, N) ->
+ AuthorRank = orddict:fetch(Author, MemberRank),
+ %% (AuthorRank-AuthorRank) + % feels unstable????
+ AuthorRank + % feels stable
+ ( N * length(Repairing_list)) +
+ (N*N * length(UPI_list)).
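+
+%% Worked example for rank_projection/3 (illustrative): with
+%% All_list=[a,b,c] we have N=3 and MemberRank = [{a,1},{b,2},{c,3}].
+%% A projection authored by b with upi=[a,b] and repairing=[c] ranks as
+%%
+%%     2 + (3 * 1) + (3*3 * 2) = 23
+%%
+%% so UPI length dominates, then repairing length, and author rank
+%% breaks ties.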
+
+do_react_to_env(S) ->
+ put(react, []),
+ react_to_env_A10(S).
+
+react_to_env_A10(S) ->
+ ?REACT(a10),
+ react_to_env_A20(0, S).
+
+react_to_env_A20(Retries, S) ->
+ ?REACT(a20),
+ {UnanimousTag, P_latest, ReadExtra, S2} =
+ do_cl_read_latest_public_projection(true, S),
+
+ %% The UnanimousTag isn't quite sufficient for our needs. We need
+ %% to determine if *all* of the UPI+Repairing FLUs are members of
+ %% the unanimous server replies.
+ UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)),
+ UPI_Repairing_FLUs = lists:sort(P_latest#projection_v1.upi ++
+ P_latest#projection_v1.repairing),
+ All_UPI_Repairing_were_unanimous = UPI_Repairing_FLUs == UnanimousFLUs,
+ %% TODO: investigate whether the condition below is more correct.
+ %% All_UPI_Repairing_were_unanimous = (UPI_Repairing_FLUs -- UnanimousFLUs) == [],
+ LatestUnanimousP =
+ if UnanimousTag == unanimous
+ andalso
+ All_UPI_Repairing_were_unanimous ->
+ ?REACT({a20,?LINE}),
+ true;
+ UnanimousTag == unanimous ->
+ ?REACT({a20,?LINE,[{upi_repairing,UPI_Repairing_FLUs},
+ {unanimous,UnanimousFLUs}]}),
+ false;
+ UnanimousTag == not_unanimous ->
+ ?REACT({a20,?LINE}),
+ false;
+ true ->
+ exit({badbad, UnanimousTag})
+ end,
+ react_to_env_A30(Retries, P_latest, LatestUnanimousP, ReadExtra, S2).
+
+react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
+ #ch_mgr{name=MyName, proj=P_current,
+ flap_limit=FlapLimit} = S) ->
+ ?REACT(a30),
+ RelativeToServer = MyName,
+ {P_newprop1, S2} = calc_projection(S, RelativeToServer),
+ ?REACT({a30, ?LINE, [{newprop1, make_projection_summary(P_newprop1)}]}),
+
+ %% Are we flapping yet?
+ {P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2),
+
+ %% Move the epoch number up ... originally done in C300.
+ #projection_v1{epoch_number=Epoch_newprop2}=P_newprop2,
+ #projection_v1{epoch_number=Epoch_latest}=P_latest,
+ NewEpoch = erlang:max(Epoch_newprop2, Epoch_latest) + 1,
+ P_newprop3 = P_newprop2#projection_v1{epoch_number=NewEpoch},
+ ?REACT({a30, ?LINE, [{newprop3, make_projection_summary(P_newprop3)}]}),
+
+ {P_newprop10, S10} =
+ case get_flap_count(P_newprop3) of
+ %% TODO: refactor to eliminate cut-and-paste code in 'when'
+ {_, P_newprop3_flap_count} when P_newprop3_flap_count >= FlapLimit ->
+ AllHosed = get_all_hosed(S3),
+ {P_i, S_i} = calc_projection(S3, MyName, AllHosed),
+ P_inner = case lists:member(MyName, AllHosed) of
+ false ->
+ P_i;
+ true ->
+ P_i#projection_v1{upi=[MyName],
+ repairing=[],
+ down=P_i#projection_v1.all_members
+ -- [MyName]}
+ end,
+
+ %% TODO FIXME A naive assignment here will cause epoch #
+ %% instability of the inner projection. We need a stable
+ %% epoch number somehow. ^_^
+ %% P_inner2 = P_inner#projection_v1{epoch_number=P_newprop3#projection_v1.epoch_number},
+
+ FinalInnerEpoch =
+ case proplists:get_value(inner_projection,
+ P_current#projection_v1.dbg) of
+ undefined ->
+ AllFlapCounts_epk =
+ [Epk || {{Epk,_FlTime}, _FlCount} <-
+ get_all_flap_counts(P_newprop3)],
+ case AllFlapCounts_epk of
+ [] ->
+ P_newprop3#projection_v1.epoch_number;
+ [_|_] ->
+ lists:max(AllFlapCounts_epk)
+ end;
+ P_oldinner ->
+ if P_oldinner#projection_v1.upi == P_inner#projection_v1.upi
+ andalso
+ P_oldinner#projection_v1.repairing == P_inner#projection_v1.repairing
+ andalso
+ P_oldinner#projection_v1.down == P_inner#projection_v1.down ->
+ P_oldinner#projection_v1.epoch_number;
+ true ->
+ P_oldinner#projection_v1.epoch_number + 1
+ end
+ end,
+
+ P_inner2 = P_inner#projection_v1{epoch_number=FinalInnerEpoch},
+ InnerInfo = [{inner_summary, make_projection_summary(P_inner2)},
+ {inner_projection, P_inner2}],
+ DbgX = replace(P_newprop3#projection_v1.dbg, InnerInfo),
+ ?REACT({a30, ?LINE, [qqqwww|DbgX]}),
+ {P_newprop3#projection_v1{dbg=DbgX}, S_i};
+ _ ->
+ {P_newprop3, S3}
+ end,
+
+ react_to_env_A40(Retries, P_newprop10, P_latest,
+ LatestUnanimousP, S10).
+
+react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
+ #ch_mgr{name=MyName, proj=P_current}=S) ->
+ ?REACT(a40),
+ [{Rank_newprop, _}] = rank_projections([P_newprop], P_current),
+ [{Rank_latest, _}] = rank_projections([P_latest], P_current),
+ LatestAuthorDownP = lists:member(P_latest#projection_v1.author_server,
+ P_newprop#projection_v1.down),
+
+ if
+ P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number
+ orelse
+ not LatestUnanimousP ->
+ ?REACT({a40, ?LINE,
+ [{latest_epoch, P_latest#projection_v1.epoch_number},
+ {current_epoch, P_current#projection_v1.epoch_number},
+ {latest_unanimous_p, LatestUnanimousP}]}),
+
+ %% 1st clause: someone else has written a newer projection
+ %% 2nd clause: a network partition has healed, revealing a
+ %% differing opinion.
+ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+ Rank_newprop, Rank_latest, S);
+
+ P_latest#projection_v1.epoch_number < P_current#projection_v1.epoch_number
+ orelse
+ P_latest /= P_current ->
+ ?REACT({a40, ?LINE,
+ [{latest_epoch, P_latest#projection_v1.epoch_number},
+ {current_epoch, P_current#projection_v1.epoch_number},
+ {neq, P_latest /= P_current}]}),
+
+ %% Both of these cases are rare. Elsewhere, the code
+ %% assumes that the local FLU's projection store is always
+ %% available, so reads & writes to it aren't going to fail
+ %% willy-nilly. If that assumption is true, then we can
+ %% reason as follows:
+ %%
+ %% a. If we can always read from the local FLU projection
+ %% store, then the 1st clause isn't possible because
+ %% P_latest's epoch # must be at least as large as
+ %% P_current's epoch #
+ %%
+ %% b. If P_latest /= P_current, then there can't be a
+ %% unanimous reply for P_latest, so the earlier 'if'
+ %% clause would be triggered and so we could never reach
+ %% this clause.
+ %%
+ %% I'm keeping this 'if' clause just in case the local FLU
+ %% projection store assumption changes.
+ react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+ Rank_newprop, Rank_latest, S);
+
+ %% A40a (see flowchart)
+ Rank_newprop > Rank_latest ->
+ ?REACT({b10, ?LINE,
+ [{rank_latest, Rank_latest},
+ {rank_newprop, Rank_newprop},
+ {latest_author, P_latest#projection_v1.author_server}]}),
+
+ %% TODO: There may be an "improvement" here. If we're the
+ %% highest-ranking FLU in the all_members list, then if we make a
+ %% projection where our UPI list is the same as P_latest's, and
+ %% our repairing list is the same as P_latest's, then it may not
+ %% be necessary to write our projection: it doesn't "improve"
+ %% anything UPI-wise or repairing-wise. But it isn't clear to me
+ %% if it's 100% correct to "improve" here and skip writing
+ %% P_newprop, yet.
+ react_to_env_C300(P_newprop, P_latest, S);
+
+ %% A40b (see flowchart)
+ P_latest#projection_v1.author_server == MyName
+ andalso
+ (P_newprop#projection_v1.upi /= P_latest#projection_v1.upi
+ orelse
+ P_newprop#projection_v1.repairing /= P_latest#projection_v1.repairing) ->
+ ?REACT({a40, ?LINE,
+ [{latest_author, P_latest#projection_v1.author_server},
+ {newprop_upi, P_newprop#projection_v1.upi},
+ {latest_upi, P_latest#projection_v1.upi},
+ {newprop_repairing, P_newprop#projection_v1.repairing},
+ {latest_repairing, P_latest#projection_v1.repairing}]}),
+
+ react_to_env_C300(P_newprop, P_latest, S);
+
+ %% A40c (see flowchart)
+ LatestAuthorDownP ->
+ ?REACT({a40, ?LINE,
+ [{latest_author, P_latest#projection_v1.author_server},
+ {author_is_down_p, LatestAuthorDownP}]}),
+
+ %% TODO: I believe that membership in the
+ %% P_newprop#projection_v1.down is not sufficient for long
+ %% chains. Rather, we ought to be using a full broadcast
+ %% gossip of server up status.
+ %%
+ %% Imagine 5 servers in an "Olympic Rings" style
+ %% overlapping network partition, where ring1 = upper
+ %% leftmost and ring5 = upper rightmost. It's both
+ %% possible and desirable for ring5's projection to be
+ %% seen (public) by ring1. Ring5's projection's rank is
+ %% definitely higher than ring1's proposed projection's
+ %% rank ... but we're in a crazy netsplit where:
+ %% * if we accept ring5's proj: only one functioning chain
+ %% ([ring4,ring5]) but stable
+ %% * if we accept ring1's proj: two functioning chains
+ %% ([ring1,ring2] and [ring4,ring5] independently)
+ %% but unstable: we're probably going to flap back & forth?!
+ react_to_env_C300(P_newprop, P_latest, S);
+
+ true ->
+ ?REACT({a40, ?LINE, [true]}),
+
+ react_to_env_A50(P_latest, S)
+ end.
+
+react_to_env_A50(P_latest, S) ->
+ ?REACT(a50),
+
+ HH = get(react),
+ io:format(user, "HEE50s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- HH, is_atom(X)])]),
+ %% io:format(user, "HEE50 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(HH)]),
+
+ ?REACT({a50, ?LINE, [{latest_epoch, P_latest#projection_v1.epoch_number}]}),
+ {{no_change, P_latest#projection_v1.epoch_number}, S}.
+
+react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+ Rank_newprop, Rank_latest,
+ #ch_mgr{name=MyName, flap_limit=FlapLimit}=S) ->
+ ?REACT(b10),
+
+ {_P_newprop_flap_time, P_newprop_flap_count} = get_flap_count(P_newprop),
+ LatestAllFlapCounts = get_all_flap_counts_counts(P_latest),
+ P_latest_trans_flap_count = my_find_minmost(LatestAllFlapCounts),
+
+ if
+ LatestUnanimousP ->
+ ?REACT({b10, ?LINE, [{latest_unanimous_p, LatestUnanimousP}]}),
+ put(b10_hack, false),
+
+ react_to_env_C100(P_newprop, P_latest, S);
+
+ P_newprop_flap_count >= FlapLimit ->
+ %% I am flapping ... what else do I do?
+ ?REACT({b10, ?LINE, [i_am_flapping,
+ {newprop_flap_count, P_newprop_flap_count},
+ {latest_trans_flap_count, P_latest_trans_flap_count},
+ {flap_limit, FlapLimit}]}),
+ _B10Hack = get(b10_hack),
+ %% if _B10Hack == false andalso P_newprop_flap_count - FlapLimit - 3 =< 0 -> io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]), put(b10_hack, true); true -> ok end,
+ io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]),
+
+ if
+ %% So, if we noticed a flap count by some FLU X with a
+ %% count below FlapLimit, and then X crashed so that X's
+ %% flap count remained below FlapLimit, could we get
+ %% stuck forever? Hrm, except that 'crashes' ought to be
+ %% detected by our own failure detector and get us out of
+ %% this current flapping situation, right? TODO
+ %%
+ %% 2015-04-05: If we add 'orelse AllSettled' to this 'if'
+ %% clause, then we can end up short-circuiting too
+ %% early. (Where AllSettled comes from the runenv's
+ %% flapping_i prop.) So, I believe that we need to
+ %% rely on the failure detector to rescue us.
+ %%
+ %% TODO About the above ^^ I think that was based on buggy
+ %% calculation of AllSettled. Recheck!
+ %%
+ %% TODO Yay, another magic constant below, added to
+ %% FlapLimit, that needs thorough examination and
+ %% hopefully elimination. I'm adding it to try to
+ %% make it more likely that someone's private proj
+ %% will include all_flap_counts_settled,true 100%
+ %% of the time. But I'm not sure how important that
+ %% really is.
+ %% That settled flag can lag behind after a change in
+ %% network conditions, so I'm not sure how much value
+ %% it has, if any.
+% QQQ TODO
+% P_latest_trans_flap_count >= FlapLimit + 20 ->
+% %% Everyone that's flapping together now has flap_count
+% %% that's larger than the limit. So it's safe and good
+% %% to stop here, so we can break the cycle of flapping.
+% ?REACT({b10, ?LINE, [flap_stop]}),
+% react_to_env_A50(P_latest, S);
+
+ true ->
+ %% It is our moral imperative to write so that the flap
+ %% cycle continues enough times so that everyone notices
+ %% and thus the earlier clause above fires.
+ ?REACT({b10, ?LINE, [flap_continue]}),
+ react_to_env_C300(P_newprop, P_latest, S)
+ end;
+
+ Retries > 2 ->
+ ?REACT({b10, ?LINE, [{retries, Retries}]}),
+ put(b10_hack, false),
+
+ %% The author of P_latest is too slow or crashed.
+ %% Let's try to write P_newprop and see what happens!
+ react_to_env_C300(P_newprop, P_latest, S);
+
+ Rank_latest >= Rank_newprop
+ andalso
+ P_latest#projection_v1.author_server /= MyName ->
+ ?REACT({b10, ?LINE,
+ [{rank_latest, Rank_latest},
+ {rank_newprop, Rank_newprop},
+ {latest_author, P_latest#projection_v1.author_server}]}),
+ put(b10_hack, false),
+
+ %% Give the author of P_latest an opportunity to write a
+ %% new projection in a new epoch to resolve this mixed
+ %% opinion.
+ react_to_env_C200(Retries, P_latest, S);
+
+ true ->
+ ?REACT({b10, ?LINE}),
+ put(b10_hack, false),
+
+ %% P_newprop is best, so let's write it.
+ react_to_env_C300(P_newprop, P_latest, S)
+ end.
+
+react_to_env_C100(P_newprop, P_latest,
+ #ch_mgr{name=MyName, proj=P_current}=S) ->
+ ?REACT(c100),
+ I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi),
+ I_am_Repairing_in_latest_p = lists:member(MyName,
+ P_latest#projection_v1.repairing),
+ ShortCircuit_p =
+ P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number
+ andalso
+ I_am_UPI_in_newprop_p
+ andalso
+ I_am_Repairing_in_latest_p,
+
+ case {ShortCircuit_p, projection_transition_is_sane(P_current, P_latest,
+ MyName)} of
+ {true, _} ->
+ %% Someone else believes that I am repairing. We assume
+ %% that nobody is being Byzantine, so we'll believe that I
+ %% am/should be repairing. We ignore our proposal and try
+ %% to go with the latest.
+ ?REACT({c100, ?LINE, [repairing_short_circuit]}),
+ react_to_env_C110(P_latest, S);
+ {_, true} ->
+ ?REACT({c100, ?LINE, [sane]}),
+ react_to_env_C110(P_latest, S);
+ {_, _AnyOtherReturnValue} ->
+ %% P_latest is not sane.
+ %% By process of elimination, P_newprop is best,
+ %% so let's write it.
+ ?REACT({c100, ?LINE, [not_sane]}),
+ react_to_env_C300(P_newprop, P_latest, S)
+ end.
+
+react_to_env_C110(P_latest, #ch_mgr{myflu=MyFLU} = S) ->
+ ?REACT(c110),
+ %% TODO: Should we carry along any extra info that would be useful
+ %% in the dbg2 list?
+ Extra_todo = [],
+ RunEnv = S#ch_mgr.runenv,
+ Islands = proplists:get_value(network_islands, RunEnv),
+ P_latest2 = update_projection_dbg2(
+ P_latest,
+ [%% {network_islands, Islands},
+ %% {hooray, {v2, date(), time()}}
+ Islands--Islands
+ |Extra_todo]),
+
+ Epoch = P_latest2#projection_v1.epoch_number,
+ ok = machi_flu0:proj_write(MyFLU, Epoch, private, P_latest2),
+ case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
+ true ->
+ {_,_,C} = os:timestamp(),
+ MSec = trunc(C / 1000),
+ {HH,MM,SS} = time(),
+ io:format(user, "\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses: ~w\n",
+ [HH,MM,SS,MSec, S#ch_mgr.name,
+ make_projection_summary(P_latest2)]);
+ _ ->
+ ok
+ end,
+ react_to_env_C120(P_latest, S).
+
+react_to_env_C120(P_latest, #ch_mgr{proj_history=H} = S) ->
+ ?REACT(c120),
+ H2 = queue:in(P_latest, H),
+ H3 = case queue:len(H2) of
+ %% TODO: revisit this constant? Is this too long as a base?
+ %% My hunch is that it's fine and that the flap_limit needs to
+ %% be raised much higher (because it can increase several ticks
+ %% without a newer public epoch proposed anywhere).
+ X when X > length(P_latest#projection_v1.all_members) * 2 ->
+ {_V, Hxx} = queue:out(H2),
+ Hxx;
+ _ ->
+ H2
+ end,
+
+ HH = get(react),
+ io:format(user, "HEE120s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- HH, is_atom(X)])]),
+ %% io:format(user, "HEE120 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(HH)]),
+
+ ?REACT({c120, [{latest, make_projection_summary(P_latest)}]}),
+ {{now_using, P_latest#projection_v1.epoch_number},
+ S#ch_mgr{proj=P_latest, proj_history=H3, proj_proposed=none}}.
+
+react_to_env_C200(Retries, P_latest, S) ->
+ ?REACT(c200),
+ try
+ %% TODO: This code works "well enough" without actually
+ %% telling anybody anything. Do we want to rip this out?
+ %% Actually implement it? None of the above?
+ yo:tell_author_yo(P_latest#projection_v1.author_server)
+ catch _Type:_Err ->
+ %% io:format(user, "TODO: tell_author_yo is broken: ~p ~p\n",
+ %% [_Type, _Err]),
+ ok
+ end,
+ react_to_env_C210(Retries, S).
+
+react_to_env_C210(Retries, #ch_mgr{name=MyName, proj=Proj} = S) ->
+ ?REACT(c210),
+ sleep_ranked_order(10, 100, MyName, Proj#projection_v1.all_members),
+ react_to_env_C220(Retries, S).
+
+react_to_env_C220(Retries, S) ->
+ ?REACT(c220),
+ react_to_env_A20(Retries + 1, S).
+
+react_to_env_C300(#projection_v1{epoch_number=_Epoch_newprop}=P_newprop,
+ #projection_v1{epoch_number=_Epoch_latest}=_P_latest, S) ->
+ ?REACT(c300),
+
+ %% This logic moved to A30.
+ %% NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
+ %% P_newprop2 = P_newprop#projection_v1{epoch_number=NewEpoch},
+ %% react_to_env_C310(update_projection_checksum(P_newprop2), S).
+
+ react_to_env_C310(update_projection_checksum(P_newprop), S).
+
+react_to_env_C310(P_newprop, S) ->
+ ?REACT(c310),
+ Epoch = P_newprop#projection_v1.epoch_number,
+ {WriteRes, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
+ ?REACT({c310, ?LINE,
+ [{newprop, make_projection_summary(P_newprop)},
+ {write_result, WriteRes}]}),
+ react_to_env_A10(S2).
+
+calculate_flaps(P_newprop, _P_current, FlapLimit,
+ #ch_mgr{name=MyName, proj_history=H, flap_start=FlapStart,
+ flaps=Flaps, runenv=RunEnv0} = S) ->
+ RunEnv1 = replace(RunEnv0, [{flapping_i, []}]),
+ HistoryPs = queue:to_list(H),
+ Ps = HistoryPs ++ [P_newprop],
+ UniqueProposalSummaries = lists:usort([{P#projection_v1.upi,
+ P#projection_v1.repairing,
+ P#projection_v1.down} || P <- Ps]),
+
+ {_WhateverUnanimous, BestP, Props, _S} =
+ cl_read_latest_projection(private, S),
+ NotBestPs = proplists:get_value(not_unanimous_answers, Props),
+ DownUnion = lists:usort(
+ lists:flatten(
+ [P#projection_v1.down ||
+ P <- [BestP|NotBestPs]])),
+ HosedTransUnion = proplists:get_value(trans_all_hosed, Props),
+ TransFlapCounts0 = proplists:get_value(trans_all_flap_counts, Props),
+
+ _Unanimous = proplists:get_value(unanimous_flus, Props),
+ _NotUnanimous = proplists:get_value(not_unanimous_flus, Props),
+ %% NOTE: bad_answer_flus are probably due to timeout or some other network
+ %% glitch, i.e., anything other than {ok, P::projection()}
+ %% response from machi_flu0:proj_read_latest().
+ BadFLUs = proplists:get_value(bad_answer_flus, Props),
+
+ RemoteTransFlapCounts1 = lists:keydelete(MyName, 1, TransFlapCounts0),
+ RemoteTransFlapCounts =
+ [X || {_FLU, {{_FlEpk,FlTime}, _FlapCount}}=X <- RemoteTransFlapCounts1,
+ FlTime /= ?NOT_FLAPPING],
+ TempNewFlaps = Flaps + 1,
+ TempAllFlapCounts = lists:sort([{MyName, {FlapStart, TempNewFlaps}}|
+ RemoteTransFlapCounts]),
+ %% Sanity check.
+ true = lists:all(fun({_,{_,_}}) -> true;
+ (_) -> false end, TempAllFlapCounts),
+
+ %% H is the bounded history of all of this manager's private
+ %% projection store writes. If we've proposed the *same*
+ %% {UPI+Repairing, Down} combination for the entire length of our
+ %% bounded size of H, then we're flapping.
+ %%
+ %% If we're flapping, then we use our own flap counter and that of
+ %% all of our peer managers to see if we've all got flap counters
+ %% that exceed the flap_limit. If that global condition appears
+ %% true, then we "blow the circuit breaker" by stopping our
+ %% participation in the flapping store (via the shortcut to A50).
+ %%
+ %% We reset our flap counter on any of several conditions:
+ %%
+ %% 1. If our bounded history H contains more than one proposal,
+ %% then by definition we are not flapping.
+ %% 2. If a remote manager is flapping and has re-started a new
+ %% flapping episode.
+ %% 3. If one of the remote managers that we saw earlier has
+ %% stopped flapping.
+
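+ %% Example (illustrative): with all_members=[a,b,c] and a bounded
+ %% history of at least 3 proposals that all share a single
+ %% {UPI,Repairing,Down} summary, the case below declares us
+ %% flapping and keeps TempNewFlaps; any second unique summary
+ %% resets the flap count to zero.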
+ ?REACT({calculate_flaps, queue:len(H), UniqueProposalSummaries}),
+ case {queue:len(H), UniqueProposalSummaries} of
+ {N, [_]} when N >= length(P_newprop#projection_v1.all_members) ->
+ NewFlaps = TempNewFlaps,
+ if element(2,FlapStart) == ?NOT_FLAPPING ->
+ NewFlapStart = {{epk,P_newprop#projection_v1.epoch_number},now()};
+ true ->
+ NewFlapStart = FlapStart
+ end,
+
+ %% Wow, this behavior is almost spooky.
+ %%
+ %% For an example partition map [{c,a}], on the very first
+ %% time this 'if' clause is hit by FLU b, AllHosed=[a,c].
+ %% How the heck does b know that??
+ %%
+ %% If I use:
+ %% DownUnionQQQ = [{P#projection_v1.epoch_number, P#projection_v1.author_server, P#projection_v1.down} || P <- [BestP|NotBestPs]],
+ %% AllHosed = [x_1] ++ DownUnion ++ [x_2] ++ HosedTransUnion ++ [x_3] ++ BadFLUs ++ [{downunionqqq, DownUnionQQQ}];
+ %%
+ %% ... then b sees this when proposing epoch 451:
+ %%
+ %% {all_hosed,
+ %% [x_1,a,c,x_2,x_3,
+ %% {downunionqqq,
+ %% [{450,a,[c]},{449,b,[]},{448,c,[a]},{441,d,[]}]}]},
+ %%
+ %% So b's working on epoch 451 at the same time that d's latest
+ %% public projection is only epoch 441. But there's enough
+ %% lag so that b can "see" that a's bad=[c] (due to t_timeout!)
+ %% and c's bad=[a]. So voila, b magically knows about both
+ %% problem FLUs. Weird/cool.
+
+ AllFlapCounts = TempAllFlapCounts,
+ AllHosed = lists:usort(DownUnion ++ HosedTransUnion ++ BadFLUs);
+ {_N, _} ->
+ NewFlaps = 0,
+ NewFlapStart = {{epk,-1},?NOT_FLAPPING},
+ AllFlapCounts = [],
+ AllHosed = []
+ end,
+
+ %% If there's at least one count in AllFlapCounts that isn't my
+ %% flap count, and if it's over the flap limit, then consider them
+ %% settled.
+ AllFlapCountsSettled = lists:keydelete(MyName, 1, AllFlapCounts) /= []
+ andalso
+ my_find_minmost(AllFlapCounts) >= FlapLimit,
+ FlappingI = {flapping_i, [{flap_count, {NewFlapStart, NewFlaps}},
+ {all_hosed, AllHosed},
+ {all_flap_counts, lists:sort(AllFlapCounts)},
+ {all_flap_counts_settled, AllFlapCountsSettled},
+ {bad,BadFLUs},
+ {da_downu, DownUnion}, % debugging aid
+ {da_hosedtu, HosedTransUnion}, % debugging aid
+ {da_downreports, [{P#projection_v1.epoch_number, P#projection_v1.author_server, P#projection_v1.down} || P <- [BestP|NotBestPs]]} % debugging aid
+ ]},
+ Dbg2 = [FlappingI|P_newprop#projection_v1.dbg],
+ %% SLF TODO: 2015-03-04: I'm growing increasingly suspicious of
+ %% the 'runenv' variable that's threaded through all this code.
+ %% It isn't doing what I'd originally intended. And I think that
+ %% the flapping information that we've just constructed here is
+ %% going to get lost, and that's a shame. Fix it.
+ RunEnv2 = replace(RunEnv1, [FlappingI]),
+ %% NOTE: Incrementing flaps here doesn't mean that someone's
+ %% public proj store has been updated. For example, if we loop
+ %% through states C2xx a few times, we would incr flaps each
+ %% time ... but the C2xx path doesn't write a new proposal to
+ %% everyone's public proj stores, and there's no guarantee that
+ %% anyone else has written a new public proj either.
+ {update_projection_checksum(P_newprop#projection_v1{dbg=Dbg2}),
+ S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv2}}.
+
+projection_transitions_are_sane(Ps, RelativeToServer) ->
+ projection_transitions_are_sane(Ps, RelativeToServer, false).
+
+-ifdef(TEST).
+projection_transitions_are_sane_retrospective(Ps, RelativeToServer) ->
+ projection_transitions_are_sane(Ps, RelativeToServer, true).
+-endif. % TEST
+
+projection_transitions_are_sane([], _RelativeToServer, _RetrospectiveP) ->
+ true;
+projection_transitions_are_sane([_], _RelativeToServer, _RetrospectiveP) ->
+ true;
+projection_transitions_are_sane([P1, P2|T], RelativeToServer, RetrospectiveP) ->
+ case projection_transition_is_sane(P1, P2, RelativeToServer,
+ RetrospectiveP) of
+ true ->
+ projection_transitions_are_sane([P2|T], RelativeToServer,
+ RetrospectiveP);
+ Else ->
+ Else
+ end.
+
+projection_transition_is_sane(P1, P2, RelativeToServer) ->
+ projection_transition_is_sane(P1, P2, RelativeToServer, false).
+
+-ifdef(TEST).
+projection_transition_is_sane_retrospective(P1, P2, RelativeToServer) ->
+ projection_transition_is_sane(P1, P2, RelativeToServer, true).
+-endif. % TEST
+
+projection_transition_is_sane(
+ #projection_v1{epoch_number=Epoch1,
+ epoch_csum=CSum1,
+ creation_time=CreationTime1,
+ author_server=AuthorServer1,
+ all_members=All_list1,
+ down=Down_list1,
+ upi=UPI_list1,
+ repairing=Repairing_list1,
+ dbg=Dbg1} = P1,
+ #projection_v1{epoch_number=Epoch2,
+ epoch_csum=CSum2,
+ creation_time=CreationTime2,
+ author_server=AuthorServer2,
+ all_members=All_list2,
+ down=Down_list2,
+ upi=UPI_list2,
+ repairing=Repairing_list2,
+ dbg=Dbg2} = P2,
+ RelativeToServer, RetrospectiveP) ->
+ try
+ %% General notes:
+ %%
+ %% I'm making no attempt to be "efficient" here. All of these data
+ %% structures are small, and they're not called zillions of times per
+ %% second.
+ %%
+ %% The chain sequence/order checks at the bottom of this function aren't
+ %% as easy-to-read as they ought to be. However, I'm moderately confident
+ %% that it isn't buggy. TODO: refactor them for clarity.
+
+ true = is_integer(Epoch1) andalso is_integer(Epoch2),
+ true = is_binary(CSum1) andalso is_binary(CSum2),
+ {_,_,_} = CreationTime1,
+ {_,_,_} = CreationTime2,
+ true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo will probably change
+ true = is_list(All_list1) andalso is_list(All_list2),
+ true = is_list(Down_list1) andalso is_list(Down_list2),
+ true = is_list(UPI_list1) andalso is_list(UPI_list2),
+ true = is_list(Repairing_list1) andalso is_list(Repairing_list2),
+ true = is_list(Dbg1) andalso is_list(Dbg2),
+
+ true = Epoch2 > Epoch1,
+ All_list1 = All_list2, % todo will probably change
+
+ %% No duplicates
+ true = lists:sort(Down_list2) == lists:usort(Down_list2),
+ true = lists:sort(UPI_list2) == lists:usort(UPI_list2),
+ true = lists:sort(Repairing_list2) == lists:usort(Repairing_list2),
+
+ %% Disjoint-ness
+ true = lists:sort(All_list2) == lists:sort(Down_list2 ++ UPI_list2 ++
+ Repairing_list2),
+ [] = [X || X <- Down_list2, not lists:member(X, All_list2)],
+ [] = [X || X <- UPI_list2, not lists:member(X, All_list2)],
+ [] = [X || X <- Repairing_list2, not lists:member(X, All_list2)],
+ DownS2 = sets:from_list(Down_list2),
+ UPIS2 = sets:from_list(UPI_list2),
+ RepairingS2 = sets:from_list(Repairing_list2),
+ true = sets:is_disjoint(DownS2, UPIS2),
+ true = sets:is_disjoint(DownS2, RepairingS2),
+ true = sets:is_disjoint(UPIS2, RepairingS2),
+
+ %% The author must not be down.
+ false = lists:member(AuthorServer1, Down_list1),
+ false = lists:member(AuthorServer2, Down_list2),
+ %% The author must be in either the UPI or repairing list.
+ true = lists:member(AuthorServer1, UPI_list1 ++ Repairing_list1),
+ true = lists:member(AuthorServer2, UPI_list2 ++ Repairing_list2),
+
+ %% Additions to the UPI chain may only be at the tail
+ UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2),
+ if UPI_common_prefix == [] ->
+ if UPI_list1 == [] orelse UPI_list2 == [] ->
+ %% If the common prefix is empty, then one of the
+ %% inputs must be empty.
+ true;
+ true ->
+ %% Otherwise, we have a case of UPI changing from
+ %% one of these two situations:
+ %%
+ %% UPI_list1 -> UPI_list2
+ %% -------------------------------------------------
+ %% [d,c,b,a] -> [c,a]
+ %% [d,c,b,a] -> [c,a,repair_finished_added_to_tail].
+ NotUPI2 = (Down_list2 ++ Repairing_list2),
+ case lists:prefix(UPI_list1 -- NotUPI2, UPI_list2) of
+ true ->
+ true;
+ false ->
+ %% Here's a possible failure scenario:
+ %% UPI_list1 -> UPI_list2
+ %% Repairing_list1 -> Repairing_list2
+ %% -----------------------------------
+ %% [a,b,c] author=a -> [c,a] author=c
+ %% [] [b]
+ %%
+ %% ... where RelativeToServer=b. In this case, b
+ %% has been partitioned for a while and has only
+ %% now learned of several epoch transitions.
+ %% If the author of both is also in the UPI of
+ %% both, then those authors would not have allowed
+ %% a bad transition, so we will assume this
+ %% transition is OK.
+ lists:member(AuthorServer1, UPI_list1)
+ andalso
+ lists:member(AuthorServer2, UPI_list2)
+ end
+ end;
+ true ->
+ true
+ end,
+ true = lists:prefix(UPI_common_prefix, UPI_list1),
+ true = lists:prefix(UPI_common_prefix, UPI_list2),
+ UPI_1_suffix = UPI_list1 -- UPI_common_prefix,
+ UPI_2_suffix = UPI_list2 -- UPI_common_prefix,
+
+ MoreCheckingP =
+ RelativeToServer == undefined
+ orelse
+ not (lists:member(RelativeToServer, Down_list2) orelse
+ lists:member(RelativeToServer, Repairing_list2)),
+
+ if not MoreCheckingP ->
+ ok;
+ MoreCheckingP ->
+ %% Where did elements in UPI_2_suffix come from?
+ %% Only two sources are permitted.
+ [lists:member(X, Repairing_list1) % X added after repair done
+ orelse
+ lists:member(X, UPI_list1) % X in UPI_list1 after common pref
+ || X <- UPI_2_suffix],
+
+ %% The UPI_2_suffix must be exactly equal to: ordered items from
+ %% UPI_list1 concat'ed with ordered items from Repairing_list1.
+ %% Both temp vars below preserve relative order!
+ UPI_2_suffix_from_UPI1 = [X || X <- UPI_1_suffix,
+ lists:member(X, UPI_list2)],
+ UPI_2_suffix_from_Repairing1 = [X || X <- UPI_2_suffix,
+ lists:member(X, Repairing_list1)],
+ %% true?
+ UPI_2_concat = (UPI_2_suffix_from_UPI1 ++ UPI_2_suffix_from_Repairing1),
+ if UPI_2_suffix == UPI_2_concat ->
+ ok;
+ true ->
+ if RetrospectiveP ->
+ %% We are in retrospective mode. But there are
+ %% some transitions that are difficult to find
+ %% when standing outside of all of the FLUs and
+ %% examining their behavior. (In contrast to
+ %% this same function being called "in the path"
+ %% of a projection transition by a particular FLU
+ %% which knows exactly its prior projection and
+ %% exactly what it intends to do.) Perhaps this
+ %% exception clause here can go away with
+ %% better/more clever retrospection analysis?
+ %%
+ %% Here's a case that PULSE found:
+ %% FLU b:
+ %% E=257: UPI=[c,a], REPAIRING=[b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %% FLU a:
+ %% E=251: UPI=[c], REPAIRING=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %% FLU c:
+ %% E=282: UPI=[c], REPAIRING=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %%
+ %% From the perspective of each individual FLU,
+ %% the unanimous transition at epoch #284 is
+ %% good. The repair that is done by FLU c -> a
+ %% is likewise good.
+ %%
+ %% From a retrospective point of view (and the
+ %% current implementation), there's a bad-looking
+ %% transition from epoch #269 to #284. This is
+ %% from the point of view of the last two
+ %% unanimous private projection store epochs:
+ %%
+ %% E=269: UPI=[c], REPAIRING=[], DOWN=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %%
+ %% The retrospective view by
+ %% machi_chain_manager1_pulse.erl just can't
+ %% reason correctly about this situation. We
+ %% will instead rely on the non-introspective
+ %% sanity checking that each FLU does before it
+ %% writes to its private projection store and
+ %% then adopts that projection (and unwedges
+ %% itself, etc etc).
+
+ %% io:format(user, "QQQ: RetrospectiveP ~p\n", [RetrospectiveP]),
+ %% io:format(user, "QQQ: UPI_2_suffix ~p\n", [UPI_2_suffix]),
+ %% io:format(user, "QQQ: UPI_2_suffix_from_UPI1 ~p\n", [UPI_2_suffix_from_UPI1]),
+ %% io:format(user, "QQQ: UPI_2_suffix_from_Repairing1 ~p\n", [UPI_2_suffix_from_Repairing1]),
+ io:format(user, "|~p,~p TODO revisit|",
+ [?MODULE, ?LINE]),
+ ok;
+ not RetrospectiveP ->
+ exit({upi_2_suffix_error})
+ end
+ end
+ end,
+ true
+ catch
+ _Type:_Err ->
+ S1 = make_projection_summary(P1),
+ S2 = make_projection_summary(P2),
+ Trace = erlang:get_stacktrace(),
+ %% TODO: this history goop is useful sometimes for debugging but
+ %% not for any "real" use. Get rid of it, for the long term.
+ H = (catch [{FLUName, Type, P#projection_v1.epoch_number, make_projection_summary(P)} ||
+ FLUName <- P1#projection_v1.all_members,
+ Type <- [public,private],
+ P <- machi_flu0:proj_get_all(FLUName, Type)]),
+ {err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer,
+ history, (catch lists:sort(H)),
+ stack, Trace}
+ end.
+
+find_common_prefix([], _) ->
+ [];
+find_common_prefix(_, []) ->
+ [];
+find_common_prefix([H|L1], [H|L2]) ->
+ [H|find_common_prefix(L1, L2)];
+find_common_prefix(_, _) ->
+ [].
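+
+%% For example: find_common_prefix([a,b,c], [a,b,x]) -> [a,b].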
+
+sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list) ->
+ Front = lists:takewhile(fun(X) -> X /= FLU end, FLU_list),
+ Index = length(Front) + 1,
+ NumNodes = length(FLU_list),
+ SleepIndex = NumNodes - Index,
+ SleepChunk = MaxSleep div NumNodes,
+ SleepTime = MinSleep + (SleepChunk * SleepIndex),
+ timer:sleep(SleepTime),
+ SleepTime.
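+
+%% Worked example (illustrative): sleep_ranked_order(10, 100, b,
+%% [a,b,c]) computes Index=2, SleepIndex=1, SleepChunk=33, and so
+%% sleeps 10 + 33 = 43 ms; nodes nearer the head of FLU_list sleep
+%% longer, giving later-listed nodes the first chance to react.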
+
+my_find_minmost([]) ->
+ 0;
+my_find_minmost([{_,_}|_] = TransFlapCounts0) ->
+ lists:min([FlapCount || {_T, {_FlTime, FlapCount}} <- TransFlapCounts0]);
+my_find_minmost(TransFlapCounts0) ->
+ lists:min(TransFlapCounts0).
+
+get_raw_flapping_i(#projection_v1{dbg=Dbg}) ->
+ proplists:get_value(flapping_i, Dbg, []).
+
+get_flap_count(P) ->
+ proplists:get_value(flap_count, get_raw_flapping_i(P), 0).
+
+get_all_flap_counts(P) ->
+ proplists:get_value(all_flap_counts, get_raw_flapping_i(P), []).
+
+get_all_flap_counts_counts(P) ->
+ case get_all_flap_counts(P) of
+ [] ->
+ [];
+ [{_,{_,_}}|_] = Cs ->
+ [Count || {_FLU, {_Time, Count}} <- Cs]
+ end.
+
+get_all_hosed(P) when is_record(P, projection_v1)->
+ proplists:get_value(all_hosed, get_raw_flapping_i(P), []);
+get_all_hosed(S) when is_record(S, ch_mgr) ->
+ proplists:get_value(all_hosed,
+ proplists:get_value(flapping_i, S#ch_mgr.runenv, []),
+ []).
+
+merge_flap_counts(FlapCounts) ->
+ merge_flap_counts(FlapCounts, orddict:new()).
+
+merge_flap_counts([], D) ->
+ orddict:to_list(D);
+merge_flap_counts([FlapCount|Rest], D1) ->
+ %% We know that FlapCount is list({Actor, {{_epk,FlapStartTime},NumFlaps}}).
+ D2 = orddict:from_list(FlapCount),
+ %% If the FlapStartTimes are identical, then pick the bigger flap count.
+ %% If the FlapStartTimes differ, then pick the larger start time tuple.
+ D3 = orddict:merge(fun(_Key, {{_,T1}, NF1}= V1, {{_,T2}, NF2}=V2)
+ when T1 == T2 ->
+ if NF1 > NF2 ->
+ V1;
+ true ->
+ V2
+ end;
+ (_Key, {{_,T1},_NF1}= V1, {{_,T2},_NF2}=V2) ->
+ if T1 > T2 ->
+ V1;
+ true ->
+ V2
+ end;
+ (_Key, V1, V2) ->
+ exit({bad_merge_2tuples,mod,?MODULE,line,?LINE,
+ _Key, V1, V2})
+ end, D1, D2),
+ merge_flap_counts(Rest, D3).
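+
+%% Example for merge_flap_counts/2 (illustrative): when both inputs
+%% mention the same actor with the same flap start time, the larger
+%% count wins:
+%%
+%%     merge_flap_counts([[{a, {{epk,{1,2,3}}, 5}}],
+%%                        [{a, {{epk,{1,2,3}}, 7}}]])
+%%     %% -> [{a, {{epk,{1,2,3}}, 7}}]
+%%
+%% With differing start times, the entry with the later start wins.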
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+perhaps_call_t(S, Partitions, FLU, DoIt) ->
+ try
+ perhaps_call(S, Partitions, FLU, DoIt)
+ catch
+ exit:timeout ->
+ t_timeout
+ end.
+
+perhaps_call(#ch_mgr{name=MyName, myflu=MyFLU}, Partitions, FLU, DoIt) ->
+ RemoteFLU_p = FLU /= MyFLU,
+ case RemoteFLU_p andalso lists:member({MyName, FLU}, Partitions) of
+ false ->
+ Res = DoIt(),
+ case RemoteFLU_p andalso lists:member({FLU, MyName}, Partitions) of
+ false ->
+ Res;
+ _ ->
+ (catch put(react, [{timeout2,me,MyFLU,to,FLU,RemoteFLU_p,Partitions}|get(react)])),
+ exit(timeout)
+ end;
+ _ ->
+ (catch put(react, [{timeout1,me,MyFLU,to,FLU,RemoteFLU_p,Partitions}|get(react)])),
+ exit(timeout)
+ end.
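+%% Note on perhaps_call/4 above: {MyName, FLU} in Partitions cuts the
+%% request direction, so timeout1 fires before DoIt() runs; {FLU, MyName}
+%% cuts the reply direction, so timeout2 fires after DoIt() has already
+%% taken effect remotely. That asymmetry is what lets the simulator model
+%% one-way network partitions.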
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
diff --git a/test/machi_chain_manager1_pulse.erl b/test/machi_chain_manager1_pulse.erl
new file mode 100644
index 0000000..b95cf00
--- /dev/null
+++ b/test/machi_chain_manager1_pulse.erl
@@ -0,0 +1,379 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1_pulse).
+
+%% The whole module is ifdef:ed; rebar should set PULSE
+-ifdef(PULSE).
+
+-compile(export_all).
+
+-include_lib("eqc/include/eqc.hrl").
+-include_lib("eqc/include/eqc_statem.hrl").
+
+-include("machi.hrl").
+
+-include_lib("eunit/include/eunit.hrl").
+
+-compile({parse_transform, pulse_instrument}).
+-compile({pulse_replace_module, [{application, pulse_application}]}).
+%% The following functions contain side effects but are run outside
+%% PULSE, i.e., PULSE needs to leave them alone
+-compile({pulse_skip,[{prop_pulse_test_,0}]}).
+-compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}).
+
+%% Used for output within EUnit...
+-define(QC_FMT(Fmt, Args),
+ io:format(user, Fmt, Args)).
+
+%% And to force EUnit to output QuickCheck output...
+-define(QC_OUT(P),
+ eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)).
+
+-define(MGR, machi_chain_manager1).
+-define(MGRTEST, machi_chain_manager1_test).
+
+-record(state, {
+ step=0,
+ num_pids,
+ pids,
+ dump_state
+ }).
+
+initial_state() ->
+ #state{}.
+
+gen_num_pids() ->
+ choose(2, 5).
+
+gen_seed() ->
+ noshrink({choose(1, 10000), choose(1, 10000), choose(1, 10000)}).
+
+gen_old_threshold() ->
+ noshrink(choose(1, 100)).
+
+gen_no_partition_threshold() ->
+ noshrink(choose(1, 100)).
+
+command(#state{step=0}) ->
+ {call, ?MODULE, setup, [gen_num_pids(), gen_seed()]};
+command(S) ->
+ frequency([
+ { 1, {call, ?MODULE, change_partitions,
+ [gen_old_threshold(), gen_no_partition_threshold()]}},
+ {50, {call, ?MODULE, do_ticks,
+ [choose(5, 100), S#state.pids,
+ gen_old_threshold(), gen_no_partition_threshold()]}}
+ ]).
+
+precondition(_S, _) ->
+ true.
+
+next_state(#state{step=Step}=S, Res, Call) ->
+ next_state2(S#state{step=Step + 1}, Res, Call).
+
+next_state2(S, Res, {call, _, setup, [NumPids, _Seed]}) ->
+ S#state{num_pids=NumPids, pids=Res};
+next_state2(S, Res, {call, _, dump_state, _Args}) ->
+ S#state{dump_state=Res};
+next_state2(S, _Res, {call, _, _Func, _Args}) ->
+ S.
+
+postcondition(_S, {call, _, _Func, _Args}, _Res) ->
+ true.
+
+all_list() ->
+ [a,b,c].
+ %% [a,b,c,d,e].
+
+setup(_Num, Seed) ->
+ ?QC_FMT("\nsetup,", []),
+ All_list = all_list(),
+ _ = machi_partition_simulator:start_link(Seed, 0, 100),
+ _Partitions = machi_partition_simulator:get(All_list),
+
+ FLU_pids = [begin
+ {ok, FLUPid} = machi_flu0:start_link(Name),
+ _ = machi_flu0:get_epoch(FLUPid),
+ FLUPid
+ end || Name <- All_list],
+ Namez = lists:zip(All_list, FLU_pids),
+ Mgr_pids = [begin
+ {ok, Mgr} = ?MGR:start_link(Name, All_list, FLU_pid),
+ Mgr
+ end || {Name, FLU_pid} <- Namez],
+ timer:sleep(1),
+ {ok, P1} = ?MGR:test_calc_projection(hd(Mgr_pids), false),
+ P1Epoch = P1#projection.epoch_number,
+ [ok = machi_flu0:proj_write(FLU, P1Epoch, public, P1) || FLU <- FLU_pids],
+ [?MGR:test_react_to_env(Mgr) || Mgr <- Mgr_pids],
+
+ Res = {FLU_pids, Mgr_pids},
+ put(manager_pids_hack, Res),
+ Res.
+
+change_partitions(OldThreshold, NoPartitionThreshold) ->
+ machi_partition_simulator:reset_thresholds(OldThreshold,
+ NoPartitionThreshold).
+
+always_last_partitions() ->
+ machi_partition_simulator:always_last_partitions().
+
+private_stable_check(FLUs) ->
+ {_FLU_pids, Mgr_pids} = get(manager_pids_hack),
+ Res = private_projections_are_stable_check(FLUs, Mgr_pids),
+ if not Res ->
+ io:format(user, "BUMMER: private stable check failed!\n", []);
+ true ->
+ ok
+ end,
+ Res.
+
+do_ticks(Num, PidsMaybe, OldThreshold, NoPartitionThreshold) ->
+ io:format(user, "~p,~p,~p|", [Num, OldThreshold, NoPartitionThreshold]),
+ {_FLU_pids, Mgr_pids} = case PidsMaybe of
+ undefined -> get(manager_pids_hack);
+ _ -> PidsMaybe
+ end,
+ if is_integer(OldThreshold) ->
+ machi_partition_simulator:reset_thresholds(OldThreshold,
+ NoPartitionThreshold);
+ true ->
+ ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number()]),
+ machi_partition_simulator:no_partitions()
+ end,
+ Res = exec_ticks(Num, Mgr_pids),
+ if not is_integer(OldThreshold) ->
+ ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number()]);
+ true ->
+ ok
+ end,
+ Res.
+
+get_biggest_private_epoch_number() ->
+ lists:last(
+ lists:usort(
+ lists:flatten(
+ [machi_flu0:proj_list_all(FLU, private) ||
+ FLU <- all_list()]))).
+
+dump_state() ->
+ try
+ ?QC_FMT("dump_state(", []),
+ {FLU_pids, _Mgr_pids} = get(manager_pids_hack),
+        Namez = lists:zip(all_list(), FLU_pids),
+ Report = ?MGRTEST:unanimous_report(Namez),
+ %% ?QC_FMT("Report ~p\n", [Report]),
+
+ Diag1 = [begin
+ Ps = machi_flu0:proj_get_all(FLU, Type),
+ [io_lib:format("~p ~p ~p: ~w\n", [FLUName, Type, P#projection.epoch_number, ?MGR:make_projection_summary(P)]) || P <- Ps]
+ end || {FLUName, FLU} <- Namez,
+ Type <- [public] ],
+
+ UniquePrivateEs =
+ lists:usort(lists:flatten(
+ [machi_flu0:proj_list_all(FLU, private) ||
+ {_FLUName, FLU} <- Namez])),
+ P_lists0 = [{FLUName, Type, machi_flu0:proj_get_all(FLUPid, Type)} ||
+ {FLUName, FLUPid} <- Namez, Type <- [public,private]],
+ P_lists = [{FLUName, Type, P} || {FLUName, Type, Ps} <- P_lists0,
+ P <- Ps],
+ AllDict = lists:foldl(fun({FLU, Type, P}, D) ->
+ K = {FLU, Type, P#projection.epoch_number},
+ dict:store(K, P, D)
+ end, dict:new(), lists:flatten(P_lists)),
+ DumbFinderBackward =
+ fun(FLUName) ->
+ fun(E, error_unwritten) ->
+ case dict:find({FLUName, private, E}, AllDict) of
+ {ok, T} -> T;
+ error -> error_unwritten
+ end;
+ %% case machi_flu0:proj_read(FLU, E, private) of
+ %% {ok, T} -> T;
+ %% Else -> Else
+ %% end;
+ (_E, Acc) ->
+ Acc
+ end
+ end,
+ Diag2 = [[
+ io_lib:format("~p private: ~w\n",
+ [FLUName,
+ ?MGR:make_projection_summary(
+ lists:foldl(DumbFinderBackward(FLUName),
+ error_unwritten,
+ lists:seq(Epoch, 0, -1)))])
+ || {FLUName, _FLU} <- Namez]
+ || Epoch <- UniquePrivateEs],
+
+ ?QC_FMT(")", []),
+ {Report, lists:flatten([Diag1, Diag2])}
+ catch XX:YY ->
+ ?QC_FMT("OUCH: ~p ~p @ ~p\n", [XX, YY, erlang:get_stacktrace()])
+ end.
+
+prop_pulse() ->
+ ?FORALL({Cmds0, Seed}, {non_empty(commands(?MODULE)), pulse:seed()},
+ ?IMPLIES(1 < length(Cmds0) andalso length(Cmds0) < 5,
+ begin
+ ok = shutdown_hard(),
+        %% PULSE can be really unfair, of course, including having exec_ticks
+        %% run all of FLU a's ticks and only then FLU b's. Such a situation
+        %% doesn't always allow unanimous private projection store values:
+        %% FLU a might need one more tick to write its private projection, but
+        %% it isn't given a chance at the end of the PULSE run. So we cheat.
+ Stabilize1 = [{set,{var,99999995},
+ {call, ?MODULE, always_last_partitions, []}}],
+ Stabilize2 = [{set,{var,99999996},
+ {call, ?MODULE, private_stable_check, [all_list()]}}],
+ LastTriggerTicks = {set,{var,99999997},
+ {call, ?MODULE, do_ticks, [25, undefined, no, no]}},
+ Cmds1 = lists:duplicate(2, LastTriggerTicks),
+ %% Cmds1 = lists:duplicate(length(all_list())*2, LastTriggerTicks),
+ Cmds = Cmds0 ++
+ Stabilize1 ++
+ Cmds1 ++
+ Stabilize2 ++
+ [{set,{var,99999999}, {call, ?MODULE, dump_state, []}}],
+ {_H2, S2, Res} = pulse:run(
+ fun() ->
+ {_H, _S, _R} = run_commands(?MODULE, Cmds)
+ end, [{seed, Seed},
+ {strategy, unfair}]),
+ ok = shutdown_hard(),
+
+ {Report, Diag} = S2#state.dump_state,
+
+ %% Report is ordered by Epoch. For each private projection
+ %% written during any given epoch, confirm that all chain
+ %% members appear in only one unique chain, i.e., the sets of
+ %% unique chains are disjoint.
+ AllDisjointP = ?MGRTEST:all_reports_are_disjoint(Report),
+
+ %% Given the report, we flip it around so that we observe the
+ %% sets of chain transitions relative to each FLU.
+ R_Chains = [?MGRTEST:extract_chains_relative_to_flu(FLU, Report) ||
+ FLU <- all_list()],
+ R_Projs = [{FLU, [?MGRTEST:chain_to_projection(
+ FLU, Epoch, UPI, Repairing, all_list()) ||
+ {Epoch, UPI, Repairing} <- E_Chains]} ||
+ {FLU, E_Chains} <- R_Chains],
+
+ %% For each chain transition experienced by a particular FLU,
+ %% confirm that each state transition is OK.
+ Sane =
+ [{FLU,_SaneRes} = {FLU,?MGR:projection_transitions_are_sane_retrospective(
+ Ps, FLU)} ||
+ {FLU, Ps} <- R_Projs],
+ SaneP = lists:all(fun({_FLU, SaneRes}) -> SaneRes == true end, Sane),
+
+ %% The final report item should say that all are agreed_membership.
+ {_LastEpoch, {ok_disjoint, LastRepXs}} = lists:last(Report),
+ AgreedOrNot = lists:usort([element(1, X) || X <- LastRepXs]),
+
+ %% TODO: Check that we've converged to a single chain with no repairs.
+ SingleChainNoRepair = case LastRepXs of
+ [{agreed_membership,{_UPI,[]}}] ->
+ true;
+ _ ->
+ LastRepXs
+ end,
+
+ ?WHENFAIL(
+ begin
+ ?QC_FMT("Res = ~p\n", [Res]),
+ ?QC_FMT("Diag = ~s\n", [Diag]),
+ ?QC_FMT("Report = ~p\n", [Report]),
+ ?QC_FMT("Sane = ~p\n", [Sane]),
+ ?QC_FMT("SingleChainNoRepair failure =\n ~p\n", [SingleChainNoRepair])
+ end,
+ conjunction([{res, Res == true orelse Res == ok},
+ {all_disjoint, AllDisjointP},
+ {sane, SaneP},
+ {all_agreed_at_end, AgreedOrNot == [agreed_membership]},
+ {single_chain_no_repair, SingleChainNoRepair}
+ ]))
+ end)).
+
+prop_pulse_test_() ->
+ Timeout = case os:getenv("PULSE_TIME") of
+ false -> 60;
+ Val -> list_to_integer(Val)
+ end,
+ ExtraTO = case os:getenv("PULSE_SHRINK_TIME") of
+ false -> 0;
+ Val2 -> list_to_integer(Val2)
+ end,
+ {timeout, (Timeout+ExtraTO+300), % 300 = a bit more fudge time
+ fun() ->
+ ?assert(eqc:quickcheck(eqc:testing_time(Timeout,
+ ?QC_OUT(prop_pulse()))))
+ end}.
+
+shutdown_hard() ->
+ (catch machi_partition_simulator:stop()),
+ [(catch machi_flu0:stop(X)) || X <- all_list()],
+ timer:sleep(1),
+ (catch exit(whereis(machi_partition_simulator), kill)),
+ [(catch exit(whereis(X), kill)) || X <- all_list()],
+ erlang:yield(),
+ ok.
+
+exec_ticks(Num, Mgr_pids) ->
+ Parent = self(),
+ Pids = [spawn_link(fun() ->
+ [begin
+ erlang:yield(),
+ Max = 10,
+ Elapsed =
+ ?MGR:sleep_ranked_order(1, Max, M_name, all_list()),
+ Res = ?MGR:test_react_to_env(MMM),
+ timer:sleep(erlang:max(0, Max - Elapsed)),
+ Res=Res %% ?D({self(), Res})
+ end || _ <- lists:seq(1,Num)],
+ Parent ! done
+ end) || {M_name, MMM} <- lists:zip(all_list(), Mgr_pids) ],
+ [receive
+ done ->
+ ok
+ after 5000 ->
+ exit(icky_timeout)
+ end || _ <- Pids],
+ ok.
+
+private_projections_are_stable_check(All_list, Mgr_pids) ->
+ %% TODO: extend the check to look not only for latest num, but
+ %% also check for flapping, and if yes, to see if all_hosed are
+ %% all exactly equal.
+
+ _ = exec_ticks(40, Mgr_pids),
+ Private1 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ FLU <- All_list],
+ _ = exec_ticks(5, Mgr_pids),
+ Private2 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ FLU <- All_list],
+
+ (Private1 == Private2).
+
+
+-endif. % PULSE
diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl
new file mode 100644
index 0000000..def16c7
--- /dev/null
+++ b/test/machi_chain_manager1_test.erl
@@ -0,0 +1,589 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1_test).
+
+-include("machi.hrl").
+-include("machi_projection.hrl").
+
+-define(MGR, machi_chain_manager1).
+
+-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
+-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+-define(FLU_C, machi_flu1_client).
+
+-export([]).
+
+-ifdef(TEST).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+%% -include_lib("eqc/include/eqc_statem.hrl").
+-define(QC_OUT(P),
+ eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
+-endif.
+
+-include_lib("eunit/include/eunit.hrl").
+-compile(export_all).
+
+unanimous_report(Namez) ->
+ UniquePrivateEs =
+ lists:usort(lists:flatten(
+ [machi_flu0:proj_list_all(FLU, private) ||
+ {_FLUName, FLU} <- Namez])),
+ [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
+
+unanimous_report(Epoch, Namez) ->
+ Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
+ {ok, T} -> T;
+ _Else -> not_in_this_epoch
+ end} || {FLUName, FLU} <- Namez],
+ UPI_R_Sums = [{Proj#projection_v1.upi, Proj#projection_v1.repairing,
+ Proj#projection_v1.epoch_csum} ||
+ {_FLUname, Proj} <- Projs,
+ is_record(Proj, projection_v1)],
+ UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
+ Res =
+ [begin
+ case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
+ U == UPI]) of
+ [_1CSum] ->
+ %% Yay, there's only 1 checksum. Let's check
+ %% that all FLUs are in agreement.
+ {UPI, Repairing, _CSum} =
+ lists:keyfind(UPI, 1, UPI_R_Sums),
+                    %% TODO: make certain that this subtlety doesn't get
+                    %% lost in later implementations.
+
+ %% So, this is a bit of a tricky thing. If we're at
+ %% upi=[c] and repairing=[a,b], then the transition
+ %% (eventually!) to upi=[c,a] does not currently depend
+ %% on b being an active participant in the repair.
+ %%
+ %% Yes, b's state is very important for making certain
+ %% that all repair operations succeed both to a & b.
+ %% However, in this simulation, we only consider that
+ %% the head(Repairing) is sane. Therefore, we use only
+ %% the "HeadOfRepairing" in our considerations here.
+ HeadOfRepairing = case Repairing of
+ [H_Rep|_] ->
+ [H_Rep];
+ _ ->
+ []
+ end,
+ Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
+ P when is_record(P, projection_v1) ->
+ P#projection_v1.epoch_csum;
+ Else ->
+ Else
+ end} || FLU <- UPI ++ HeadOfRepairing],
+ case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
+ [_] ->
+ {agreed_membership, {UPI, Repairing}};
+ Else2 ->
+ {not_agreed, {UPI, Repairing}, Else2}
+ end;
+ _Else ->
+ {UPI, not_unique, Epoch, _Else}
+ end
+ end || UPI <- UniqueUPIs],
+ AgreedResUPI_Rs = [UPI++Repairing ||
+ {agreed_membership, {UPI, Repairing}} <- Res],
+ Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
+ lists:sort(lists:flatten(AgreedResUPI_Rs)) of
+ true ->
+ ok_disjoint;
+ false ->
+ bummer_NOT_DISJOINT
+ end,
+ {Epoch, {Tag, Res}}.
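+%% The disjointness Tag above works by duplicate detection (illustrative):
+%% if FLU b lands in two agreed chains, e.g. {[a,b],[]} and {[b,c],[]},
+%% the flattened membership [a,b,b,c] differs from its usort [a,b,c],
+%% so the epoch is tagged bummer_NOT_DISJOINT.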
+
+all_reports_are_disjoint(Report) ->
+ [] == [X || {_Epoch, Tuple}=X <- Report,
+ element(1, Tuple) /= ok_disjoint].
+
+extract_chains_relative_to_flu(FLU, Report) ->
+ {FLU, [{Epoch, UPI, Repairing} ||
+ {Epoch, {ok_disjoint, Es}} <- Report,
+ {agreed_membership, {UPI, Repairing}} <- Es,
+ lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
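+%% Illustrative example: for a report entry
+%% {5, {ok_disjoint, [{agreed_membership, {[a,b],[c]}}]}},
+%% extract_chains_relative_to_flu(c, Report) includes {5, [a,b], [c]}
+%% because c appears in the repairing list.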
+
+chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
+ ?MGR:make_projection(Epoch, MyName, All_list,
+ All_list -- (UPI_list ++ Repairing_list),
+ UPI_list, Repairing_list, []).
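+%% Illustrative example: chain_to_projection(a, 7, [a,b], [c], [a,b,c,d])
+%% computes the down list as [a,b,c,d] -- ([a,b] ++ [c]) = [d].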
+
+-ifndef(PULSE).
+
+smoke0_test() ->
+ {ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
+ Host = "localhost",
+ TcpPort = 6623,
+ {ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]),
+ {ok, M0} = ?MGR:start_link(a, [a,b,c], a),
+ SockA = machi_util:connect(Host, TcpPort),
+ try
+ pong = ?MGR:ping(M0)
+ after
+ ok = ?MGR:stop(M0),
+ ok = machi_flu0:stop(FLUa),
+ ok = machi_partition_simulator:stop()
+ end.
+
+smoke1_testTODO() ->
+ machi_partition_simulator:start_link({1,2,3}, 100, 0),
+ {ok, FLUa} = machi_flu0:start_link(a),
+ {ok, FLUb} = machi_flu0:start_link(b),
+ {ok, FLUc} = machi_flu0:start_link(c),
+ I_represent = I_am = a,
+ {ok, M0} = ?MGR:start_link(I_represent, [a,b,c], I_am),
+ try
+ {ok, _P1} = ?MGR:test_calc_projection(M0, false),
+
+ _ = ?MGR:test_calc_proposed_projection(M0),
+ {local_write_result, ok,
+ {remote_write_results, [{b,ok},{c,ok}]}} =
+ ?MGR:test_write_proposed_projection(M0),
+ {unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(M0, false),
+
+ ok
+ after
+ ok = ?MGR:stop(M0),
+ ok = machi_flu0:stop(FLUa),
+ ok = machi_flu0:stop(FLUb),
+ ok = machi_flu0:stop(FLUc),
+ ok = machi_partition_simulator:stop()
+ end.
+
+nonunanimous_setup_and_fix_testTODO() ->
+ machi_partition_simulator:start_link({1,2,3}, 100, 0),
+ {ok, FLUa} = machi_flu0:start_link(a),
+ {ok, FLUb} = machi_flu0:start_link(b),
+ I_represent = I_am = a,
+ {ok, Ma} = ?MGR:start_link(I_represent, [a,b], I_am),
+ {ok, Mb} = ?MGR:start_link(b, [a,b], b),
+ try
+ {ok, P1} = ?MGR:test_calc_projection(Ma, false),
+
+ P1a = ?MGR:update_projection_checksum(
+ P1#projection_v1{down=[b], upi=[a], dbg=[{hackhack, ?LINE}]}),
+ P1b = ?MGR:update_projection_checksum(
+ P1#projection_v1{author_server=b, creation_time=now(),
+ down=[a], upi=[b], dbg=[{hackhack, ?LINE}]}),
+ P1Epoch = P1#projection_v1.epoch_number,
+ ok = machi_flu0:proj_write(FLUa, P1Epoch, public, P1a),
+ ok = machi_flu0:proj_write(FLUb, P1Epoch, public, P1b),
+
+ ?D(x),
+ {not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
+ ?Dw(_XX),
+ {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+ %% The read repair here doesn't automatically trigger the creation of
+ %% a new projection (to try to create a unanimous projection). So
+ %% we expect nothing to change when called again.
+ {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+
+ {now_using, _} = ?MGR:test_react_to_env(Ma),
+ {unanimous,P2,E2} = ?MGR:test_read_latest_public_projection(Ma, false),
+ {ok, P2pa} = machi_flu0:proj_read_latest(FLUa, private),
+ P2 = P2pa#projection_v1{dbg2=[]},
+
+ %% FLUb should still be using proj #0 for its private use
+ {ok, P0pb} = machi_flu0:proj_read_latest(FLUb, private),
+ 0 = P0pb#projection_v1.epoch_number,
+
+ %% Poke FLUb to react ... should be using the same private proj
+ %% as FLUa.
+ {now_using, _} = ?MGR:test_react_to_env(Mb),
+ {ok, P2pb} = machi_flu0:proj_read_latest(FLUb, private),
+ P2 = P2pb#projection_v1{dbg2=[]},
+
+ ok
+ after
+ ok = ?MGR:stop(Ma),
+ ok = ?MGR:stop(Mb),
+ ok = machi_flu0:stop(FLUa),
+ ok = machi_flu0:stop(FLUb),
+ ok = machi_partition_simulator:stop()
+ end.
+
+short_doc() ->
+"
+A visualization of the convergence behavior of the chain self-management
+algorithm for Machi.
+ 1. Set up 4 FLUs and chain manager pairs.
+ 2. Create a number of different network partition scenarios, where
+ (simulated) partitions may be symmetric or asymmetric. Then halt changing
+ the partitions and keep the simulated network stable and broken.
+ 3. Run a number of iterations of the algorithm in parallel by poking each
+   of the manager processes on a random-ish basis.
+ 4. Afterward, fetch the chain transition changes made by each FLU and
+ verify that no transition was unsafe.
+
+During the iteration periods, the following is a cheatsheet for the output.
+See the internal source for interpreting the rest of the output.
+
+ 'Let loose the dogs of war!' Network instability
+ 'SET partitions = ' Network stability (but broken)
+ 'x uses:' The FLU x has made an internal state transition. The rest of
+ the line is a dump of internal state.
+ '{t}' This is a tick event which triggers one of the manager processes
+ to evaluate its environment and perhaps make a state transition.
+
+A long chain of '{t}{t}{t}{t}' means that the chain state has settled
+to a stable configuration, which is the goal of the algorithm.
+Press control-c to interrupt....".
+
+long_doc() ->
+ "
+'Let loose the dogs of war!'
+
+ The simulated network is very unstable for a few seconds.
+
+'x uses'
+
+ After a single iteration, server x has determined that the chain
+ should be defined by the upi, repair, and down list in this record.
+ If all participants reach the same conclusion at the same epoch
+ number (and checksum, see next item below), then the chain is
+ stable, fully configured, and can provide full service.
+
+'epoch,E'
+
+ The epoch number for this decision is E. The checksum of the full
+ record is not shown. For purposes of the protocol, a server will
+ 'wedge' itself and refuse service (until a new config is chosen)
+ whenever: a). it sees a bigger epoch number mentioned somewhere, or
+ b). it sees the same epoch number but a different checksum. In case
+ of b), there was a network partition that has healed, and both sides
+ had chosen to operate with an identical epoch number but different
+ chain configs.
+
+'upi', 'repair', and 'down'
+
+ Members in the chain that are fully in sync and thus preserving the
+ Update Propagation Invariant, up but under repair (simulated), and
+ down, respectively.
+
+'ps,[some list]'
+
+ The list of asymmetric network partitions. {a,b} means that a
+ cannot send to b, but b can send to a.
+
+ This partition list is recorded for debugging purposes but is *not*
+ used by the algorithm. The algorithm only 'feels' its effects via
+ simulated timeout whenever there's a partition in one of the
+ messaging directions.
+
+'nodes_up,[list]'
+
+  The best guess right now of which nodes are up, relative to the
+  author node, specified by '{author,X}'.
+
+'SET partitions = [some list]'
+
+ All subsequent iterations should have a stable list of partitions,
+ i.e. the 'ps' list described should be stable.
+
+'{FLAP: x flaps n}!'
+
+ Server x has detected that it's flapping/oscillating after iteration
+ n of a naive/1st draft detection algorithm.
+".
+
+convergence_demo_testTODO_() ->
+ {timeout, 98*300, fun() -> convergence_demo_testfun() end}.
+
+convergence_demo_testfun() ->
+ convergence_demo_testfun(3).
+
+convergence_demo_testfun(NumFLUs) ->
+ timer:sleep(100),
+ io:format(user, short_doc(), []),
+ %% Faster test startup, commented: timer:sleep(3000),
+
+ FLU_biglist = [a,b,c,d,e,f,g],
+ All_list = lists:sublist(FLU_biglist, NumFLUs),
+ io:format(user, "\nSET # of FLus = ~w members ~w).\n",
+ [NumFLUs, All_list]),
+ machi_partition_simulator:start_link({111,222,33}, 0, 100),
+ _ = machi_partition_simulator:get(All_list),
+
+ Namez =
+ [begin
+ {ok, Pid} = machi_flu0:start_link(Name),
+ {Name, Pid}
+ end || Name <- All_list ],
+
+ MgrOpts = [private_write_verbose],
+ MgrNamez =
+ [begin
+ {ok, MPid} = ?MGR:start_link(Name, All_list, FLUPid, MgrOpts),
+ {Name, MPid}
+ end || {Name, FLUPid} <- Namez],
+ try
+ [{_, Ma}|_] = MgrNamez,
+ {ok, P1} = ?MGR:test_calc_projection(Ma, false),
+ P1Epoch = P1#projection_v1.epoch_number,
+ [ok = machi_flu0:proj_write(FLUPid, P1Epoch, public, P1) ||
+ {_, FLUPid} <- Namez, FLUPid /= Ma],
+
+ machi_partition_simulator:reset_thresholds(10, 50),
+ _ = machi_partition_simulator:get(All_list),
+
+ Parent = self(),
+ DoIt = fun(Iters, S_min, S_max) ->
+ io:format(user, "\nDoIt: top\n\n", []),
+ Pids = [spawn(fun() ->
+ random:seed(now()),
+ [begin
+ erlang:yield(),
+ S_max_rand = random:uniform(
+ S_max + 1),
+ io:format(user, "{t}", []),
+ Elapsed =
+ ?MGR:sleep_ranked_order(
+ S_min, S_max_rand,
+ M_name, All_list),
+ _ = ?MGR:test_react_to_env(MMM),
+ %% if M_name == d ->
+ %% [_ = ?MGR:test_react_to_env(MMM) ||
+ %% _ <- lists:seq(1,3)],
+ %% superunfair;
+ %% true ->
+ %% ok
+ %% end,
+ %% Be more unfair by not
+ %% sleeping here.
+ %% timer:sleep(S_max - Elapsed),
+ Elapsed
+ end || _ <- lists:seq(1, Iters)],
+ Parent ! done
+ end) || {M_name, MMM} <- MgrNamez ],
+ [receive
+ done ->
+ ok
+ after 995000 ->
+ exit(icky_timeout)
+ end || _ <- Pids]
+ end,
+
+ _XandYs1 = [[{X,Y}] || X <- All_list, Y <- All_list, X /= Y],
+ _XandYs2 = [[{X,Y}, {A,B}] || X <- All_list, Y <- All_list, X /= Y,
+ A <- All_list, B <- All_list, A /= B,
+ X /= A],
+ _XandYs3 = [[{X,Y}, {A,B}, {C,D}] || X <- All_list, Y <- All_list, X /= Y,
+ A <- All_list, B <- All_list, A /= B,
+ C <- All_list, D <- All_list, C /= D,
+ X /= A, X /= C, A /= C],
+ %% AllPartitionCombinations = _XandYs1 ++ _XandYs2,
+ %% AllPartitionCombinations = _XandYs3,
+ AllPartitionCombinations = _XandYs1 ++ _XandYs2 ++ _XandYs3,
+ ?D({?LINE, length(AllPartitionCombinations)}),
+
+ machi_partition_simulator:reset_thresholds(10, 50),
+ io:format(user, "\nLet loose the dogs of war!\n", []),
+ DoIt(30, 0, 0),
+ [begin
+ io:format(user, "\nSET partitions = ~w.\n", [ [] ]),machi_partition_simulator:no_partitions(),
+ [DoIt(50, 10, 100) || _ <- [1,2,3]],
+
+ %% machi_partition_simulator:reset_thresholds(10, 50),
+ %% io:format(user, "\nLet loose the dogs of war!\n", []),
+ %% DoIt(30, 0, 0),
+
+ machi_partition_simulator:always_these_partitions(Partition),
+ io:format(user, "\nSET partitions = ~w.\n", [Partition]),
+ [DoIt(50, 10, 100) || _ <- [1,2,3,4] ],
+ PPP =
+ [begin
+ PPPallPubs = machi_flu0:proj_list_all(FLU, public),
+ [begin
+ {ok, Pr} = machi_flu0:proj_read(FLU, PPPepoch, public),
+ {Pr#projection_v1.epoch_number, FLUName, Pr}
+ end || PPPepoch <- PPPallPubs]
+ end || {FLUName, FLU} <- Namez],
+ io:format(user, "PPP ~p\n", [lists:sort(lists:append(PPP))]),
+
+ %%%%%%%% {stable,true} = {stable,private_projections_are_stable(Namez, DoIt)},
+ {hosed_ok,true} = {hosed_ok,all_hosed_lists_are_identical(Namez, Partition)},
+ io:format(user, "\nSweet, all_hosed are identical-or-islands-inconclusive.\n", []),
+ timer:sleep(1000),
+ ok
+ end || Partition <- AllPartitionCombinations
+ %% end || Partition <- [ [{a,b},{b,d},{c,b}],
+ %% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
+ %% %% [{a,b},{b,d},{c,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
+ %% [{a,b},{b,d},{c,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
+ %% [{a,b},{b,d},{c,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}],
+ %% [{a,b}, {c,b}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}] ] %% hosed-not-equal @ 3 FLUs
+ %% end || Partition <- [ [{a,b}],
+ %% [{b,a}] ]
+ %% end || Partition <- [ [{a,b}, {c,b}],
+ %% [{a,b}, {b,c}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}, {c,d}],
+ %% [{a,b}, {b,c},{b,d}, {c,d}],
+ %% [{b,a}, {b,c}, {c,d}],
+ %% [{a,b}, {c,b}, {c,d}],
+ %% [{a,b}, {b,c}, {d,c}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}, {c,d}, {d,e}],
+ %% [{b,a}, {b,c}, {c,d}, {d,e}],
+ %% [{a,b}, {c,b}, {c,d}, {d,e}],
+ %% [{a,b}, {b,c}, {d,c}, {d,e}],
+ %% [{a,b}, {b,c}, {c,d}, {e,d}] ]
+ %% end || Partition <- [ [{c,a}] ]
+ %% end || Partition <- [ [{c,a}], [{c,b}, {a, b}] ]
+ %% end || Partition <- [ [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}],
+ %% [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {b,c}],
+ %% [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {c,d}] ]
+ %% end || Partition <- [ [{a,b}],
+ %% [{a,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
+ %% [{a,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
+ %% [{a,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
+ %% [{a,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ]
+ ],
+ %% exit(end_experiment),
+
+ io:format(user, "\nSET partitions = []\n", []),
+ io:format(user, "We should see convergence to 1 correct chain.\n", []),
+ machi_partition_simulator:no_partitions(),
+ [DoIt(50, 10, 100) || _ <- [1]],
+ io:format(user, "Sweet, finishing early\n", []), exit(yoyoyo_testing_hack),
+ %% WARNING: In asymmetric partitions, private_projections_are_stable()
+ %% will never be true; code beyond this point on the -exp3
+ %% branch is bit-rotted, sorry!
+ true = private_projections_are_stable(Namez, DoIt),
+ io:format(user, "~s\n", [os:cmd("date")]),
+
+ %% We are stable now ... analyze it.
+
+ %% Create a report where at least one FLU has written a
+ %% private projection.
+ Report = unanimous_report(Namez),
+ %% ?D(Report),
+
+ %% Report is ordered by Epoch. For each private projection
+ %% written during any given epoch, confirm that all chain
+ %% members appear in only one unique chain, i.e., the sets of
+ %% unique chains are disjoint.
+ true = all_reports_are_disjoint(Report),
+
+ %% Given the report, we flip it around so that we observe the
+ %% sets of chain transitions relative to each FLU.
+ R_Chains = [extract_chains_relative_to_flu(FLU, Report) ||
+ FLU <- All_list],
+ %% ?D(R_Chains),
+ R_Projs = [{FLU, [chain_to_projection(FLU, Epoch, UPI, Repairing,
+ All_list) ||
+ {Epoch, UPI, Repairing} <- E_Chains]} ||
+ {FLU, E_Chains} <- R_Chains],
+
+ %% For each chain transition experienced by a particular FLU,
+ %% confirm that each state transition is OK.
+ try
+ [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane(Ps, FLU)} ||
+ {FLU, Ps} <- R_Projs],
+ io:format(user, "\nAll sanity checks pass, hooray!\n", [])
+ catch _Err:_What ->
+ io:format(user, "Report ~p\n", [Report]),
+ exit({line, ?LINE, _Err, _What})
+ end,
+ %% ?D(R_Projs),
+
+ ok
+ after
+ [ok = ?MGR:stop(MgrPid) || {_, MgrPid} <- MgrNamez],
+ [ok = machi_flu0:stop(FLUPid) || {_, FLUPid} <- Namez],
+ ok = machi_partition_simulator:stop()
+ end.
+
+private_projections_are_stable(Namez, PollFunc) ->
+ Private1 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ {_Name, FLU} <- Namez],
+ PollFunc(5, 1, 10),
+ Private2 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ {_Name, FLU} <- Namez],
+ true = (Private1 == Private2).
+
+all_hosed_lists_are_identical(Namez, Partition0) ->
+ Partition = lists:usort(Partition0),
+ Ps = [machi_flu0:proj_read_latest(FLU, private) || {_Name, FLU} <- Namez],
+ UniqueAllHoseds = lists:usort([machi_chain_manager1:get_all_hosed(P) ||
+ {ok, P} <- Ps]),
+ Members = [M || {M, _Pid} <- Namez],
+ Islands = machi_partition_simulator:partitions2num_islands(
+ Members, Partition),
+ %% io:format(user, "all_hosed_lists_are_identical:\n", []),
+ %% io:format(user, " Uniques = ~p Islands ~p\n Partition ~p\n",
+ %% [Uniques, Islands, Partition]),
+ case length(UniqueAllHoseds) of
+ 1 ->
+ true;
+ %% TODO: With the addition of the digraph stuff below, the clause
+ %% below probably isn't necessary anymore, since the
+ %% digraph calculation should catch complete partition islands?
+ _ when Islands == 'many' ->
+ %% There are at least two partitions, so yes, it's quite
+ %% possible that the all_hosed lists may differ.
+ %% TODO Fix this up to be smarter about fully-isolated
+ %% islands of partition.
+ true;
+ _ ->
+ DG = digraph:new(),
+ Connection = machi_partition_simulator:partition2connection(
+ Members, Partition),
+ [digraph:add_vertex(DG, X) || X <- Members],
+ [digraph:add_edge(DG, X, Y) || {X,Y} <- Connection],
+ Any =
+ lists:any(
+ fun(X) ->
+ NotX = Members -- [X],
+ lists:any(
+ fun(Y) ->
+ %% There must be a shortest path of length
+ %% two in both directions, otherwise
+ %% the read projection call will fail.
+ %% And it's that failure that we're
+ %% interested in here.
+ XtoY = digraph:get_short_path(DG, X, Y),
+ YtoX = digraph:get_short_path(DG, Y, X),
+ (XtoY == false orelse
+ length(XtoY) > 2)
+ orelse
+ (YtoX == false orelse
+ length(YtoX) > 2)
+ end, NotX)
+ end, Members),
+ digraph:delete(DG),
+ if Any == true ->
+ %% There's a missing path of length 2 between some
+ %% two FLUs, so yes, there's going to be
+ %% non-identical all_hosed lists.
+ true;
+ true ->
+ false % There's no excuse, buddy
+ end
+ end.
+
+-endif. % not PULSE
+-endif. % TEST
diff --git a/test/machi_partition_simulator.erl b/test/machi_partition_simulator.erl
new file mode 100644
index 0000000..7ef70a3
--- /dev/null
+++ b/test/machi_partition_simulator.erl
@@ -0,0 +1,239 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_partition_simulator).
+
+-behaviour(gen_server).
+
+-ifdef(TEST).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-endif.
+-ifdef(PULSE).
+-compile({parse_transform, pulse_instrument}).
+-endif.
+
+-export([start_link/3, stop/0,
+ get/1, reset_thresholds/2,
+ no_partitions/0, always_last_partitions/0, always_these_partitions/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([islands2partitions/1,
+ partition2connection/2,
+ connection2partition/2,
+ partitions2num_islands/2,
+ partition_list_is_symmetric_p/2]).
+
+-define(TAB, ?MODULE).
+
+-record(state, {
+ seed,
+ old_partitions,
+ old_threshold,
+ no_partition_threshold,
+ method=oneway_partitions :: 'island' | 'oneway_partitions'
+ }).
+
+start_link(Seed, OldThreshold, NoPartitionThreshold) ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE,
+ {Seed, OldThreshold, NoPartitionThreshold}, []).
+
+stop() ->
+ gen_server:call(?MODULE, {stop}, infinity).
+
+get(Nodes) ->
+ gen_server:call(?MODULE, {get, Nodes}, infinity).
+
+reset_thresholds(OldThreshold, NoPartitionThreshold) ->
+ gen_server:call(?MODULE, {reset_thresholds, OldThreshold, NoPartitionThreshold}, infinity).
+
+no_partitions() ->
+ reset_thresholds(-999, 999).
+
+always_last_partitions() ->
+ reset_thresholds(999, 0).
+
+always_these_partitions(Parts) ->
+ reset_thresholds(999, 0),
+ gen_server:call(?MODULE, {always_these_partitions, Parts}, infinity).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init({Seed, OldThreshold, NoPartitionThreshold}) ->
+ {ok, #state{seed=Seed,
+ old_partitions={[],[[]]},
+ old_threshold=OldThreshold,
+ no_partition_threshold=NoPartitionThreshold}}.
+
+handle_call({get, Nodes}, _From, S) ->
+ {Seed2, Partitions} =
+ calc_network_partitions(S#state.method,
+ Nodes,
+ S#state.seed,
+ S#state.old_partitions,
+ S#state.old_threshold,
+ S#state.no_partition_threshold),
+ {reply, Partitions, S#state{seed=Seed2,
+ old_partitions=Partitions}};
+handle_call({reset_thresholds, OldThreshold, NoPartitionThreshold}, _From, S) ->
+ {reply, ok, S#state{old_threshold=OldThreshold,
+ no_partition_threshold=NoPartitionThreshold}};
+handle_call({always_these_partitions, Parts}, _From, S) ->
+ {reply, ok, S#state{old_partitions={Parts,[na_reset_by_always]}}};
+handle_call({stop}, _From, S) ->
+ {stop, normal, ok, S}.
+
+handle_cast(_Cast, S) ->
+ {noreply, S}.
+
+handle_info(_Info, S) ->
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+calc_network_partitions(Method, Nodes, Seed1, OldPartition,
+ OldThreshold, NoPartitionThreshold) ->
+ {Cutoff2, Seed2} = random:uniform_s(100, Seed1),
+ if Cutoff2 < OldThreshold ->
+ {Seed2, OldPartition};
+ true ->
+            %% Use the updated seed here; reusing Seed1 would make Cutoff3
+            %% always equal Cutoff2.
+            {Cutoff3, Seed3} = random:uniform_s(100, Seed2),
+ if Cutoff3 < NoPartitionThreshold ->
+ {Seed3, {[], [Nodes]}};
+ true ->
+ make_network_partition_locations(Method, Nodes, Seed3)
+ end
+ end.
+
+make_network_partition_locations(island=_Method, Nodes, Seed1) ->
+ Num = length(Nodes),
+ {Seed2, WeightsNodes} = lists:foldl(
+ fun(Node, {Seeda, Acc}) ->
+ {Cutoff0, Seedb} =
+ random:uniform_s(100, Seeda),
+ Cutoff = erlang:max(
+ 2, if Cutoff0 rem 4 == 0 ->
+ 0;
+ true ->
+ Cutoff0
+ end),
+ {Seedb, [{Cutoff, Node}|Acc]}
+ end, {Seed1, []}, Nodes),
+ IslandSep = 100 div Num,
+ Islands = [
+ lists:sort([Nd || {Weight, Nd} <- WeightsNodes,
+ (Max - IslandSep) =< Weight, Weight < Max])
+ || Max <- lists:seq(IslandSep + 1, 105, IslandSep)],
+ {Seed2, {lists:usort(islands2partitions(Islands)), lists:sort(Islands)}};
+make_network_partition_locations(oneway_partitions=_Method, Nodes, Seed1) ->
+ Pairs = make_all_pairs(Nodes),
+ Num = length(Pairs),
+ {Seed2, Weights} = lists:foldl(
+ fun(_, {Seeda, Acc}) ->
+ {Cutoff, Seedb} = random:uniform_s(100, Seeda),
+ {Seedb, [Cutoff|Acc]}
+ end, {Seed1, []}, lists:seq(1, Num)),
+ {Cutoff3, Seed3} = random:uniform_s(100, Seed2),
+ {Seed3, {[X || {Weight, X} <- lists:zip(Weights, Pairs),
+ Weight < Cutoff3], [islands_not_supported]}}.
+
+make_all_pairs(L) ->
+ lists:flatten(make_all_pairs2(lists:usort(L))).
+
+make_all_pairs2([]) ->
+ [];
+make_all_pairs2([_]) ->
+ [];
+make_all_pairs2([H1|T]) ->
+ [[{H1, X}, {X, H1}] || X <- T] ++ make_all_pairs(T).
+
+islands2partitions([]) ->
+ [];
+islands2partitions([Island|Rest]) ->
+ [{X,Y} || X <- Island,
+ Y <- lists:append(Rest), X /= Y]
+ ++
+ [{Y,X} || X <- Island,
+ Y <- lists:append(Rest), X /= Y]
+ ++
+ islands2partitions(Rest).
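+%% Worked example (illustrative): islands2partitions([[a],[b,c]]) returns
+%% [{a,b},{a,c},{b,a},{c,a}]; every pair crossing an island boundary is
+%% cut in both directions, while pairs within an island stay connected.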
+
+partition2connection(Members0, Partition0) ->
+ p2c_invert(lists:usort(Members0), lists:usort(Partition0)).
+
+connection2partition(Members0, Partition0) ->
+ p2c_invert(lists:usort(Members0), lists:usort(Partition0)).
+
+p2c_invert(Members, Partition_list_Or_Connection_list) ->
+ All = [{X,Y} || X <- Members, Y <- Members, X /= Y],
+ All -- Partition_list_Or_Connection_list.
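+%% Worked example (illustrative): partition2connection([a,b,c], [{a,b}])
+%% returns [{a,c},{b,a},{b,c},{c,a},{c,b}], i.e. every ordered pair except
+%% the cut one. Subtracting from the full pair set is an involution, which
+%% is why connection2partition/2 is the same computation.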
+
+partitions2num_islands(Members0, Partition0) ->
+ %% Ignore duplicates in either arg, if any.
+ Members = lists:usort(Members0),
+ Partition = lists:usort(Partition0),
+
+ Connections = partition2connection(Members, Partition),
+ Cs = [lists:member({X,Y}, Connections)
+ orelse
+ lists:member({Y,X}, Connections) || X <- Members, Y <- Members,
+ X /= Y],
+ case lists:usort(Cs) of
+        [true]        -> 1;
+        [false]       -> many; % every pair fully cut: all-isolated islands
+        [false, true] -> many  % TODO too lazy to finish
+ end.
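+%% Illustrative example: partitions2num_islands([a,b,c], [{a,b},{b,a}])
+%% answers many even though c still bridges a and b; any fully-cut direct
+%% pair makes the naive check above give up (see the TODO).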
+
+partition_list_is_symmetric_p(Members0, Partition0) ->
+    %% Ignore duplicates in either arg, if any.
+ Members = lists:usort(Members0),
+ NumMembers = length(Members),
+ Partition = lists:usort(Partition0),
+
+ NewDict = lists:foldl(
+ fun({A,B}, Dict) ->
+ Key = if A > B -> {A,B};
+ true -> {B,A}
+ end,
+ orddict:update_counter(Key, 1, Dict)
+ end, orddict:new(), Partition),
+ AllOddP = orddict:fold(
+ fun(_Key, Count, true) when Count rem 2 == 0 ->
+ true;
+ (_, _, _) ->
+ false
+ end, true, NewDict),
+ if not AllOddP ->
+ false;
+ true ->
+ TwosCount = [Key || {Key, Count} <- orddict:to_list(NewDict),
+ Count == 2],
+ length(TwosCount) >= (NumMembers - 1)
+ end.
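+%% Illustrative examples: partition_list_is_symmetric_p([a,b], [{a,b},{b,a}])
+%% is true (the unordered key {b,a} is counted twice), while [{a,b}] alone
+%% gives false because its count is odd.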
+
+-endif. % TEST
From a79f385fa718a1cf553bf591610e390176455edf Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Mon, 6 Apr 2015 15:49:47 +0900
Subject: [PATCH 02/22] Fix type problem for return of get_latest_epoch
---
src/machi_flu1_client.erl | 4 +--
src/machi_projection_store.erl | 49 ++++++++++++++++++++--------------
test/machi_flu1_test.erl | 3 ++-
3 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl
index 6dd6c65..8850a0c 100644
--- a/src/machi_flu1_client.erl
+++ b/src/machi_flu1_client.erl
@@ -154,7 +154,7 @@ list_files(Host, TcpPort, EpochID) when is_integer(TcpPort) ->
%% @doc Get the latest epoch number from the FLU's projection store.
-spec get_latest_epoch(port(), projection_type()) ->
- {ok, -1|non_neg_integer()} | {error, term()}.
+ {ok, epoch_id()} | {error, term()}.
get_latest_epoch(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
get_latest_epoch2(Sock, ProjType).
@@ -163,7 +163,7 @@ get_latest_epoch(Sock, ProjType)
-spec get_latest_epoch(inet_host(), inet_port(),
projection_type()) ->
- {ok, -1|non_neg_integer()} | {error, term()}.
+ {ok, epoch_id()} | {error, term()}.
get_latest_epoch(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl
index c88a21b..d53ecc4 100644
--- a/src/machi_projection_store.erl
+++ b/src/machi_projection_store.erl
@@ -42,8 +42,8 @@
private_dir = "" :: string(),
wedged = true :: boolean(),
wedge_notify_pid :: pid() | atom(),
- max_public_epoch = -1 :: -1 | non_neg_integer(),
- max_private_epoch = -1 :: -1 | non_neg_integer()
+ max_public_epoch = {-1,<<>>} :: -1 | non_neg_integer(),
+ max_private_epoch = {-1,<<>>} :: -1 | non_neg_integer()
}).
start_link(RegName, DataDir, NotifyWedgeStateChanges) ->
@@ -124,16 +124,16 @@ init([DataDir, NotifyWedgeStateChanges]) ->
handle_call({{get_latest_epoch, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
- Epoch = if ProjType == public -> S#state.max_public_epoch;
- ProjType == private -> S#state.max_private_epoch
- end,
- {reply, {{ok, Epoch}, LC2}, S};
+ EpochT = if ProjType == public -> S#state.max_public_epoch;
+ ProjType == private -> S#state.max_private_epoch
+ end,
+ {reply, {{ok, EpochT}, LC2}, S};
handle_call({{read_latest_projection, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
- Epoch = if ProjType == public -> S#state.max_public_epoch;
- ProjType == private -> S#state.max_private_epoch
+ {EpochNum, _CSum} = if ProjType == public -> S#state.max_public_epoch;
+ ProjType == private -> S#state.max_private_epoch
end,
- {Reply, NewS} = do_proj_read(ProjType, Epoch, S),
+ {Reply, NewS} = do_proj_read(ProjType, EpochNum, S),
{reply, {Reply, LC2}, NewS};
handle_call({{read, ProjType, Epoch}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
@@ -176,17 +176,21 @@ code_change(_OldVsn, S, _Extra) ->
do_proj_read(_ProjType, Epoch, S) when Epoch < 0 ->
{{error, not_written}, S};
-do_proj_read(ProjType, Epoch, S) ->
- Dir = pick_path(ProjType, S),
+do_proj_read(ProjType, Epoch, S_or_Dir) ->
+ Dir = if is_record(S_or_Dir, state) ->
+ pick_path(ProjType, S_or_Dir);
+ is_list(S_or_Dir) ->
+ S_or_Dir
+ end,
Path = filename:join(Dir, epoch2name(Epoch)),
case file:read_file(Path) of
{ok, Bin} ->
%% TODO and if Bin is corrupt? (even if binary_to_term() succeeds)
- {{ok, binary_to_term(Bin)}, S};
+ {{ok, binary_to_term(Bin)}, S_or_Dir};
{error, enoent} ->
- {{error, not_written}, S};
+ {{error, not_written}, S_or_Dir};
{error, Else} ->
- {{error, Else}, S}
+ {{error, Else}, S_or_Dir}
end.
do_proj_write(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
@@ -201,12 +205,15 @@ do_proj_write(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
ok = file:write(FH, term_to_binary(Proj)),
ok = file:sync(FH),
ok = file:close(FH),
- NewS = if ProjType == public, Epoch > S#state.max_public_epoch ->
+ EpochT = {Epoch, Proj},
+ NewS = if ProjType == public,
+ Epoch > element(1, S#state.max_public_epoch) ->
io:format(user, "TODO: tell ~p we are wedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
- S#state{max_public_epoch=Epoch, wedged=true};
- ProjType == private, Epoch > S#state.max_private_epoch ->
+ S#state{max_public_epoch=EpochT, wedged=true};
+ ProjType == private,
+ Epoch > element(1, S#state.max_private_epoch) ->
io:format(user, "TODO: tell ~p we are unwedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
- S#state{max_private_epoch=Epoch, wedged=false};
+ S#state{max_private_epoch=EpochT, wedged=false};
true ->
S
end,
@@ -233,9 +240,11 @@ find_all(Dir) ->
find_max_epoch(Dir) ->
Fs = lists:sort(filelib:wildcard("*", Dir)),
if Fs == [] ->
- -1;
+ {-1, <<>>};
true ->
- name2epoch(lists:last(Fs))
+ EpochNum = name2epoch(lists:last(Fs)),
+ {{ok, Proj}, _} = do_proj_read(proj_type_ignored, EpochNum, Dir),
+ {EpochNum, Proj}
end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/test/machi_flu1_test.erl b/test/machi_flu1_test.erl
index 136d6d0..c37188c 100644
--- a/test/machi_flu1_test.erl
+++ b/test/machi_flu1_test.erl
@@ -125,7 +125,7 @@ flu_projection_smoke_test() ->
FLU1 = setup_test_flu(projection_test_flu, TcpPort, DataDir),
try
[begin
- {ok, -1} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
+ {ok, {-1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{error, not_written} =
?FLU_C:read_latest_projection(Host, TcpPort, T),
{ok, []} = ?FLU_C:list_all(Host, TcpPort, T),
@@ -135,6 +135,7 @@ flu_projection_smoke_test() ->
ok = ?FLU_C:write_projection(Host, TcpPort, T, P1),
{error, written} = ?FLU_C:write_projection(Host, TcpPort, T, P1),
{ok, P1} = ?FLU_C:read_projection(Host, TcpPort, T, 1),
+ {ok, {1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{ok, P1} = ?FLU_C:read_latest_projection(Host, TcpPort, T),
{ok, [1]} = ?FLU_C:list_all(Host, TcpPort, T),
{ok, [P1]} = ?FLU_C:get_all(Host, TcpPort, T),
From 1d63b93fc00e0d62e9cf51528ce858f6ff059e27 Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Mon, 6 Apr 2015 16:49:17 +0900
Subject: [PATCH 03/22] Kill append, projection, and listen pids on
machi_flu1:stop()
---
src/machi_flu1.erl | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl
index 02f7925..3d71ce4 100644
--- a/src/machi_flu1.erl
+++ b/src/machi_flu1.erl
@@ -46,7 +46,7 @@ start_link([{FluName, TcpPort, DataDir}|Rest])
stop(Pid) ->
case erlang:is_process_alive(Pid) of
true ->
- Pid ! forever,
+ Pid ! killme,
ok;
false ->
error
@@ -86,7 +86,11 @@ main2(RegName, TcpPort, DataDir, Rest) ->
put(flu_append_pid, AppendPid),
put(flu_projection_pid, ProjectionPid),
put(flu_listen_pid, ListenPid),
- receive forever -> ok end.
+ receive killme -> ok end,
+ (catch exit(AppendPid, kill)),
+ (catch exit(ProjectionPid, kill)),
+ (catch exit(ListenPid, kill)),
+ ok.
start_listen_server(S) ->
spawn_link(fun() -> run_listen_server(S) end).
From 16e283fe5b6b9d6b1c4bc055cab417730960c3a5 Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Mon, 6 Apr 2015 18:43:52 +0900
Subject: [PATCH 04/22] API overhaul, add machi_proxy_flu1_client.erl, add
chain manager (tests commented out)
---
src/machi_flu1.erl | 8 +-
src/machi_flu1_client.erl | 136 +++++++-----
src/machi_projection_store.erl | 34 +--
src/machi_proxy_flu1_client.erl | 344 +++++++++++++++++++++++++++++
src/machi_util.erl | 18 +-
test/machi_chain_manager1_test.erl | 4 +-
test/machi_flu1_test.erl | 15 +-
7 files changed, 472 insertions(+), 87 deletions(-)
create mode 100644 src/machi_proxy_flu1_client.erl
diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl
index 3d71ce4..bd34ff5 100644
--- a/src/machi_flu1.erl
+++ b/src/machi_flu1.erl
@@ -580,12 +580,12 @@ handle_projection_command({read_projection, ProjType, Epoch},
handle_projection_command({write_projection, ProjType, Proj},
#state{proj_store=ProjStore}) ->
machi_projection_store:write(ProjStore, ProjType, Proj);
-handle_projection_command({get_all, ProjType},
+handle_projection_command({get_all_projections, ProjType},
#state{proj_store=ProjStore}) ->
- machi_projection_store:get_all(ProjStore, ProjType);
-handle_projection_command({list_all, ProjType},
+ machi_projection_store:get_all_projections(ProjStore, ProjType);
+handle_projection_command({list_all_projections, ProjType},
#state{proj_store=ProjStore}) ->
- machi_projection_store:list_all(ProjStore, ProjType);
+ machi_projection_store:list_all_projections(ProjStore, ProjType);
handle_projection_command(Else, _S) ->
{error, unknown_cmd, Else}.
diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl
index 8850a0c..570c9fa 100644
--- a/src/machi_flu1_client.erl
+++ b/src/machi_flu1_client.erl
@@ -35,8 +35,8 @@
read_latest_projection/2, read_latest_projection/3,
read_projection/3, read_projection/4,
write_projection/3, write_projection/4,
- get_all/2, get_all/3,
- list_all/2, list_all/3,
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3,
%% Common API
quit/1
@@ -54,7 +54,7 @@
-type chunk_pos() :: {file_offset(), chunk_size(), file_name_s()}.
-type chunk_size() :: non_neg_integer().
-type epoch_csum() :: binary().
--type epoch_num() :: non_neg_integer().
+-type epoch_num() :: -1 | non_neg_integer().
-type epoch_id() :: {epoch_num(), epoch_csum()}.
-type file_info() :: {file_size(), file_name_s()}.
-type file_name() :: binary() | list().
@@ -243,44 +243,44 @@ write_projection(Host, TcpPort, ProjType, Proj)
%% @doc Get all projections from the FLU's projection store.
--spec get_all(port(), projection_type()) ->
+-spec get_all_projections(port(), projection_type()) ->
{ok, [projection()]} | {error, term()}.
-get_all(Sock, ProjType)
+get_all_projections(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
- get_all2(Sock, ProjType).
+ get_all_projections2(Sock, ProjType).
%% @doc Get all projections from the FLU's projection store.
--spec get_all(inet_host(), inet_port(),
+-spec get_all_projections(inet_host(), inet_port(),
projection_type()) ->
{ok, [projection()]} | {error, term()}.
-get_all(Host, TcpPort, ProjType)
+get_all_projections(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
try
- get_all2(Sock, ProjType)
+ get_all_projections2(Sock, ProjType)
after
catch gen_tcp:close(Sock)
end.
%% @doc Get all epoch numbers from the FLU's projection store.
--spec list_all(port(), projection_type()) ->
+-spec list_all_projections(port(), projection_type()) ->
{ok, [non_neg_integer()]} | {error, term()}.
-list_all(Sock, ProjType)
+list_all_projections(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
- list_all2(Sock, ProjType).
+ list_all_projections2(Sock, ProjType).
%% @doc Get all epoch numbers from the FLU's projection store.
--spec list_all(inet_host(), inet_port(),
+-spec list_all_projections(inet_host(), inet_port(),
projection_type()) ->
{ok, [non_neg_integer()]} | {error, term()}.
-list_all(Host, TcpPort, ProjType)
+list_all_projections(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
try
- list_all2(Sock, ProjType)
+ list_all_projections2(Sock, ProjType)
after
catch gen_tcp:close(Sock)
end.
@@ -365,6 +365,7 @@ trunc_hack(Host, TcpPort, EpochID, File) when is_integer(TcpPort) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%
append_chunk2(Sock, EpochID, Prefix0, Chunk0) ->
+ erase(bad_sock),
try
%% TODO: add client-side checksum to the server's protocol
%% _ = crypto:hash(md5, Chunk),
@@ -391,47 +392,59 @@ append_chunk2(Sock, EpochID, Prefix0, Chunk0) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
read_chunk2(Sock, EpochID, File0, Offset, Size) ->
- {EpochNum, EpochCSum} = EpochID,
-    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
- File = machi_util:make_binary(File0),
- PrefixHex = machi_util:int_to_hexbin(Offset, 64),
- SizeHex = machi_util:int_to_hexbin(Size, 32),
- CmdLF = [$R, 32, EpochIDRaw, PrefixHex, SizeHex, File, 10],
- ok = gen_tcp:send(Sock, CmdLF),
- case gen_tcp:recv(Sock, 3) of
- {ok, <<"OK\n">>} ->
- {ok, _Chunk}=Res = gen_tcp:recv(Sock, Size),
- Res;
- {ok, Else} ->
- {ok, OldOpts} = inet:getopts(Sock, [packet]),
- ok = inet:setopts(Sock, [{packet, line}]),
- {ok, Else2} = gen_tcp:recv(Sock, 0),
- ok = inet:setopts(Sock, OldOpts),
- case Else of
- <<"ERA">> ->
- {error, todo_erasure_coded}; %% escript_cc_parse_ec_info(Sock, Line, Else2);
- <<"ERR">> ->
- case Else2 of
- <<"OR BAD-IO\n">> ->
- {error, no_such_file};
- <<"OR NOT-ERASURE\n">> ->
- {error, no_such_file};
- <<"OR BAD-ARG\n">> ->
- {error, bad_arg};
- <<"OR PARTIAL-READ\n">> ->
- {error, partial_read};
- _ ->
- {error, Else2}
- end;
- _ ->
-            {error, {whaaa, <<Else/binary, Else2/binary>>}}
- end
+ erase(bad_sock),
+ try
+ {EpochNum, EpochCSum} = EpochID,
+        EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
+ File = machi_util:make_binary(File0),
+ PrefixHex = machi_util:int_to_hexbin(Offset, 64),
+ SizeHex = machi_util:int_to_hexbin(Size, 32),
+ CmdLF = [$R, 32, EpochIDRaw, PrefixHex, SizeHex, File, 10],
+ ok = gen_tcp:send(Sock, CmdLF),
+ case gen_tcp:recv(Sock, 3) of
+ {ok, <<"OK\n">>} ->
+ {ok, _Chunk}=Res = gen_tcp:recv(Sock, Size),
+ Res;
+ {ok, Else} ->
+ {ok, OldOpts} = inet:getopts(Sock, [packet]),
+ ok = inet:setopts(Sock, [{packet, line}]),
+ {ok, Else2} = gen_tcp:recv(Sock, 0),
+ ok = inet:setopts(Sock, OldOpts),
+ case Else of
+ <<"ERA">> ->
+ {error, todo_erasure_coded}; %% escript_cc_parse_ec_info(Sock, Line, Else2);
+ <<"ERR">> ->
+ case Else2 of
+ <<"OR BAD-IO\n">> ->
+ {error, no_such_file};
+ <<"OR NOT-ERASURE\n">> ->
+ {error, no_such_file};
+ <<"OR BAD-ARG\n">> ->
+ {error, bad_arg};
+ <<"OR PARTIAL-READ\n">> ->
+ {error, partial_read};
+ _ ->
+ {error, Else2}
+ end;
+ _ ->
+                    {error, {whaaa_todo, <<Else/binary, Else2/binary>>}}
+ end
+ end
+ catch
+ throw:Error ->
+ put(bad_sock, Sock),
+ Error;
+ error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
+ {error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
list2(Sock, EpochID) ->
@@ -462,6 +475,7 @@ list3(Else, _Sock) ->
throw({server_protocol_error, Else}).
checksum_list2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
@@ -484,8 +498,10 @@ checksum_list2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
@@ -515,6 +531,7 @@ checksum_list_finish(Chunks) ->
Line /= <<>>].
write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
@@ -542,12 +559,15 @@ write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
delete_migration2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
@@ -566,12 +586,15 @@ delete_migration2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
trunc_hack2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
@@ -590,8 +613,10 @@ trunc_hack2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
@@ -611,15 +636,16 @@ write_projection2(Sock, ProjType, Proj) ->
ProjCmd = {write_projection, ProjType, Proj},
do_projection_common(Sock, ProjCmd).
-get_all2(Sock, ProjType) ->
- ProjCmd = {get_all, ProjType},
+get_all_projections2(Sock, ProjType) ->
+ ProjCmd = {get_all_projections, ProjType},
do_projection_common(Sock, ProjCmd).
-list_all2(Sock, ProjType) ->
- ProjCmd = {list_all, ProjType},
+list_all_projections2(Sock, ProjType) ->
+ ProjCmd = {list_all_projections, ProjType},
do_projection_common(Sock, ProjCmd).
do_projection_common(Sock, ProjCmd) ->
+ erase(bad_sock),
try
ProjCmdBin = term_to_binary(ProjCmd),
Len = iolist_size(ProjCmdBin),
@@ -641,7 +667,9 @@ do_projection_common(Sock, ProjCmd) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl
index d53ecc4..09555d2 100644
--- a/src/machi_projection_store.erl
+++ b/src/machi_projection_store.erl
@@ -29,21 +29,23 @@
read_latest_projection/2, read_latest_projection/3,
read/3, read/4,
write/3, write/4,
- get_all/2, get_all/3,
- list_all/2, list_all/3
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3
]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
+-define(NO_EPOCH, {-1,<<0:(20*8)/big>>}).
+
-record(state, {
public_dir = "" :: string(),
private_dir = "" :: string(),
wedged = true :: boolean(),
wedge_notify_pid :: pid() | atom(),
- max_public_epoch = {-1,<<>>} :: -1 | non_neg_integer(),
- max_private_epoch = {-1,<<>>} :: -1 | non_neg_integer()
+ max_public_epoch = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()},
+ max_private_epoch = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()}
}).
start_link(RegName, DataDir, NotifyWedgeStateChanges) ->
@@ -82,19 +84,19 @@ write(PidSpec, ProjType, Proj, Timeout)
Proj#projection_v1.epoch_number >= 0 ->
g_call(PidSpec, {write, ProjType, Proj}, Timeout).
-get_all(PidSpec, ProjType) ->
- get_all(PidSpec, ProjType, infinity).
+get_all_projections(PidSpec, ProjType) ->
+ get_all_projections(PidSpec, ProjType, infinity).
-get_all(PidSpec, ProjType, Timeout)
+get_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
- g_call(PidSpec, {get_all, ProjType}, Timeout).
+ g_call(PidSpec, {get_all_projections, ProjType}, Timeout).
-list_all(PidSpec, ProjType) ->
- list_all(PidSpec, ProjType, infinity).
+list_all_projections(PidSpec, ProjType) ->
+ list_all_projections(PidSpec, ProjType, infinity).
-list_all(PidSpec, ProjType, Timeout)
+list_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
- g_call(PidSpec, {list_all, ProjType}, Timeout).
+ g_call(PidSpec, {list_all_projections, ProjType}, Timeout).
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -143,7 +145,7 @@ handle_call({{write, ProjType, Proj}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
{Reply, NewS} = do_proj_write(ProjType, Proj, S),
{reply, {Reply, LC2}, NewS};
-handle_call({{get_all, ProjType}, LC1}, _From, S) ->
+handle_call({{get_all_projections, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
Dir = pick_path(ProjType, S),
Epochs = find_all(Dir),
@@ -152,7 +154,7 @@ handle_call({{get_all, ProjType}, LC1}, _From, S) ->
Proj
end || Epoch <- Epochs],
{reply, {{ok, All}, LC2}, S};
-handle_call({{list_all, ProjType}, LC1}, _From, S) ->
+handle_call({{list_all_projections, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
Dir = pick_path(ProjType, S),
{reply, {{ok, find_all(Dir)}, LC2}, S};
@@ -205,7 +207,7 @@ do_proj_write(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
ok = file:write(FH, term_to_binary(Proj)),
ok = file:sync(FH),
ok = file:close(FH),
- EpochT = {Epoch, Proj},
+ EpochT = {Epoch, Proj#projection_v1.epoch_csum},
NewS = if ProjType == public,
Epoch > element(1, S#state.max_public_epoch) ->
io:format(user, "TODO: tell ~p we are wedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
@@ -240,7 +242,7 @@ find_all(Dir) ->
find_max_epoch(Dir) ->
Fs = lists:sort(filelib:wildcard("*", Dir)),
if Fs == [] ->
- {-1, <<>>};
+ ?NO_EPOCH;
true ->
EpochNum = name2epoch(lists:last(Fs)),
{{ok, Proj}, _} = do_proj_read(proj_type_ignored, EpochNum, Dir),
diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl
new file mode 100644
index 0000000..5222fb9
--- /dev/null
+++ b/src/machi_proxy_flu1_client.erl
@@ -0,0 +1,344 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(machi_proxy_flu1_client).
+
+-behaviour(gen_server).
+
+-include("machi.hrl").
+-include("machi_projection.hrl").
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif. % TEST.
+
+-export([start_link/1]).
+%% FLU1 API
+-export([
+ %% File API
+ append_chunk/4, append_chunk/5,
+ read_chunk/5, read_chunk/6,
+ checksum_list/3, checksum_list/4,
+ list_files/2, list_files/3,
+
+ %% %% Projection API
+ get_latest_epoch/2, get_latest_epoch/3,
+ read_latest_projection/2, read_latest_projection/3,
+ read_projection/3, read_projection/4,
+ write_projection/3, write_projection/4,
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3,
+
+ %% Common API
+ quit/1
+ ]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-define(FLU_C, machi_flu1_client).
+
+-record(state, {
+ i :: #p_srvr{},
+ sock :: 'undefined' | port()
+ }).
+
+start_link(#p_srvr{}=I) ->
+ gen_server:start_link(?MODULE, [I], []).
+
+append_chunk(PidSpec, EpochID, Prefix, Chunk) ->
+ append_chunk(PidSpec, EpochID, Prefix, Chunk, infinity).
+
+append_chunk(PidSpec, EpochID, Prefix, Chunk, Timeout) ->
+ gen_server:call(PidSpec, {req, {append_chunk, EpochID, Prefix, Chunk}},
+ Timeout).
+
+read_chunk(PidSpec, EpochID, File, Offset, Size) ->
+ read_chunk(PidSpec, EpochID, File, Offset, Size, infinity).
+
+read_chunk(PidSpec, EpochID, File, Offset, Size, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_chunk, EpochID, File, Offset, Size}},
+ Timeout).
+
+checksum_list(PidSpec, EpochID, File) ->
+ checksum_list(PidSpec, EpochID, File, infinity).
+
+checksum_list(PidSpec, EpochID, File, Timeout) ->
+ gen_server:call(PidSpec, {req, {checksum_list, EpochID, File}},
+ Timeout).
+
+list_files(PidSpec, EpochID) ->
+ list_files(PidSpec, EpochID, infinity).
+
+list_files(PidSpec, EpochID, Timeout) ->
+ gen_server:call(PidSpec, {req, {list_files, EpochID}},
+ Timeout).
+
+get_latest_epoch(PidSpec, ProjType) ->
+ get_latest_epoch(PidSpec, ProjType, infinity).
+
+get_latest_epoch(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {get_latest_epoch, ProjType}},
+ Timeout).
+
+read_latest_projection(PidSpec, ProjType) ->
+ read_latest_projection(PidSpec, ProjType, infinity).
+
+read_latest_projection(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_latest_projection, ProjType}},
+ Timeout).
+
+read_projection(PidSpec, ProjType, Epoch) ->
+ read_projection(PidSpec, ProjType, Epoch, infinity).
+
+read_projection(PidSpec, ProjType, Epoch, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_projection, ProjType, Epoch}},
+ Timeout).
+
+write_projection(PidSpec, ProjType, Proj) ->
+ write_projection(PidSpec, ProjType, Proj, infinity).
+
+write_projection(PidSpec, ProjType, Proj, Timeout) ->
+ gen_server:call(PidSpec, {req, {write_projection, ProjType, Proj}},
+ Timeout).
+
+get_all_projections(PidSpec, ProjType) ->
+ get_all_projections(PidSpec, ProjType, infinity).
+
+get_all_projections(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {get_all_projections, ProjType}},
+ Timeout).
+
+list_all_projections(PidSpec, ProjType) ->
+ list_all_projections(PidSpec, ProjType, infinity).
+
+list_all_projections(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {list_all_projections, ProjType}},
+ Timeout).
+
+quit(PidSpec) ->
+ gen_server:call(PidSpec, quit, infinity).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init([I]) ->
+ S0 = #state{i=I},
+ S1 = try_connect(S0),
+ {ok, S1}.
+
+handle_call({req, Req}, _From, S) ->
+ {Reply, NewS} = do_req(Req, S),
+ {reply, Reply, NewS};
+handle_call(quit, _From, S) ->
+ {stop, normal, ok, disconnect(S)};
+handle_call(_Request, _From, S) ->
+ Reply = ok,
+ {reply, Reply, S}.
+
+handle_cast(_Msg, S) ->
+ {noreply, S}.
+
+handle_info(_Info, S) ->
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+do_req(Req, S) ->
+ S2 = try_connect(S),
+ Fun = make_req_fun(Req, S2),
+ case connected_p(S2) of
+ true ->
+ case Fun() of
+ T when element(1, T) == ok ->
+ {T, S2};
+ Else ->
+ case get(bad_sock) of
+ Bad when Bad == S2#state.sock ->
+ {Else, disconnect(S2)};
+ _ ->
+ {Else, S2}
+ end
+ end;
+ false ->
+ {{error, not_connected}, S2}
+ end.
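In other words, do_req/2 reconnects lazily, runs the request, and tears the
socket down only when the lower layer flagged it via bad_sock. From a
caller's point of view the proxy therefore survives FLU restarts, as in
this sketch (assumes `P' is a bound #p_srvr{} record and `EpochID' is
bound; it mirrors the api_smoke_test flow further down):

    {ok, Prox} = machi_proxy_flu1_client:start_link(P),
    {ok, _}    = machi_proxy_flu1_client:append_chunk(Prox, EpochID,
                                                      <<"prefix">>, <<"data">>),
    %% ... the remote FLU crashes ...
    %% The next call fails and do_req/2 drops the bad socket:
    {error, _} = machi_proxy_flu1_client:append_chunk(Prox, EpochID,
                                                      <<"prefix">>, <<"data">>),
    %% ... the FLU is restarted ...
    %% try_connect/1 runs again inside do_req/2, so service resumes:
    {ok, _}    = machi_proxy_flu1_client:append_chunk(Prox, EpochID,
                                                      <<"prefix">>, <<"data">>).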
+
+make_req_fun({append_chunk, EpochID, Prefix, Chunk}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:append_chunk(Sock, EpochID, Prefix, Chunk) end;
+make_req_fun({read_chunk, EpochID, File, Offset, Size}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_chunk(Sock, EpochID, File, Offset, Size) end;
+make_req_fun({checksum_list, EpochID, File}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:checksum_list(Sock, EpochID, File) end;
+make_req_fun({list_files, EpochID}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:list_files(Sock, EpochID) end;
+make_req_fun({get_latest_epoch, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:get_latest_epoch(Sock, ProjType) end;
+make_req_fun({read_latest_projection, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_latest_projection(Sock, ProjType) end;
+make_req_fun({read_projection, ProjType, Epoch}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_projection(Sock, ProjType, Epoch) end;
+make_req_fun({write_projection, ProjType, Proj}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:write_projection(Sock, ProjType, Proj) end;
+make_req_fun({get_all_projections, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:get_all_projections(Sock, ProjType) end;
+make_req_fun({list_all_projections, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:list_all_projections(Sock, ProjType) end.
+
+connected_p(#state{sock=SockMaybe,
+ i=#p_srvr{proto=ipv4}=_I}=_S) ->
+ is_port(SockMaybe);
+connected_p(#state{i=#p_srvr{proto=disterl,
+ name=_NodeName}=_I}=_S) ->
+ true.
+ %% case net_adm:ping(NodeName) of
+ %% ping ->
+ %% true;
+ %% _ ->
+ %% false
+ %% end.
+
+try_connect(#state{sock=undefined,
+ i=#p_srvr{proto=ipv4, address=Host, port=TcpPort}=_I}=S) ->
+ try
+ Sock = machi_util:connect(Host, TcpPort),
+ S#state{sock=Sock}
+ catch
+ _:_ ->
+ S
+ end;
+try_connect(S) ->
+ %% If we're connection-based, we're already connected.
+ %% If we're not connection-based, then there's nothing to do.
+ S.
+
+disconnect(#state{sock=Sock,
+ i=#p_srvr{proto=ipv4}=_I}=S) ->
+ (catch gen_tcp:close(Sock)),
+ S#state{sock=undefined};
+disconnect(S) ->
+ S.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-ifdef(TEST).
+
+dummy_server(Parent, TcpPort) ->
+ spawn_link(fun() ->
+ {ok, LSock} = gen_tcp:listen(TcpPort,
+ [{reuseaddr,true},
+ {packet, line},
+ {mode, binary},
+ {active, false}]),
+ dummy_ack(Parent),
+ {ok, Sock} = gen_tcp:accept(LSock),
+ ok = inet:setopts(Sock, [{packet, line}]),
+ {ok, _Line} = gen_tcp:recv(Sock, 0),
+ ok = gen_tcp:send(Sock, "ERROR BADARG\n"),
+ (catch gen_tcp:close(Sock)),
+ unlink(Parent),
+ exit(normal)
+ end).
+
+dummy_ack(Parent) ->
+ Parent ! go.
+
+dummy_wait_for_ack() ->
+ receive go -> ok end.
+
+smoke_test() ->
+ TcpPort = 57123,
+ Me = self(),
+ _ServerPid = dummy_server(Me, TcpPort),
+ dummy_wait_for_ack(),
+
+ I = #p_srvr{name=smoke, proto=ipv4, address="localhost", port=TcpPort},
+ S0 = #state{i=I},
+ false = connected_p(S0),
+ S1 = try_connect(S0),
+ true = connected_p(S1),
+ gen_tcp:send(S1#state.sock, "yo dawg\n"),
+ {ok, _Answer} = gen_tcp:recv(S1#state.sock, 0),
+ _S2 = disconnect(S1),
+
+ ok.
+
+api_smoke_test() ->
+ RegName = api_smoke_flu,
+ Host = "localhost",
+ TcpPort = 57124,
+ DataDir = "./data.api_smoke_flu",
+ FLU1 = machi_flu1_test:setup_test_flu(RegName, TcpPort, DataDir),
+ erase(flu_pid),
+
+ try
+ I = #p_srvr{name=RegName, proto=ipv4, address=Host, port=TcpPort},
+ {ok, Prox1} = start_link(I),
+ try
+ FakeEpoch = {-1, <<0:(20*8)/big>>},
+ [{ok, {_,_,_}} = append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity) || _ <- lists:seq(1,5)],
+ %% Stop the FLU, what happens?
+ machi_flu1:stop(FLU1),
+ {error,_} = append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity),
+ {error,not_connected} = append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity),
+ %% Start the FLU again, we should be able to do stuff immediately
+ FLU1b = machi_flu1_test:setup_test_flu(RegName, TcpPort, DataDir,
+ [save_data_dir]),
+ put(flu_pid, FLU1b),
+ MyChunk = <<"my chunk data">>,
+ {ok, {MyOff,MySize,MyFile}} =
+ append_chunk(Prox1, FakeEpoch, <<"prefix">>, MyChunk,
+ infinity),
+ {ok, MyChunk} = read_chunk(Prox1, FakeEpoch, MyFile, MyOff, MySize),
+
+ %% Alright, now for the rest of the API, whee
+ BadFile = <<"no-such-file">>,
+ {error, no_such_file} = checksum_list(Prox1, FakeEpoch, BadFile),
+ {ok, [_]} = list_files(Prox1, FakeEpoch),
+ {ok, FakeEpoch} = get_latest_epoch(Prox1, public),
+ {error, not_written} = read_latest_projection(Prox1, public),
+ {error, not_written} = read_projection(Prox1, public, 44),
+ P1 = machi_projection:new(1, a, [a], [], [a], [], []),
+ ok = write_projection(Prox1, public, P1),
+ {ok, P1} = read_projection(Prox1, public, 1),
+ {ok, [P1]} = get_all_projections(Prox1, public),
+ {ok, [1]} = list_all_projections(Prox1, public),
+ ok
+ after
+ _ = (catch quit(Prox1))
+ end
+ after
+ (catch machi_flu1:stop(FLU1)),
+ (catch machi_flu1:stop(get(flu_pid)))
+ end.
+
+-endif. % TEST
diff --git a/src/machi_util.erl b/src/machi_util.erl
index 1331d11..af0ac29 100644
--- a/src/machi_util.erl
+++ b/src/machi_util.erl
@@ -31,7 +31,7 @@
read_max_filenum/2, increment_max_filenum/2,
info_msg/2, verb/1, verb/2,
%% TCP protocol helpers
- connect/2
+ connect/2, connect/3
]).
-compile(export_all).
@@ -168,13 +168,19 @@ info_msg(Fmt, Args) ->
-spec connect(inet:ip_address() | inet:hostname(), inet:port_number()) ->
port().
connect(Host, Port) ->
- escript_connect(Host, Port).
+ escript_connect(Host, Port, 4500).
-escript_connect(Host, PortStr) when is_list(PortStr) ->
+-spec connect(inet:ip_address() | inet:hostname(), inet:port_number(),
+ timeout()) ->
+ port().
+connect(Host, Port, Timeout) ->
+ escript_connect(Host, Port, Timeout).
+
+escript_connect(Host, PortStr, Timeout) when is_list(PortStr) ->
Port = list_to_integer(PortStr),
- escript_connect(Host, Port);
-escript_connect(Host, Port) when is_integer(Port) ->
+ escript_connect(Host, Port, Timeout);
+escript_connect(Host, Port, Timeout) when is_integer(Port) ->
{ok, Sock} = gen_tcp:connect(Host, Port, [{active,false}, {mode,binary},
- {packet, raw}]),
+ {packet, raw}], Timeout),
Sock.
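The new connect/3 threads a timeout through to gen_tcp:connect/4, and
connect/2 now defaults to 4500 ms instead of blocking indefinitely. Because
machi_util:connect matches `{ok, Sock} = gen_tcp:connect(...)', a failed or
timed-out connection surfaces as a badmatch error; a hedged usage sketch
(hypothetical wrapper name):

    open_flu_socket(Host, TcpPort) ->
        try
            Sock = machi_util:connect(Host, TcpPort, 1000), % 1 second budget
            {ok, Sock}
        catch
            error:{badmatch, {error, Reason}} ->
                {error, Reason}   % e.g. timeout | econnrefused
        end.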
diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl
index def16c7..5f4367a 100644
--- a/test/machi_chain_manager1_test.erl
+++ b/test/machi_chain_manager1_test.erl
@@ -133,13 +133,13 @@ chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
-ifndef(PULSE).
-smoke0_test() ->
+smoke0_testXXX() ->
{ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
Host = "localhost",
TcpPort = 6623,
{ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]),
{ok, M0} = ?MGR:start_link(a, [a,b,c], a),
- SockA = machi_util:connect(Host, TcpPort),
+ _SockA = machi_util:connect(Host, TcpPort),
try
pong = ?MGR:ping(M0)
after
diff --git a/test/machi_flu1_test.erl b/test/machi_flu1_test.erl
index c37188c..fbfc0ae 100644
--- a/test/machi_flu1_test.erl
+++ b/test/machi_flu1_test.erl
@@ -33,7 +33,12 @@ setup_test_flu(RegName, TcpPort, DataDir) ->
setup_test_flu(RegName, TcpPort, DataDir, []).
setup_test_flu(RegName, TcpPort, DataDir, DbgProps) ->
- clean_up_data_dir(DataDir),
+ case proplists:get_value(save_data_dir, DbgProps) of
+ true ->
+ ok;
+ _ ->
+ clean_up_data_dir(DataDir)
+ end,
{ok, FLU1} = ?FLU:start_link([{RegName, TcpPort, DataDir},
{dbg, DbgProps}]),
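The save_data_dir debug property added here lets a test restart a FLU
against its previous data directory instead of wiping it first, which is
exactly what api_smoke_test above relies on. A usage sketch (hypothetical
registered name, port, and directory):

    %% Fresh FLU: the data dir is cleaned first (default behavior).
    FLU  = machi_flu1_test:setup_test_flu(my_flu, 57124, "./data.my_flu"),
    ok   = machi_flu1:stop(FLU),
    %% Restarted FLU: keep whatever files the previous incarnation wrote.
    FLUb = machi_flu1_test:setup_test_flu(my_flu, 57124, "./data.my_flu",
                                          [save_data_dir]).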
@@ -128,8 +133,8 @@ flu_projection_smoke_test() ->
{ok, {-1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{error, not_written} =
?FLU_C:read_latest_projection(Host, TcpPort, T),
- {ok, []} = ?FLU_C:list_all(Host, TcpPort, T),
- {ok, []} = ?FLU_C:get_all(Host, TcpPort, T),
+ {ok, []} = ?FLU_C:list_all_projections(Host, TcpPort, T),
+ {ok, []} = ?FLU_C:get_all_projections(Host, TcpPort, T),
P1 = machi_projection:new(1, a, [a], [], [a], [], []),
ok = ?FLU_C:write_projection(Host, TcpPort, T, P1),
@@ -137,8 +142,8 @@ flu_projection_smoke_test() ->
{ok, P1} = ?FLU_C:read_projection(Host, TcpPort, T, 1),
{ok, {1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{ok, P1} = ?FLU_C:read_latest_projection(Host, TcpPort, T),
- {ok, [1]} = ?FLU_C:list_all(Host, TcpPort, T),
- {ok, [P1]} = ?FLU_C:get_all(Host, TcpPort, T),
+ {ok, [1]} = ?FLU_C:list_all_projections(Host, TcpPort, T),
+ {ok, [P1]} = ?FLU_C:get_all_projections(Host, TcpPort, T),
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort, T, 2)
end || T <- [public, private] ]
after
From 0e38eddaa99305af04846df865c6330344934e71 Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Mon, 6 Apr 2015 20:07:39 +0900
Subject: [PATCH 05/22] WIP: baby step, machi_chain_manager1_test:smoke0_test()
works
---
src/machi_chain_manager1.erl | 11 +++++++----
test/machi_chain_manager1_test.erl | 14 ++++++++++----
2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl
index 5f662f6..c554621 100644
--- a/src/machi_chain_manager1.erl
+++ b/src/machi_chain_manager1.erl
@@ -35,6 +35,9 @@
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+-define(FLU_C, machi_flu1_client).
+-define(FLU_PC, machi_proxy_flu1_client).
+
%% Keep a history of our flowchart execution in the process dictionary.
-define(REACT(T), put(react, [T|get(react)])).
@@ -202,13 +205,13 @@ code_change(_OldVsn, S, _Extra) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
finish_init(BestProj, #ch_mgr{init_finished=false, myflu=MyFLU} = S) ->
- case machi_flu0:proj_read_latest(MyFLU, private) of
- error_unwritten ->
+ case ?FLU_PC:read_latest_projection(MyFLU, private) of
+ {error, not_written} ->
Epoch = BestProj#projection_v1.epoch_number,
- case machi_flu0:proj_write(MyFLU, Epoch, private, BestProj) of
+ case ?FLU_PC:write_projection(MyFLU, private, BestProj) of
ok ->
S#ch_mgr{init_finished=true, proj=BestProj};
- error_written ->
+ {error, not_written} ->
exit({yo_impossible, ?LINE});
Else ->
?D({retry,Else}),
diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl
index 5f4367a..22a07db 100644
--- a/test/machi_chain_manager1_test.erl
+++ b/test/machi_chain_manager1_test.erl
@@ -28,7 +28,8 @@
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
--define(FLU_C, machi_flu1_client).
+-define(FLU_C, machi_flu1_client).
+-define(FLU_PC, machi_proxy_flu1_client).
-export([]).
@@ -133,18 +134,23 @@ chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
-ifndef(PULSE).
-smoke0_testXXX() ->
+smoke0_test() ->
{ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
Host = "localhost",
TcpPort = 6623,
{ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]),
- {ok, M0} = ?MGR:start_link(a, [a,b,c], a),
+ Pa = #p_srvr{name=a, proto=ipv4, address=Host, port=TcpPort},
+ %% Egadz, more racing on startup, yay. TODO fix.
+ timer:sleep(1),
+ {ok, FLUaP} = ?FLU_PC:start_link(Pa),
+ {ok, M0} = ?MGR:start_link(a, [a,b,c], FLUaP),
_SockA = machi_util:connect(Host, TcpPort),
try
pong = ?MGR:ping(M0)
after
ok = ?MGR:stop(M0),
- ok = machi_flu0:stop(FLUa),
+ ok = machi_flu1:stop(FLUa),
+ ok = ?FLU_PC:quit(FLUaP),
ok = machi_partition_simulator:stop()
end.
From ad872e23ca44610713c6abfbb5ce980bee728007 Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Wed, 8 Apr 2015 14:24:07 +0900
Subject: [PATCH 06/22] Add first basic round of EDoc documentation, 'make
edoc' target
---
Makefile | 8 +-
doc/overview.edoc | 170 ++++++++++++++++++
edoc/.gitignore | 1 +
edoc/edoc-info | 7 +
edoc/erlang.png | Bin 0 -> 2109 bytes
edoc/index.html | 17 ++
edoc/machi_admin_util.html | 60 +++++++
edoc/machi_app.html | 39 +++++
edoc/machi_chain_manager1.html | 155 +++++++++++++++++
edoc/machi_chash.html | 171 ++++++++++++++++++
edoc/machi_flu1.html | 63 +++++++
edoc/machi_flu1_client.html | 278 ++++++++++++++++++++++++++++++
edoc/machi_flu_sup.html | 41 +++++
edoc/machi_projection.html | 70 ++++++++
edoc/machi_projection_store.html | 163 ++++++++++++++++++
edoc/machi_proxy_flu1_client.html | 222 ++++++++++++++++++++++++
edoc/machi_sequencer.html | 23 +++
edoc/machi_sup.html | 39 +++++
edoc/machi_util.html | 150 ++++++++++++++++
edoc/modules-frame.html | 24 +++
edoc/overview-summary.html | 185 ++++++++++++++++++++
edoc/overview.edoc | 14 ++
edoc/packages-frame.html | 11 ++
edoc/stylesheet.css | 55 ++++++
rebar.config | 1 +
src/machi_admin_util.erl | 4 +-
src/machi_app.erl | 2 +
src/machi_chain_manager1.erl | 22 +++
src/machi_chash.erl | 10 +-
src/machi_flu1.erl | 31 +++-
src/machi_flu1_client.erl | 14 +-
src/machi_flu_sup.erl | 3 +
src/machi_projection.erl | 17 ++
src/machi_projection_store.erl | 52 ++++++
src/machi_proxy_flu1_client.erl | 66 +++++++
src/machi_sequencer.erl | 3 +
src/machi_sup.erl | 2 +
src/machi_util.erl | 65 +++++--
38 files changed, 2232 insertions(+), 26 deletions(-)
create mode 100644 doc/overview.edoc
create mode 100644 edoc/.gitignore
create mode 100644 edoc/edoc-info
create mode 100644 edoc/erlang.png
create mode 100644 edoc/index.html
create mode 100644 edoc/machi_admin_util.html
create mode 100644 edoc/machi_app.html
create mode 100644 edoc/machi_chain_manager1.html
create mode 100644 edoc/machi_chash.html
create mode 100644 edoc/machi_flu1.html
create mode 100644 edoc/machi_flu1_client.html
create mode 100644 edoc/machi_flu_sup.html
create mode 100644 edoc/machi_projection.html
create mode 100644 edoc/machi_projection_store.html
create mode 100644 edoc/machi_proxy_flu1_client.html
create mode 100644 edoc/machi_sequencer.html
create mode 100644 edoc/machi_sup.html
create mode 100644 edoc/machi_util.html
create mode 100644 edoc/modules-frame.html
create mode 100644 edoc/overview-summary.html
create mode 100644 edoc/overview.edoc
create mode 100644 edoc/packages-frame.html
create mode 100644 edoc/stylesheet.css
diff --git a/Makefile b/Makefile
index ba8df11..b91d653 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ ifeq ($(REBAR_BIN),)
REBAR_BIN = ./rebar
endif
-.PHONY: rel deps package pkgclean
+.PHONY: rel deps package pkgclean edoc
all: deps compile
@@ -21,6 +21,12 @@ test: deps compile eunit
eunit:
$(REBAR_BIN) -v skip_deps=true eunit
+edoc: edoc-clean
+ $(REBAR_BIN) skip_deps=true doc
+
+edoc-clean:
+ rm -f edoc/*.png edoc/*.html edoc/*.css edoc/edoc-info
+
pulse: compile
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
diff --git a/doc/overview.edoc b/doc/overview.edoc
new file mode 100644
index 0000000..6182f6b
--- /dev/null
+++ b/doc/overview.edoc
@@ -0,0 +1,170 @@
+
+@title Machi: a small village of replicated files
+
+@doc
+
+== About This EDoc Documentation ==
+
+This EDoc-style documentation will concern itself only with Erlang
+function APIs and function & data types. Higher-level design and
+commentary will remain outside of the Erlang EDoc system; please see
+the "Pointers to Other Machi Documentation" section below for more
+details.
+
+Readers should beware that this documentation may be out-of-sync with
+the source code. When in doubt, use the `make edoc' command to
+regenerate all HTML pages.
+
+It is the developer's responsibility to re-generate the documentation
+periodically and commit it to the Git repo.
+
+== Machi Code Overview ==
+
+=== Chain Manager ===
+
+The Chain Manager is responsible for managing the state of Machi's
+"Chain Replication" state. This role is roughly analogous to the
+"Riak Core" application inside of Riak, which takes care of
+coordinating replica placement and replica repair.
+
+For each primitive data server in the cluster, a Machi FLU, there is a
+Chain Manager process that manages its FLU's role within the Machi
+cluster's Chain Replication scheme. Each Chain Manager process
+executes locally and independently to manage the distributed state of
+a single Machi Chain Replication chain.
+<ul>
+
+<li> To contrast with Riak Core ... Riak Core's claimant process is
+     solely responsible for managing certain critical aspects of
+     Riak Core distributed state.  Machi's Chain Manager process
+     performs similar tasks as Riak Core's claimant.  However, Machi
+     has several active Chain Manager processes, one per FLU server,
+     instead of a single active process like Core's claimant.  Each
+     Chain Manager process acts independently; each is constrained
+     so that it will reach consensus via independent computation
+     & action.
+
+     Full discussion of this distributed consensus is outside the
+     scope of this document; see the "Pointers to Other Machi
+     Documentation" section below for more information.
+</li>
+
+<li> Machi differs from a Riak Core application because Machi's
+     replica placement policy is simply, "All Machi servers store
+     replicas of all Machi files".
+     Machi is intended to be a primitive building block for creating larger
+     cluster-of-clusters where files are
+     distributed/fragmented/sharded across a large pool of
+     independent Machi clusters.
+</li>
+
+<li> See
+     [https://www.usenix.org/legacy/events/osdi04/tech/renesse.html]
+     for a copy of the paper, "Chain Replication for Supporting High
+     Throughput and Availability" by Robbert van Renesse and Fred
+     B. Schneider.
+</li>
+
+</ul>
+
+=== FLU ===
+
+The FLU is the basic storage server for Machi.
+
+<ul>
+
+<li> The name FLU is taken from "flash storage unit" from the paper
+     "CORFU: A Shared Log Design for Flash Clusters" by
+     Balakrishnan, Malkhi, Prabhakaran, and Wobber.  See
+     [https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/balakrishnan]
+</li>
+
+<li> In CORFU, the sequencer step is a prerequisite step that is
+     performed by a separate component, the Sequencer.
+     In Machi, the `append_chunk()' protocol message has
+     an implicit "sequencer" operation applied by the "head" of the
+     Machi Chain Replication chain.  If a client wishes to write
+     data that has already been assigned a sequencer position, then
+     the `write_chunk()' API function is used.
+</li>
+
+</ul>
+
+For each FLU, there are three independent tasks that are implemented
+using three different Erlang processes:
+
+<ul>
+
+<li> A FLU server, implemented primarily by `machi_flu.erl'.
+</li>
+
+<li> A projection store server, implemented primarily by
+     `machi_projection_store.erl'.
+</li>
+
+<li> A chain state manager server, implemented primarily by
+     `machi_chain_manager1.erl'.
+</li>
+
+</ul>
+
+From the perspective of failure detection, it is very convenient that
+all three FLU-related services (file server, sequencer server, and
+projection server) are accessed using the same single TCP port.
+
+=== Projection (data structure) ===
+
+The projection is a data structure that specifies the current state
+of the Machi cluster: all FLUs, which FLUs are considered
+up/running or down/crashed/stopped, which FLUs are active
+participants in the Chain Replication protocol, and which FLUs are
+under "repair" (i.e., having their data resynchronized when
+newly added to a cluster or when restarting after a crash).
+
+=== Projection Store (server) ===
+
+The projection store is a storage service that is implemented by an
+Erlang/OTP `gen_server' process that is associated with each
+FLU. Conceptually, the projection store is an array of
+write-once registers. For each projection store register, the
+key is a 2-tuple of an epoch number (`non_neg_integer()' type)
+and a projection type (`public' or `private' type); the value is
+a projection data structure (`projection_v1()' type).
+
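To make the write-once register contract concrete, here is a sketch in
terms of the machi_projection_store API from this patch. It is a sketch,
not repo code: the `undefined' wedge-notify argument and the
`{error, written}' return for a second write to the same register are
assumptions based on the write-once description, not shown in this diff.

    {ok, _Pid} = machi_projection_store:start_link(my_ps, "./data.ps",
                                                   undefined),
    P = machi_projection:new(1, a, [a], [], [a], [], []),
    ok = machi_projection_store:write(my_ps, public, P),
    {ok, P} = machi_projection_store:read(my_ps, public, 1),
    %% Writing the same {Epoch, ProjType} register twice must fail;
    %% the exact error term is assumed here:
    {error, written} = machi_projection_store:write(my_ps, public, P).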
+=== Client and Proxy Client ===
+
+Machi is intentionally avoiding using distributed Erlang for Machi's
+communication. This design decision makes Erlang-side code more
+difficult & complex but allows us the freedom of implementing
+parts of Machi in other languages without major
+protocol&API&glue code changes later in the product's
+lifetime.
+
+There are two layers of interface for Machi clients.
+
+<ul>
+
+<li> The `machi_flu1_client' module implements an API that uses a
+     TCP socket directly.
+</li>
+
+<li> The `machi_proxy_flu1_client' module implements an API that
+     uses a local, long-lived `gen_server' process as a proxy for
+     the remote, perhaps disconnected-or-crashed Machi FLU server.
+</li>
+
+</ul>
+
+The types for both modules ought to be the same. However, due to
+rapid code churn, some differences might exist. Any major difference
+is (almost by definition) a bug: please open a GitHub issue to request
+a correction.
+
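The symmetry between the two layers can be sketched as the same operation
through each transport handle (sketch; `EpochID' and the #p_srvr{} record
`P' are assumed bound, and the host/port values are illustrative):

    %% Layer 1: per-call host/port, caller sees sockets & errors directly.
    {ok, Files1} = machi_flu1_client:list_files("localhost", 6623, EpochID),

    %% Layer 2: one long-lived proxy pid; reconnects are hidden inside.
    {ok, Prox}   = machi_proxy_flu1_client:start_link(P),
    {ok, Files2} = machi_proxy_flu1_client:list_files(Prox, EpochID),
    ok           = machi_proxy_flu1_client:quit(Prox).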
+== TODO notes ==
+
+Any use of the string "TODO" in upper/lower/mixed case, anywhere in
+the code, is a reminder signal of unfinished work.
+
+== Pointers to Other Machi Documentation ==
+
+<ul>
+
+<li> If you are viewing this document locally, please look in the
+     `../doc/' directory,
+</li>
+
+<li> If you are viewing this document via the Web, please find the
+     documentation via this link:
+     [http://github.com/basho/machi/tree/master/doc/]
+     Please be aware that this link points to the `master' branch
+     of the Machi source repository and therefore may be
+     out-of-sync with non-`master' branch code.
+</li>
+
+</ul>
[Generated HTML with markup stripped: the remaining edoc/*.html files created
by this patch (machi_admin_util.html, machi_chain_manager1.html,
machi_chash.html, machi_flu1.html, machi_projection_store.html,
machi_proxy_flu1_client.html, and the others listed in the patch header).
Their text duplicates the module-level %% @doc comments added to the
src/*.erl files later in this patch, except for the machi_chash type
documentation, whose recoverable content is:

  - float_map: subdivides the unit interval, starting at 0.0, into
    partitions that are assigned to various owners.  The sum of all floats
    must be exactly 1.0 (or close enough for floating point purposes).
  - nextfloat_list: differs from a float_map in two respects: 1) it
    contains tuples with the brick name in 2nd position, 2) the float() at
    each position I_n > I_m, for all n, m such that n > m.  For example, a
    nextfloat_list for the float_map example above:
    [{0.25, {br1, nd1}}, {0.75, {br2, nd1}}, {1.0, {br3, nd1}}].
  - owner_weight_list: a definition of brick assignments over the unit
    interval [0.0, 1.0].  The sum of all floats must be 1.0.  For example,
    [{{br1,nd1}, 0.25}, {{br2,nd1}, 0.5}, {{br3,nd1}, 0.25}].
  - weight: an integer which specifies the capacity of an "owner" relative
    to other owners.  For example, if owner A has a weight of 10 and owner
    B has a weight of 20, then B will be assigned twice as much of the unit
    interval as A.
  - machi_chash also documents a pretty/human-friendly float map summary
    (integer ranges between 1 and Scale): a per-owner total of the unit
    interval range(s) owned by each owner, plus a total sum of all
    per-owner ranges (which should be 1.0 but is not enforced).]
diff --git a/edoc/overview-summary.html b/edoc/overview-summary.html
new file mode 100644
index 0000000..a29913c
--- /dev/null
+++ b/edoc/overview-summary.html
@@ -0,0 +1,185 @@
[Generated HTML omitted: edoc/overview-summary.html is the rendered copy of
doc/overview.edoc above.  Its text was a verbatim duplicate with the HTML
markup stripped during extraction, so it is not repeated here.]
diff --git a/edoc/overview.edoc b/edoc/overview.edoc
new file mode 100644
index 0000000..04cf4cc
--- /dev/null
+++ b/edoc/overview.edoc
@@ -0,0 +1,14 @@
+
+@title Machi: a small village of replicated files
+
+@doc
+
+Documentation for Machi is an ongoing challenge.  Much of the
+high-level design & commentary is outside of the Erlang EDoc system.
+
+Zoom2 zoom zoom zoom boom boom boom boom
+
+Rumba tango Rumba tango Rumba tango Rumba tango Rumba tango Rumba
+tango Rumba tango Rumba tango Rumba tango Rumba tango Rumba tango
+Rumba tango Rumba tango Rumba tango Rumba tango Rumba tango Rumba
+tango Rumba tango Rumba tango Rumba tango Rumba tango
diff --git a/edoc/packages-frame.html b/edoc/packages-frame.html
new file mode 100644
index 0000000..189d01c
--- /dev/null
+++ b/edoc/packages-frame.html
@@ -0,0 +1,11 @@
+
+
+
+The machi application
+
+
+
+
Packages
+
+
+
\ No newline at end of file
diff --git a/edoc/stylesheet.css b/edoc/stylesheet.css
new file mode 100644
index 0000000..e426a90
--- /dev/null
+++ b/edoc/stylesheet.css
@@ -0,0 +1,55 @@
+/* standard EDoc style sheet */
+body {
+ font-family: Verdana, Arial, Helvetica, sans-serif;
+ margin-left: .25in;
+ margin-right: .2in;
+ margin-top: 0.2in;
+ margin-bottom: 0.2in;
+ color: #000000;
+ background-color: #ffffff;
+}
+h1,h2 {
+ margin-left: -0.2in;
+}
+div.navbar {
+ background-color: #add8e6;
+ padding: 0.2em;
+}
+h2.indextitle {
+ padding: 0.4em;
+ background-color: #add8e6;
+}
+h3.function,h3.typedecl {
+ background-color: #add8e6;
+ padding-left: 1em;
+}
+div.spec {
+ margin-left: 2em;
+ background-color: #eeeeee;
+}
+a.module,a.package {
+ text-decoration:none
+}
+a.module:hover,a.package:hover {
+ background-color: #eeeeee;
+}
+ul.definitions {
+ list-style-type: none;
+}
+ul.index {
+ list-style-type: none;
+ background-color: #eeeeee;
+}
+
+/*
+ * Minor style tweaks
+ */
+ul {
+ list-style-type: square;
+}
+table {
+ border-collapse: collapse;
+}
+td {
+ padding: 3
+}
diff --git a/rebar.config b/rebar.config
index 5b3cfa2..afb0283 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,5 +1,6 @@
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
+{edoc_opts, [{dir, "./edoc"}]}.
{deps, [
{lager, ".*", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}}
diff --git a/src/machi_admin_util.erl b/src/machi_admin_util.erl
index 990d948..f0db9d0 100644
--- a/src/machi_admin_util.erl
+++ b/src/machi_admin_util.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Machi chain replication administration utilities.
+
-module(machi_admin_util).
%% TODO Move these types to a common header file? (also machi_flu1_client.erl?)
@@ -114,7 +116,7 @@ verify_chunk_checksum(File, ReadChunk) ->
fun({Offset, Size, CSum}, Acc) ->
case ReadChunk(File, Offset, Size) of
{ok, Chunk} ->
- CSum2 = machi_util:checksum(Chunk),
+ CSum2 = machi_util:checksum_chunk(Chunk),
if CSum == CSum2 ->
Acc;
true ->
diff --git a/src/machi_app.erl b/src/machi_app.erl
index 6dfddf7..2701f60 100644
--- a/src/machi_app.erl
+++ b/src/machi_app.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Top-level supervisor for the Machi application.
+
-module(machi_app).
-behaviour(application).
diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl
index c554621..ef81558 100644
--- a/src/machi_chain_manager1.erl
+++ b/src/machi_chain_manager1.erl
@@ -19,6 +19,28 @@
%% under the License.
%%
%% -------------------------------------------------------------------
+
+%% @doc The Machi chain manager, Guardian of all things related to
+%% Chain Replication state, status, and data replica safety.
+%%
+%% The Chain Manager is responsible for managing the state of Machi's
+%% "Chain Replication" state. This role is roughly analogous to the
+%% "Riak Core" application inside of Riak, which takes care of
+%% coordinating replica placement and replica repair.
+%%
+%% For each primitive data server in the cluster, a Machi FLU, there
+%% is a Chain Manager process that manages its FLU's role within the
+%% Machi cluster's Chain Replication scheme. Each Chain Manager
+%% process executes locally and independently to manage the
+%% distributed state of a single Machi Chain Replication chain.
+%%
+%% Machi's Chain Manager process performs similar tasks as Riak Core's
+%% claimant. However, Machi has several active Chain Manager
+%% processes, one per FLU server, instead of a single active process
+%% like Core's claimant. Each Chain Manager process acts
+%% independently; each is constrained so that it will reach consensus
+%% via independent computation & action.
+
-module(machi_chain_manager1).
%% TODO: I am going to sever the connection between the flowchart and the
diff --git a/src/machi_chash.erl b/src/machi_chash.erl
index f45473a..6ad46f3 100644
--- a/src/machi_chash.erl
+++ b/src/machi_chash.erl
@@ -16,9 +16,13 @@
%%%
%%%-------------------------------------------------------------------
-%% Consistent hashing library. Also known as "random slicing".
-%% Originally from the Hibari DB source code at https://github.com/hibari
+%% @doc Consistent hashing library. Also known as "random slicing".
%%
+%% This code was originally from the Hibari DB source code at
+%% [https://github.com/hibari]
+
+-module(machi_chash).
+
%% TODO items:
%%
%% 1. Refactor to use bigints instead of floating point numbers. The
@@ -26,8 +30,6 @@
%% much wiggle-room for making really small hashing range
%% definitions.
--module(machi_chash).
-
-define(SMALLEST_SIGNIFICANT_FLOAT_SIZE, 0.1e-12).
-define(SHA_MAX, (1 bsl (20*8))).
diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl
index bd34ff5..cb2c5fc 100644
--- a/src/machi_flu1.erl
+++ b/src/machi_flu1.erl
@@ -18,6 +18,33 @@
%%
%% -------------------------------------------------------------------
+%% @doc The Machi FLU file server + file location sequencer.
+%%
+%% This module implements only the Machi FLU file server and its
+%% implicit sequencer.
+%% Please see the EDoc "Overview" for details about the FLU as a
+%% primitive file server process vs. the larger Machi design of a FLU
+%% as a sequencer + file server + chain manager group of processes.
+%%
+%% For the moment, this module also implements a rudimentary TCP-based
+%% protocol as the sole supported access method to the server,
+%% sequencer, and projection store. Conceptually, those three
+%% services are independent and ought to have their own protocols. As
+%% a practical matter, there is no need for wire protocol
+%% compatibility. Furthermore, from the perspective of failure
+%% detection, it is very convenient that all three FLU-related
+%% services are accessed using the same single TCP port.
+%%
+%% The FLU is named after the CORFU server "FLU" or "FLash Unit" server.
+%%
+%% TODO There is one major missing feature in this FLU implementation:
+%% there is no "write-once" enforcement for any position in a Machi
+%% file. At the moment, we rely on correct behavior of the client
+%% & the sequencer to avoid overwriting data. In the Real World,
+%% however, all Machi file data is supposed to be exactly write-once
+%% to avoid problems with bugs, wire protocol corruption, malicious
+%% clients, etc.
+
-module(machi_flu1).
-include_lib("kernel/include/file.hrl").
@@ -218,7 +245,7 @@ do_net_server_append2(RegName, Sock, LenHex, Prefix) ->
<<Len:32/big>> = machi_util:hexstr_to_bin(LenHex),
ok = inet:setopts(Sock, [{packet, raw}]),
{ok, Chunk} = gen_tcp:recv(Sock, Len, 60*1000),
- CSum = machi_util:checksum(Chunk),
+ CSum = machi_util:checksum_chunk(Chunk),
try
RegName ! {seq_append, self(), Prefix, Chunk, CSum}
catch error:badarg ->
@@ -300,7 +327,7 @@ do_net_server_write2(Sock, OffsetHex, LenHex, FileBin, DataDir, FHc) ->
DoItFun = fun(FHd, Offset, Len) ->
ok = inet:setopts(Sock, [{packet, raw}]),
{ok, Chunk} = gen_tcp:recv(Sock, Len),
- CSum = machi_util:checksum(Chunk),
+ CSum = machi_util:checksum_chunk(Chunk),
case file:pwrite(FHd, Offset, Chunk) of
ok ->
CSumHex = machi_util:bin_to_hexstr(CSum),
diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl
index 570c9fa..d2dac02 100644
--- a/src/machi_flu1_client.erl
+++ b/src/machi_flu1_client.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Erlang API for the Machi FLU TCP protocol version 1.
+
-module(machi_flu1_client).
-include("machi.hrl").
@@ -151,7 +153,7 @@ list_files(Host, TcpPort, EpochID) when is_integer(TcpPort) ->
catch gen_tcp:close(Sock)
end.
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
-spec get_latest_epoch(port(), projection_type()) ->
{ok, epoch_id()} | {error, term()}.
@@ -159,7 +161,7 @@ get_latest_epoch(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
get_latest_epoch2(Sock, ProjType).
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
-spec get_latest_epoch(inet_host(), inet_port(),
projection_type()) ->
@@ -173,7 +175,7 @@ get_latest_epoch(Host, TcpPort, ProjType)
catch gen_tcp:close(Sock)
end.
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
-spec read_latest_projection(port(), projection_type()) ->
{ok, projection()} | {error, not_written} | {error, term()}.
@@ -181,7 +183,7 @@ read_latest_projection(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
read_latest_projection2(Sock, ProjType).
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
-spec read_latest_projection(inet_host(), inet_port(),
projection_type()) ->
@@ -368,7 +370,7 @@ append_chunk2(Sock, EpochID, Prefix0, Chunk0) ->
erase(bad_sock),
try
%% TODO: add client-side checksum to the server's protocol
- %% _ = crypto:hash(md5, Chunk),
+ %% _ = machi_util:checksum_chunk(Chunk),
Prefix = machi_util:make_binary(Prefix0),
Chunk = machi_util:make_binary(Chunk0),
Len = iolist_size(Chunk0),
@@ -536,7 +538,7 @@ write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
{EpochNum, EpochCSum} = EpochID,
    EpochIDRaw = <<EpochNum:(4*8)/big, EpochCSum/binary>>,
%% TODO: add client-side checksum to the server's protocol
- %% _ = crypto:hash(md5, Chunk),
+ %% _ = machi_util:checksum_chunk(Chunk),
File = machi_util:make_binary(File0),
true = (Offset >= ?MINIMUM_OFFSET),
OffsetHex = machi_util:int_to_hexbin(Offset, 64),
diff --git a/src/machi_flu_sup.erl b/src/machi_flu_sup.erl
index 4ad26fc..ce29502 100644
--- a/src/machi_flu_sup.erl
+++ b/src/machi_flu_sup.erl
@@ -18,6 +18,9 @@
%%
%% -------------------------------------------------------------------
+%% @doc Supervisor for Machi FLU servers and their related support
+%% servers.
+
-module(machi_flu_sup).
-behaviour(supervisor).
diff --git a/src/machi_projection.erl b/src/machi_projection.erl
index d4f7e42..42bfc8a 100644
--- a/src/machi_projection.erl
+++ b/src/machi_projection.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc API for manipulating Machi projection data structures (i.e., records).
+
-module(machi_projection).
-include("machi_projection.hrl").
@@ -30,13 +32,19 @@
make_projection_summary/1
]).
+%% @doc Create a new projection record.
+
new(MyName, All_list, UPI_list, Down_list, Repairing_list, Ps) ->
new(0, MyName, All_list, Down_list, UPI_list, Repairing_list, Ps).
+%% @doc Create a new projection record.
+
new(EpochNum, MyName, All_list, Down_list, UPI_list, Repairing_list, Dbg) ->
new(EpochNum, MyName, All_list, Down_list, UPI_list, Repairing_list,
Dbg, []).
+%% @doc Create a new projection record.
+
new(EpochNum, MyName, All_list0, Down_list, UPI_list, Repairing_list,
Dbg, Dbg2)
when is_integer(EpochNum), EpochNum >= 0,
@@ -87,15 +95,22 @@ new(EpochNum, MyName, All_list0, Down_list, UPI_list, Repairing_list,
},
update_projection_dbg2(update_projection_checksum(P), Dbg2).
+%% @doc Update the checksum element of a projection record.
+
update_projection_checksum(P) ->
CSum = crypto:hash(sha,
term_to_binary(P#projection_v1{epoch_csum= <<>>,
dbg2=[]})),
P#projection_v1{epoch_csum=CSum}.
+%% @doc Update the `dbg2' element of a projection record.
+
update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
P#projection_v1{dbg2=Dbg2}.
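Because update_projection_checksum/1 hashes a copy of the record with the
`epoch_csum' and `dbg2' fields blanked, later `dbg2' annotations never
invalidate a stored checksum. A small sketch (assumes machi_projection.hrl
is included for the record syntax):

    P0 = machi_projection:new(1, a, [a], [], [a], [], []),
    P1 = machi_projection:update_projection_dbg2(P0, [{note, "hi"}]),
    %% The stored checksum was computed with epoch_csum and dbg2 blanked,
    %% and update_projection_dbg2/2 only sets dbg2, so:
    true = (P0#projection_v1.epoch_csum =:= P1#projection_v1.epoch_csum).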
+%% @doc Compare two projection records for equality (assuming that the
+%% checksum element has been correctly calculated).
+
-spec compare(#projection_v1{}, #projection_v1{}) ->
integer().
compare(#projection_v1{epoch_number=E1, epoch_csum=C1},
@@ -107,6 +122,8 @@ compare(#projection_v1{epoch_number=E1},
E1 > E2 -> 1
end.
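A quick sketch of compare/2's contract, using new/7 from above; the
expected results follow directly from the clauses here (0 only when both
epoch number and checksum match, otherwise the sign of the epoch
difference):

    P1 = machi_projection:new(1, a, [a], [], [a], [], []),
    P2 = machi_projection:new(2, a, [a], [], [a], [], []),
    0  = machi_projection:compare(P1, P1),
    -1 = machi_projection:compare(P1, P2),
    1  = machi_projection:compare(P2, P1).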
+%% @doc Create a proplist-style summary of a projection record.
+
make_projection_summary(#projection_v1{epoch_number=EpochNum,
all_members=_All_list,
down=Down_list,
diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl
index 09555d2..f4b9074 100644
--- a/src/machi_projection_store.erl
+++ b/src/machi_projection_store.erl
@@ -18,6 +18,25 @@
%%
%% -------------------------------------------------------------------
+%% @doc The Machi write-once projection store service.
+%%
+%% This API is gen_server-style message passing, intended for use
+%% within a single Erlang node to glue together the projection store
+%% server with the node-local process that implements Machi's TCP
+%% client access protocol (on the "server side" of the TCP connection).
+%%
+%% All Machi client access to the projection store SHOULD NOT use this
+%% module's API.
+%%
+%% The projection store is implemented by an Erlang/OTP `gen_server'
+%% process that is associated with each FLU. Conceptually, the
+%% projection store is an array of write-once registers. For each
+%% projection store register, the key is a 2-tuple of an epoch number
+%% (`non_neg_integer()' type) and a projection type (`public' or
+%% `private' type); the value is a projection data structure
+%% (`projection_v1()' type).
+
+
-module(machi_projection_store).
-include("machi_projection.hrl").
@@ -48,35 +67,60 @@
max_private_epoch = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()}
}).
+%% @doc Start a new projection store server.
+%%
+%% The `DataDir' argument should be the same directory as specified
+%% for use by our companion FLU data server -- all file system paths
+%% used by this server are intended to be stored underneath a common
+%% file system parent directory as the FLU data server & sequencer
+%% servers.
+
start_link(RegName, DataDir, NotifyWedgeStateChanges) ->
gen_server:start_link({local, RegName},
?MODULE, [DataDir, NotifyWedgeStateChanges], []).
+%% @doc Fetch the latest epoch number + checksum for type `ProjType'.
+
get_latest_epoch(PidSpec, ProjType) ->
get_latest_epoch(PidSpec, ProjType, infinity).
+%% @doc Fetch the latest epoch number + checksum for type `ProjType'.
+
get_latest_epoch(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {get_latest_epoch, ProjType}, Timeout).
+%% @doc Fetch the latest projection record for type `ProjType'.
+
read_latest_projection(PidSpec, ProjType) ->
read_latest_projection(PidSpec, ProjType, infinity).
+%% @doc Fetch the latest projection record for type `ProjType'.
+
read_latest_projection(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {read_latest_projection, ProjType}, Timeout).
+%% @doc Fetch the projection record of type `ProjType' for epoch number `Epoch'.
+
read(PidSpec, ProjType, Epoch) ->
read(PidSpec, ProjType, Epoch, infinity).
+%% @doc Fetch the projection record of type `ProjType' for epoch number `Epoch'.
+
read(PidSpec, ProjType, Epoch, Timeout)
when ProjType == 'public' orelse ProjType == 'private',
is_integer(Epoch), Epoch >= 0 ->
g_call(PidSpec, {read, ProjType, Epoch}, Timeout).
+%% @doc Write the projection record `Proj' of type `ProjType'; the
+%% epoch number is taken from the record itself.
+
write(PidSpec, ProjType, Proj) ->
write(PidSpec, ProjType, Proj, infinity).
+%% @doc Write the projection record `Proj' of type `ProjType'; the
+%% epoch number is taken from the record itself.
+
write(PidSpec, ProjType, Proj, Timeout)
when ProjType == 'public' orelse ProjType == 'private',
is_record(Proj, projection_v1),
@@ -84,16 +128,24 @@ write(PidSpec, ProjType, Proj, Timeout)
Proj#projection_v1.epoch_number >= 0 ->
g_call(PidSpec, {write, ProjType, Proj}, Timeout).
+%% @doc Fetch all projection records of type `ProjType'.
+
get_all_projections(PidSpec, ProjType) ->
get_all_projections(PidSpec, ProjType, infinity).
+%% @doc Fetch all projection records of type `ProjType'.
+
get_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {get_all_projections, ProjType}, Timeout).
+%% @doc Fetch all projection epoch numbers of type `ProjType'.
+
list_all_projections(PidSpec, ProjType) ->
list_all_projections(PidSpec, ProjType, infinity).
+%% @doc Fetch all projection epoch numbers of type `ProjType'.
+
list_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {list_all_projections, ProjType}, Timeout).
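+
+%% Usage sketch for the API above -- a hedged illustration only: the
+%% registered name `ps_a', the record variable `NewProj', and the
+%% exact `{error, written}' return value are assumptions based on the
+%% write-once register description, not taken from tests.
+%%
+%%     {ok, {Epoch, _CSum}} =
+%%         machi_projection_store:get_latest_epoch(ps_a, public),
+%%     {ok, _P} = machi_projection_store:read(ps_a, public, Epoch),
+%%     ok = machi_projection_store:write(ps_a, public, NewProj),
+%%     %% The register for NewProj's epoch is now written forever:
+%%     {error, written} =
+%%         machi_projection_store:write(ps_a, public, NewProj).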
diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl
index 5222fb9..f690c5c 100644
--- a/src/machi_proxy_flu1_client.erl
+++ b/src/machi_proxy_flu1_client.erl
@@ -18,6 +18,23 @@
%%
%% -------------------------------------------------------------------
+%% @doc Erlang API for the Machi FLU TCP protocol version 1, with a
+%% proxy-process style API for hiding messy details such as TCP
+%% connection/disconnection with the remote Machi server.
+%%
+%% Machi is intentionally avoiding using distributed Erlang for
+%% Machi's communication. This design decision makes Erlang-side code
+%% more difficult & complex, but it's the price to pay for some
+%% language independence.  Later in Machi's life cycle, we will need
+%% to (re-)implement some components in a non-Erlang/BEAM-based
+%% language.
+%%
+%% This module implements a "man in the middle" proxy between the
+%% Erlang client and Machi server (which is on the "far side" of a TCP
+%% connection to somewhere). This proxy process will always execute
+%% on the same Erlang node as the Erlang client that uses it. The
+%% proxy is intended to be a stable, long-lived process that survives
+%% TCP communication problems with the remote server.
+
-module(machi_proxy_flu1_client).
-behaviour(gen_server).
@@ -61,79 +78,128 @@
sock :: 'undefined' | port()
}).
+%% @doc Start a local, long-lived process that will be our steady
+%% & reliable communication proxy with the fickle & flaky
+%% remote Machi server.
+
start_link(#p_srvr{}=I) ->
gen_server:start_link(?MODULE, [I], []).
+%% @doc Append a chunk (binary- or iolist-style) of data to a file
+%% with `Prefix'.
+
append_chunk(PidSpec, EpochID, Prefix, Chunk) ->
append_chunk(PidSpec, EpochID, Prefix, Chunk, infinity).
+%% @doc Append a chunk (binary- or iolist-style) of data to a file
+%% with `Prefix'.
+
append_chunk(PidSpec, EpochID, Prefix, Chunk, Timeout) ->
gen_server:call(PidSpec, {req, {append_chunk, EpochID, Prefix, Chunk}},
Timeout).
+%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
+
read_chunk(PidSpec, EpochID, File, Offset, Size) ->
read_chunk(PidSpec, EpochID, File, Offset, Size, infinity).
+%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
+
read_chunk(PidSpec, EpochID, File, Offset, Size, Timeout) ->
gen_server:call(PidSpec, {req, {read_chunk, EpochID, File, Offset, Size}},
Timeout).
+%% @doc Fetch the list of chunk checksums for `File'.
+
checksum_list(PidSpec, EpochID, File) ->
checksum_list(PidSpec, EpochID, File, infinity).
+%% @doc Fetch the list of chunk checksums for `File'.
+
checksum_list(PidSpec, EpochID, File, Timeout) ->
gen_server:call(PidSpec, {req, {checksum_list, EpochID, File}},
Timeout).
+%% @doc Fetch the list of all files on the remote FLU.
+
list_files(PidSpec, EpochID) ->
list_files(PidSpec, EpochID, infinity).
+%% @doc Fetch the list of all files on the remote FLU.
+
list_files(PidSpec, EpochID, Timeout) ->
gen_server:call(PidSpec, {req, {list_files, EpochID}},
Timeout).
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
+
get_latest_epoch(PidSpec, ProjType) ->
get_latest_epoch(PidSpec, ProjType, infinity).
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
+
get_latest_epoch(PidSpec, ProjType, Timeout) ->
gen_server:call(PidSpec, {req, {get_latest_epoch, ProjType}},
Timeout).
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
+
read_latest_projection(PidSpec, ProjType) ->
read_latest_projection(PidSpec, ProjType, infinity).
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
+
read_latest_projection(PidSpec, ProjType, Timeout) ->
gen_server:call(PidSpec, {req, {read_latest_projection, ProjType}},
Timeout).
+%% @doc Read the projection record of type `ProjType' for epoch number `Epoch'.
+
read_projection(PidSpec, ProjType, Epoch) ->
read_projection(PidSpec, ProjType, Epoch, infinity).
+%% @doc Read the projection record of type `ProjType' for epoch number `Epoch'.
+
read_projection(PidSpec, ProjType, Epoch, Timeout) ->
gen_server:call(PidSpec, {req, {read_projection, ProjType, Epoch}},
Timeout).
+%% @doc Write a projection `Proj' of type `ProjType'.
+
write_projection(PidSpec, ProjType, Proj) ->
write_projection(PidSpec, ProjType, Proj, infinity).
+%% @doc Write a projection `Proj' of type `ProjType'.
+
write_projection(PidSpec, ProjType, Proj, Timeout) ->
gen_server:call(PidSpec, {req, {write_projection, ProjType, Proj}},
Timeout).
+%% @doc Get all projections from the FLU's projection store.
+
get_all_projections(PidSpec, ProjType) ->
get_all_projections(PidSpec, ProjType, infinity).
+%% @doc Get all projections from the FLU's projection store.
+
get_all_projections(PidSpec, ProjType, Timeout) ->
gen_server:call(PidSpec, {req, {get_all_projections, ProjType}},
Timeout).
+%% @doc Get all epoch numbers from the FLU's projection store.
+
list_all_projections(PidSpec, ProjType) ->
list_all_projections(PidSpec, ProjType, infinity).
+%% @doc Get all epoch numbers from the FLU's projection store.
+
list_all_projections(PidSpec, ProjType, Timeout) ->
gen_server:call(PidSpec, {req, {list_all_projections, ProjType}},
Timeout).
+%% @doc Quit & close the connection to the remote FLU and stop our
+%% proxy process.
+
quit(PidSpec) ->
gen_server:call(PidSpec, quit, infinity).
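+
+%% A hedged end-to-end sketch of the proxy API above.  The #p_srvr{}
+%% field names and the return shapes of append_chunk & read_chunk are
+%% assumptions for illustration only.
+%%
+%%     {ok, Proxy} = machi_proxy_flu1_client:start_link(
+%%                       #p_srvr{name=a, address="localhost", port=32900}),
+%%     {ok, EpochID} = machi_proxy_flu1_client:get_latest_epoch(Proxy, public),
+%%     {ok, {Off, Sz, File}} = machi_proxy_flu1_client:append_chunk(
+%%                                 Proxy, EpochID, <<"prefix">>, <<"data!">>),
+%%     {ok, Chunk} = machi_proxy_flu1_client:read_chunk(
+%%                       Proxy, EpochID, File, Off, Sz),
+%%     ok = machi_proxy_flu1_client:quit(Proxy).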
diff --git a/src/machi_sequencer.erl b/src/machi_sequencer.erl
index ddd81a5..4d1116d 100644
--- a/src/machi_sequencer.erl
+++ b/src/machi_sequencer.erl
@@ -18,6 +18,9 @@
%%
%% -------------------------------------------------------------------
+%% @doc "Mothballed" sequencer code, perhaps to be reused sometime in
+%% the future?
+
-module(machi_sequencer).
-compile(export_all).
diff --git a/src/machi_sup.erl b/src/machi_sup.erl
index dcaadbe..31fcc9b 100644
--- a/src/machi_sup.erl
+++ b/src/machi_sup.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Top Machi application supervisor.
+
-module(machi_sup).
-behaviour(supervisor).
diff --git a/src/machi_util.erl b/src/machi_util.erl
index af0ac29..9efbbc0 100644
--- a/src/machi_util.erl
+++ b/src/machi_util.erl
@@ -18,10 +18,12 @@
%%
%% -------------------------------------------------------------------
+%% @doc Miscellaneous utility functions.
+
-module(machi_util).
-export([
- checksum/1,
+ checksum_chunk/1,
hexstr_to_bin/1, bin_to_hexstr/1,
hexstr_to_int/1, int_to_hexstr/2, int_to_hexbin/2,
make_binary/1, make_string/1,
@@ -39,33 +41,34 @@
-include("machi_projection.hrl").
-include_lib("kernel/include/file.hrl").
-append(Server, Prefix, Chunk) when is_binary(Prefix), is_binary(Chunk) ->
- CSum = checksum(Chunk),
- Server ! {seq_append, self(), Prefix, Chunk, CSum},
- receive
- {assignment, Offset, File} ->
- {Offset, File}
- after 10*1000 ->
- bummer
- end.
+%% @doc Create a registered name atom for FLU sequencer internal
+%% rendezvous/message passing use.
make_regname(Prefix) when is_binary(Prefix) ->
erlang:binary_to_atom(Prefix, latin1);
make_regname(Prefix) when is_list(Prefix) ->
erlang:list_to_atom(Prefix).
+%% @doc Calculate a config file path, by common convention.
+
make_config_filename(DataDir, Prefix) ->
lists:flatten(io_lib:format("~s/config/~s", [DataDir, Prefix])).
+%% @doc Calculate a checksum file path, by common convention.
+
make_checksum_filename(DataDir, Prefix, SequencerName, FileNum) ->
lists:flatten(io_lib:format("~s/config/~s.~s.~w.csum",
[DataDir, Prefix, SequencerName, FileNum])).
+%% @doc Calculate a checksum file path, by common convention.
+
make_checksum_filename(DataDir, "") ->
lists:flatten(io_lib:format("~s/config", [DataDir]));
make_checksum_filename(DataDir, FileName) ->
lists:flatten(io_lib:format("~s/config/~s.csum", [DataDir, FileName])).
+%% @doc Calculate a file data file path, by common convention.
+
make_data_filename(DataDir, "") ->
FullPath = lists:flatten(io_lib:format("~s/data", [DataDir])),
{"", FullPath};
@@ -73,17 +76,24 @@ make_data_filename(DataDir, File) ->
FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
{File, FullPath}.
+%% @doc Calculate a file data file path, by common convention.
+
make_data_filename(DataDir, Prefix, SequencerName, FileNum) ->
File = erlang:iolist_to_binary(io_lib:format("~s.~s.~w",
[Prefix, SequencerName, FileNum])),
FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
{File, FullPath}.
+%% @doc Calculate a projection store file path, by common convention.
+
make_projection_filename(DataDir, "") ->
lists:flatten(io_lib:format("~s/projection", [DataDir]));
make_projection_filename(DataDir, File) ->
lists:flatten(io_lib:format("~s/projection/~s", [DataDir, File])).
+%% @doc Read the file size of a config file, which is used as the
+%% basis for a minimum sequence number.
+
read_max_filenum(DataDir, Prefix) ->
case file:read_file_info(make_config_filename(DataDir, Prefix)) of
{error, enoent} ->
@@ -92,6 +102,9 @@ read_max_filenum(DataDir, Prefix) ->
FI#file_info.size
end.
+%% @doc Increase the file size of a config file, which is used as the
+%% basis for a minimum sequence number.
+
increment_max_filenum(DataDir, Prefix) ->
try
{ok, FH} = file:open(make_config_filename(DataDir, Prefix), [append]),
@@ -103,6 +116,8 @@ increment_max_filenum(DataDir, Prefix) ->
{error, Error, erlang:get_stacktrace()}
end.
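+
+%% Note the trick above: the config file's byte size *is* the counter,
+%% so incrementing it means appending a single byte.  A hypothetical
+%% standalone version of the same idea (names assumed):
+%%
+%%     bump(Path) ->
+%%         {ok, FH} = file:open(Path, [append]),
+%%         ok = file:write(FH, <<"x">>),
+%%         ok = file:sync(FH),
+%%         ok = file:close(FH).
+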
+%% @doc Convert a hexadecimal string to a `binary()'.
+
hexstr_to_bin(S) when is_list(S) ->
hexstr_to_bin(S, []);
hexstr_to_bin(B) when is_binary(B) ->
@@ -114,6 +129,8 @@ hexstr_to_bin([X,Y|T], Acc) ->
{ok, [V], []} = io_lib:fread("~16u", [X,Y]),
hexstr_to_bin(T, [V | Acc]).
+%% @doc Convert a `binary()' to a hexadecimal string.
+
bin_to_hexstr(<<>>) ->
[];
bin_to_hexstr(<<X:4, Y:4, T/binary>>) ->
@@ -124,40 +141,60 @@ hex_digit(X) when X < 10 ->
hex_digit(X) ->
X - 10 + $a.
+%% @doc Convert a compatible Erlang data type into a `binary()' equivalent.
+
make_binary(X) when is_binary(X) ->
X;
make_binary(X) when is_list(X) ->
iolist_to_binary(X).
+%% @doc Convert a compatible Erlang data type into a `string()' equivalent.
+
make_string(X) when is_list(X) ->
lists:flatten(X);
make_string(X) when is_binary(X) ->
binary_to_list(X).
+%% @doc Convert a hexadecimal string to an integer.
+
hexstr_to_int(X) ->
B = hexstr_to_bin(X),
B_size = byte_size(B) * 8,
<<I:B_size/big>> = B,
I.
+%% @doc Convert an integer into a hexadecimal string whose length is
+%% based on `I_size'.
+
int_to_hexstr(I, I_size) ->
bin_to_hexstr(<<I:I_size/big>>).
+%% @doc Convert an integer into a hexadecimal string (in `binary()'
+%% form) whose length is based on `I_size'.
+
int_to_hexbin(I, I_size) ->
list_to_binary(int_to_hexstr(I, I_size)).
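+
+%% Expected round trips for the hex helpers above (worked out by hand
+%% from the definitions, not taken from a test suite):
+%%
+%%     int_to_hexstr(255, 16) -> "00ff"
+%%     hexstr_to_int("00ff")  -> 255
+%%     int_to_hexbin(255, 16) -> <<"00ff">>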
-checksum(Bin) when is_binary(Bin) ->
- crypto:hash(md5, Bin).
+%% @doc Calculate a checksum for a chunk of file data.
+
+checksum_chunk(Chunk) when is_binary(Chunk); is_list(Chunk) ->
+ crypto:hash(sha, Chunk).
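+
+%% Note that this is a semantic change, not just a rename: checksum/1
+%% hashed only binaries, with MD5 (16-byte digest), while
+%% checksum_chunk/1 also accepts an iolist and hashes with SHA-1
+%% (20-byte digest), so stored checksum sizes change as well.
+%% For example:
+%%
+%%     20 = byte_size(machi_util:checksum_chunk(<<"hello">>)).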
+
+%% @doc Log a verbose message.
verb(Fmt) ->
verb(Fmt, []).
+%% @doc Log a verbose message.
+
verb(Fmt, Args) ->
case application:get_env(kernel, verbose) of
{ok, true} -> io:format(Fmt, Args);
_ -> ok
end.
+%% @doc Log an 'info' level message.
+
info_msg(Fmt, Args) ->
case application:get_env(kernel, verbose) of {ok, false} -> ok;
_ -> error_logger:info_msg(Fmt, Args)
@@ -165,11 +202,15 @@ info_msg(Fmt, Args) ->
%%%%%%%%%%%%%%%%%
+%% @doc Create a TCP connection to a remote Machi server.
+
-spec connect(inet:ip_address() | inet:hostname(), inet:port_number()) ->
port().
connect(Host, Port) ->
escript_connect(Host, Port, 4500).
+%% @doc Create a TCP connection to a remote Machi server.
+
-spec connect(inet:ip_address() | inet:hostname(), inet:port_number(),
timeout()) ->
port().
From e0cabf3cb6d97db85197fffb77bb1e592937cfbe Mon Sep 17 00:00:00 2001
From: Scott Lystig Fritchie
Date: Wed, 8 Apr 2015 17:58:49 +0900
Subject: [PATCH 07/22] Remove 'edoc' dir: it's moved to gh-pages
---
edoc/.gitignore | 1 -
edoc/edoc-info | 7 -
edoc/erlang.png | Bin 2109 -> 0 bytes
edoc/index.html | 17 --
edoc/machi_admin_util.html | 60 -------
edoc/machi_app.html | 39 -----
edoc/machi_chain_manager1.html | 155 -----------------
edoc/machi_chash.html | 171 ------------------
edoc/machi_flu1.html | 63 -------
edoc/machi_flu1_client.html | 278 ------------------------------
edoc/machi_flu_sup.html | 41 -----
edoc/machi_projection.html | 70 --------
edoc/machi_projection_store.html | 163 ------------------
edoc/machi_proxy_flu1_client.html | 222 ------------------------
edoc/machi_sequencer.html | 23 ---
edoc/machi_sup.html | 39 -----
edoc/machi_util.html | 150 ----------------
edoc/modules-frame.html | 24 ---
edoc/overview-summary.html | 185 --------------------
edoc/overview.edoc | 14 --
edoc/packages-frame.html | 11 --
edoc/stylesheet.css | 55 ------
22 files changed, 1788 deletions(-)
delete mode 100644 edoc/.gitignore
delete mode 100644 edoc/edoc-info
delete mode 100644 edoc/erlang.png
delete mode 100644 edoc/index.html
delete mode 100644 edoc/machi_admin_util.html
delete mode 100644 edoc/machi_app.html
delete mode 100644 edoc/machi_chain_manager1.html
delete mode 100644 edoc/machi_chash.html
delete mode 100644 edoc/machi_flu1.html
delete mode 100644 edoc/machi_flu1_client.html
delete mode 100644 edoc/machi_flu_sup.html
delete mode 100644 edoc/machi_projection.html
delete mode 100644 edoc/machi_projection_store.html
delete mode 100644 edoc/machi_proxy_flu1_client.html
delete mode 100644 edoc/machi_sequencer.html
delete mode 100644 edoc/machi_sup.html
delete mode 100644 edoc/machi_util.html
delete mode 100644 edoc/modules-frame.html
delete mode 100644 edoc/overview-summary.html
delete mode 100644 edoc/overview.edoc
delete mode 100644 edoc/packages-frame.html
delete mode 100644 edoc/stylesheet.css
diff --git a/edoc/.gitignore b/edoc/.gitignore
deleted file mode 100644
index bcd672a..0000000
--- a/edoc/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-tmp.*
diff --git a/edoc/edoc-info b/edoc/edoc-info
deleted file mode 100644
index f119ed0..0000000
--- a/edoc/edoc-info
+++ /dev/null
@@ -1,7 +0,0 @@
-%% encoding: UTF-8
-{application,machi}.
-{packages,[]}.
-{modules,[machi_admin_util,machi_app,machi_chain_manager1,machi_chash,
- machi_flu1,machi_flu1_client,machi_flu_sup,machi_projection,
- machi_projection_store,machi_proxy_flu1_client,machi_sequencer,
- machi_sup,machi_util]}.
diff --git a/edoc/erlang.png b/edoc/erlang.png
deleted file mode 100644
index 987a618e2403af895bfaf8c2f929e3a4f3746659..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 2109
zcmV-D2*US?P)rez_nr%N
ze)-p~%6|a|LA_bA=l=$|3jjqS$tjbGG?@TN0w$Azq7Z{YeQxKcpLO55vno1^u23DP&V=i9-KAAsU*ECy^#OtaDC!lVSo!+|-%T+LhTHP^Oqwx8m)b4r3V28JmV&6M#iG)&0;P`j>XGfomEIEK6wPkhI{{K?3#uAGq$!`N_F)TNX
zAvuspF?^;c9h%CPWyTDc_03%r4N8+Yzzo_VSfa!zo_7F6D?<+-+KkHwXiWQR=Mr(9|K@{{xEjfDvAbS9uNCP&{)NNCoC?XA$aRe>R8->
z5N<#S_)$d|EYpJfPC?{`$Y~f4yjH&dxHXIGG8wiaLBD6usC87cg+dd&3WLJd4_TcmEeAOz8R>ikgW(9821
z{34Se09Y?KoG<_Y;DDSoyTk>fUN0YO5)3^Za{&s1JbidC9}56{px+f|K_0;YuL5h}
z_9J3y%7ucwM)E4K#=Cn7tCjjRkKjnQuiFcM6{17Jt#5F}7z8~RYqW24xV?kAU6xQN
zh+h4|SmO1;TdsVOaOeD*kKf}6I7=6ZNig_rtqV?Ov1HrU(P%Hi#6npSe>%qGaNK1w
zW$v+r`r0>#p~AN^8b)#7Yesu(ys(>3SCYb4sF9%A9=kMHrLmzk}E&WPG~Jx
z9!r{qo5M184t;<7I`t1AsNjv912EeKkHKtOSl%wbcjFh7L6|G?Q+{?radOvuEW$>1
zoc+c&F+u$^0f}1_2dN&lS#I#p3e&+|YGHlMzRC)%&8TnGt+p*;Oz
z`0=D=n|qcN+f@07;QjB@ktLhZ`+qz;(xYDli^Pex&&wwU2V4N-a3b@veqHg2cvCRb
zoi=ZerLk!4t5!s3?|ARuWx_4-VCgl|TY2qa@$Dr~5QdiT8?$oPpZhaF5UOZ&x=+I9
zt((`6wBPM((BS{;2lmSB;o%z{>=mg*1k2oLjI=+zcf5$4BIZmkOrjrE
z*VY(<@FO?zBVDc+Q~Lh;LnlYodZ$J3tmWJBN4j~wVOWelzexhft2nY6A3PZAcm!q}
z931CL#1Ki6;HM{agTbKF>3(R-yuF1&Apn3Nh@PGvv)K$mkVqu*^z@vaFgQ3kFfg!s
z^=f26@{Ny=_w@7x1qHF$bEk5X$)wR}0s{l>V!TCGM=R5Ei1Ll8u7Z*N0G1CPgB
zyLPP|0H{-FRUDJv`Ea=9fX
zC63D4+FBlumz$eAJv~j5q*|@^_xC?_>XiL0K@bH61$;i=&CLx(QGb8`8#iu{BnjJW
zHUvSgUcK7T&~W(h;koN8t5vB~Ha0dgnane1&RA#87dVcaOpEMM)6>)E&YiPZEXBpe
zlarHk89g;+G#U#E3hL_W002xT6UTApOeR%UR_5g7q^73!_4PG2Hi|@|ii(Pfi3vIY
z0ES^?Mx1IOizO0?e0_a!9483k`PtCk-rm~Unwpw=?b@~O?(WdgP^bMMAYlLg{dIM9
zOy}OcxVTs%k(@q#n$PF+`TXkYYA%;cr_*5ofWcr$PEL-Ai772Db)3`L*|~G)&eqn}
zq@*OrbXim`UAiO`3XdK=%H#1=D%HHV>FMbqAtCAM=@!e}C6Cc))ai5zg~H3rYjkup
zD=RBMKR+`wv!kN}1^{3fR#a3}RaLcP#}20|H!^bT)~%G3lp{xu!0_{Wr2hW?>({UQ
z`T1F`)|D$)*3{IP&1UDKhLn_)sMYHH{QRkzV=$M?#W2idGFh!wf*`b7ZGC-xVPT=c
zV1Vs&!otFoN~M>VQ$G_G6}5No-m0pqwzjr;?W@INu~;m#k*%qz(P%VUt#;3zJ^lUt
zU0q%G?%kVzvF7cqQmLw|tA~e&XIqun*x2Ug=9-!s48ty7ycil9Di(|7aybkD7#y?%lgQ
z9`Ewy%eDpgxlvJ3Cr+GTFc>(F+cg;(8TPc>y?b|jeEgLwR}LLIBoqp1+1c4_HrvO?
z$J5g@G&D3gIC$2ITrQ7`iwh4AfA;K|OePZu1oriTVVG1Zl}e@S)~)mK@UU1cI-Ty|
z!Gj8gg2UmUD2ibif*{e+(R4bU#bU|j@{Joe^7(uSf+8X!q*7@_M1;L=AqbM3oXp{H
nT3T9A6wS=c+_!HZolgHhw9g$%O4Wbp00000NkvXXu0mjf3HKBY
diff --git a/edoc/index.html b/edoc/index.html
deleted file mode 100644
index be9d1af..0000000
--- a/edoc/index.html
+++ /dev/null
@@ -1,17 +0,0 @@
-The machi application
\ No newline at end of file
diff --git a/edoc/machi_admin_util.html b/edoc/machi_admin_util.html
deleted file mode 100644
index 744230a..0000000
--- a/edoc/machi_admin_util.html
+++ /dev/null
@@ -1,60 +0,0 @@
-Module machi_admin_util
diff --git a/edoc/machi_chain_manager1.html b/edoc/machi_chain_manager1.html
deleted file mode 100644
@@ -1,155 +0,0 @@
-The Machi chain manager, Guardian of all things related to
-Chain Replication state, status, and data replica safety.
-
-The Chain Manager is responsible for managing the state of Machi's
-"Chain Replication" state.  This role is roughly analogous to the
-"Riak Core" application inside of Riak, which takes care of
-coordinating replica placement and replica repair.
-
-For each primitive data server in the cluster, a Machi FLU, there
-is a Chain Manager process that manages its FLU's role within the
-Machi cluster's Chain Replication scheme.  Each Chain Manager
-process executes locally and independently to manage the
-distributed state of a single Machi Chain Replication chain.
-
-Machi's Chain Manager process performs similar tasks as Riak Core's
-claimant.  However, Machi has several active Chain Manager
-processes, one per FLU server, instead of a single active process
-like Core's claimant.  Each Chain Manager process acts
-independently; each is constrained so that it will reach consensus
-via independent computation & action.
diff --git a/edoc/machi_chash.html b/edoc/machi_chash.html
deleted file mode 100644
@@ -1,171 +0,0 @@
-A float map subdivides the unit interval, starting at 0.0, into
-partitions that are assigned to various owners.  The sum of all
-floats must be exactly 1.0 (or close enough for floating point
-purposes).
-
-A nextfloat_list differs from a float_map in two respects: 1) a
-nextfloat_list contains tuples with the brick name in 2nd position,
-2) the float() at each position I_n > I_m, for all n, m such that
-n > m.  For example, a nextfloat_list for the float_map example
-above: [{0.25, {br1, nd1}}, {0.75, {br2, nd1}}, {1.0, {br3, nd1}}].
-
-An owner_weight_list is a definition of brick assignments over the
-unit interval [0.0, 1.0].  The sum of all floats must be 1.0.  For
-example, [{{br1,nd1}, 0.25}, {{br2,nd1}, 0.5}, {{br3,nd1}, 0.25}].
-
-For this library, a weight is an integer which specifies the
-capacity of an "owner" relative to other owners.  For example, if
-owner A has a weight of 10, and if owner B has a weight of 20,
-then B will be assigned twice as much of the unit interval as A.
-
-Make a pretty/human-friendly version of a float map (based
-upon a float map created from OldWeights and NewWeights) that
-describes integer ranges between 1 and Scale.
-
-The two parts of the summary are: a per-owner total of the unit
-interval range(s) owned by each owner, and a total sum of all
-per-owner ranges (which should be 1.0 but is not enforced).
diff --git a/edoc/machi_flu1.html b/edoc/machi_flu1.html
deleted file mode 100644
@@ -1,63 +0,0 @@
-The Machi FLU file server + file location sequencer.
-
-This module implements only the Machi FLU file server and its
-implicit sequencer.  Please see the EDoc "Overview" for details
-about the FLU as a primitive file server process vs. the larger
-Machi design of a FLU as a sequencer + file server + chain manager
-group of processes.
-
-For the moment, this module also implements a rudimentary TCP-based
-protocol as the sole supported access method to the server,
-sequencer, and projection store.  Conceptually, those three
-services are independent and ought to have their own protocols.  As
-a practical matter, there is no need for wire protocol
-compatibility.  Furthermore, from the perspective of failure
-detection, it is very convenient that all three FLU-related
-services are accessed using the same single TCP port.
-
-The FLU is named after the CORFU server "FLU" or "FLash Unit" server.
-
-TODO: There is one major missing feature in this FLU implementation:
-there is no "write-once" enforcement for any position in a Machi
-file.  At the moment, we rely on correct behavior of the client
-& the sequencer to avoid overwriting data.  In the Real World,
-however, all Machi file data is supposed to be exactly write-once
-to avoid problems with bugs, wire protocol corruption, malicious
-clients, etc.
diff --git a/edoc/machi_projection_store.html b/edoc/machi_projection_store.html
deleted file mode 100644
diff --git a/edoc/machi_proxy_flu1_client.html b/edoc/machi_proxy_flu1_client.html
deleted file mode 100644
diff --git a/edoc/overview-summary.html b/edoc/overview-summary.html
deleted file mode 100644
index a29913c..0000000
--- a/edoc/overview-summary.html
+++ /dev/null
@@ -1,185 +0,0 @@
-Machi: a small village of replicated files
-
-This EDoc-style documentation will concern itself only with Erlang
-function APIs and function & data types.  Higher-level design and
-commentary will remain outside of the Erlang EDoc system; please see
-the "Pointers to Other Machi Documentation" section below for more
-details.
-
-Readers should beware that this documentation may be out-of-sync with
-the source code.  When in doubt, use the make edoc command to
-regenerate all HTML pages.
-
-It is the developer's responsibility to re-generate the documentation
-periodically and commit it to the Git repo.
-
-The Chain Manager is responsible for managing the state of Machi's
-"Chain Replication" state.  This role is roughly analogous to the
-"Riak Core" application inside of Riak, which takes care of
-coordinating replica placement and replica repair.
-
-For each primitive data server in the cluster, a Machi FLU, there is a
-Chain Manager process that manages its FLU's role within the Machi
-cluster's Chain Replication scheme.  Each Chain Manager process
-executes locally and independently to manage the distributed state of
-a single Machi Chain Replication chain.
-
-To contrast with Riak Core ... Riak Core's claimant process is
-solely responsible for managing certain critical aspects of
-Riak Core distributed state.  Machi's Chain Manager process
-performs similar tasks as Riak Core's claimant.  However, Machi
-has several active Chain Manager processes, one per FLU server,
-instead of a single active process like Core's claimant.  Each
-Chain Manager process acts independently; each is constrained
-so that it will reach consensus via independent computation
-& action.
-
-Full discussion of this distributed consensus is outside the
-scope of this document; see the "Pointers to Other Machi
-Documentation" section below for more information.
-
-Machi differs from a Riak Core application because Machi's
-replica placement policy is simply, "All Machi servers store
-replicas of all Machi files".  Machi is intended to be a
-primitive building block for creating larger cluster-of-clusters
-where files are distributed/fragmented/sharded across a large
-pool of independent Machi clusters.
-
-In CORFU, the sequencer step is a prerequisite step that is
-performed by a separate component, the Sequencer.  In Machi, the
-append_chunk() protocol message has an implicit "sequencer"
-operation applied by the "head" of the Machi Chain Replication
-chain.  If a client wishes to write data that has already been
-assigned a sequencer position, then the write_chunk() API function
-is used.
-
-For each FLU, there are three independent tasks that are implemented
-using three different Erlang processes:
-
-  1. A FLU server, implemented primarily by machi_flu.erl.
-  2. A projection store server, implemented primarily by
-     machi_projection_store.erl.
-  3. A chain state manager server, implemented primarily by
-     machi_chain_manager1.erl.
-
-From the perspective of failure detection, it is very convenient that
-all three FLU-related services (file server, sequencer server, and
-projection server) are accessed using the same single TCP port.
-
-The projection is a data structure that specifies the current state
-of the Machi cluster: all FLUs, which FLUs are considered
-up/running or down/crashed/stopped, which FLUs are active
-participants in the Chain Replication protocol, and which FLUs are
-under "repair" (i.e., having their data resynchronized when
-newly-added to a cluster or when restarting after a crash).
-
-The projection store is a storage service that is implemented by an
-Erlang/OTP gen_server process that is associated with each
-FLU.  Conceptually, the projection store is an array of
-write-once registers.  For each projection store register, the
-key is a 2-tuple of an epoch number (non_neg_integer() type)
-and a projection type (public or private type); the value is
-a projection data structure (projection_v1() type).
-
-Machi is intentionally avoiding using distributed Erlang for Machi's
-communication.  This design decision makes Erlang-side code more
-difficult & complex but allows us the freedom of implementing
-parts of Machi in other languages without major
-protocol & API & glue code changes later in the product's
-lifetime.
-
-There are two layers of interface for Machi clients.
-
-  1. The machi_flu1_client module implements an API that uses a
-     TCP socket directly.
-  2. The machi_proxy_flu1_client module implements an API that
-     uses a local, long-lived gen_server process as a proxy for
-     the remote, perhaps disconnected-or-crashed Machi FLU server.
-
-The types for both modules ought to be the same.  However, due to
-rapid code churn, some differences might exist.  Any major difference
-is (almost by definition) a bug: please open a GitHub issue to request
-a correction.
-
-If you are viewing this document locally, please look in the
-../doc/ directory.
-
-If you are viewing this document via the Web, please find the
-documentation via this link:
-http://github.com/basho/machi/tree/master/doc/
-Please be aware that this link points to the master branch
-of the Machi source repository and therefore may be
-out-of-sync with non-master branch code.