diff --git a/.gitignore b/.gitignore
index 180a370..09dacbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,4 @@ deps
erl_crash.dump
.concrete/DEV_MODE
.rebar
-doc/edoc-info
-doc/erlang.png
-doc/*.html
-doc/stylesheet.css
+edoc
diff --git a/Makefile b/Makefile
index ba8df11..b91d653 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ ifeq ($(REBAR_BIN),)
REBAR_BIN = ./rebar
endif
-.PHONY: rel deps package pkgclean
+.PHONY: rel deps package pkgclean edoc
all: deps compile
@@ -21,6 +21,12 @@ test: deps compile eunit
eunit:
$(REBAR_BIN) -v skip_deps=true eunit
+edoc: edoc-clean
+ $(REBAR_BIN) skip_deps=true doc
+
+edoc-clean:
+ rm -f edoc/*.png edoc/*.html edoc/*.css edoc/edoc-info
+
pulse: compile
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 0000000..74849f7
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,12 @@
+## Machi Documentation Overview
+
+For a Web-browsable version of a snapshot of the source doc "EDoc"
+Erlang documentation, please use this link:
+[Machi EDoc snapshot](https://basho.github.io/machi/edoc/).
+
+## Documents in this directory
+
+* __chain-self-management-sketch.org__ is an introduction to the
+self-management algorithm proposed for Machi. This algorithm is
+(hoped to be) sufficient for managing the Chain Replication state of a
+Machi cluster.
diff --git a/doc/overview.edoc b/doc/overview.edoc
new file mode 100644
index 0000000..6182f6b
--- /dev/null
+++ b/doc/overview.edoc
@@ -0,0 +1,170 @@
+
+@title Machi: a small village of replicated files
+
+@doc
+
+== About This EDoc Documentation ==
+
+This EDoc-style documentation will concern itself only with Erlang
+function APIs and function & data types. Higher-level design and
+commentary will remain outside of the Erlang EDoc system; please see
+the "Pointers to Other Machi Documentation" section below for more
+details.
+
+Readers should beware that this documentation may be out-of-sync with
+the source code. When in doubt, use the `make edoc' command to
+regenerate all HTML pages.
+
+It is the developer's responsibility to re-generate the documentation
+periodically and commit it to the Git repo.
+
+== Machi Code Overview ==
+
+=== Chain Manager ===
+
+The Chain Manager is responsible for managing Machi's
+"Chain Replication" state. This role is roughly analogous to the
+"Riak Core" application inside of Riak, which takes care of
+coordinating replica placement and replica repair.
+
+For each primitive data server in the cluster, a Machi FLU, there is a
+Chain Manager process that manages its FLU's role within the Machi
+cluster's Chain Replication scheme. Each Chain Manager process
+executes locally and independently to manage the distributed state of
+a single Machi Chain Replication chain.
+
+
+
+ - To contrast with Riak Core ... Riak Core's claimant process is
+ solely responsible for managing certain critical aspects of
+ Riak Core distributed state. Machi's Chain Manager process
+ performs similar tasks as Riak Core's claimant. However, Machi
+ has several active Chain Manager processes, one per FLU server,
+ instead of a single active process like Core's claimant. Each
+ Chain Manager process acts independently; each is constrained
+ so that it will reach consensus via independent computation
+ & action.
+
+ Full discussion of this distributed consensus is outside the
+ scope of this document; see the "Pointers to Other Machi
+ Documentation" section below for more information.
+
+ - Machi differs from a Riak Core application because Machi's
+ replica placement policy is simply, "All Machi servers store
+ replicas of all Machi files".
+ Machi is intended to be a primitive building block for creating larger
+ cluster-of-clusters where files are
+ distributed/fragmented/sharded across a large pool of
+ independent Machi clusters.
+
+ - See
+ [https://www.usenix.org/legacy/events/osdi04/tech/renesse.html]
+ for a copy of the paper, "Chain Replication for Supporting High
+ Throughput and Availability" by Robbert van Renesse and Fred
+ B. Schneider.
+
+
+
+=== FLU ===
+
+The FLU is the basic storage server for Machi.
+
+
+ - The name FLU is taken from "flash storage unit" from the paper
+ "CORFU: A Shared Log Design for Flash Clusters" by
+ Balakrishnan, Malkhi, Prabhakaran, and Wobber. See
+ [https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/balakrishnan]
+
+ - In CORFU, the sequencer step is a prerequisite step that is
+ performed by a separate component, the Sequencer.
+ In Machi, the `append_chunk()' protocol message has
+ an implicit "sequencer" operation applied by the "head" of the
+ Machi Chain Replication chain. If a client wishes to write
+ data that has already been assigned a sequencer position, then
+ the `write_chunk()' API function is used.
+
+
+
+For each FLU, there are three independent tasks that are implemented
+using three different Erlang processes:
+
+
+ - A FLU server, implemented primarily by `machi_flu.erl'.
+
+ - A projection store server, implemented primarily by
+ `machi_projection_store.erl'.
+
+ - A chain state manager server, implemented primarily by
+ `machi_chain_manager1.erl'.
+
+
+
+From the perspective of failure detection, it is very convenient that
+all three FLU-related services (file server, sequencer server, and
+projection server) are accessed using the same single TCP port.
+
+=== Projection (data structure) ===
+
+The projection is a data structure that specifies the current state
+of the Machi cluster: all FLUs, which FLUs are considered
+up/running or down/crashed/stopped, which FLUs are active
+participants in the Chain Replication protocol, and which FLUs are
+under "repair" (i.e., having their data resynchronized when
+newly-added to a cluster or when restarting after a crash).
+
+=== Projection Store (server) ===
+
+The projection store is a storage service that is implemented by an
+Erlang/OTP `gen_server' process that is associated with each
+FLU. Conceptually, the projection store is an array of
+write-once registers. For each projection store register, the
+key is a 2-tuple of an epoch number (`non_neg_integer()' type)
+and a projection type (`public' or `private' type); the value is
+a projection data structure (`projection_v1()' type).
+
+=== Client and Proxy Client ===
+
+Machi is intentionally avoiding using distributed Erlang for Machi's
+communication. This design decision makes Erlang-side code more
+difficult & complex but allows us the freedom of implementing
+parts of Machi in other languages without major
+protocol&API&glue code changes later in the product's
+lifetime.
+
+There are two layers of interface for Machi clients.
+
+
+ - The `machi_flu1_client' module implements an API that uses a
+ TCP socket directly.
+
+ - The `machi_proxy_flu1_client' module implements an API that
+ uses a local, long-lived `gen_server' process as a proxy for
+ the remote, perhaps disconnected-or-crashed Machi FLU server.
+
+
+
+The types for both modules ought to be the same. However, due to
+rapid code churn, some differences might exist. Any major difference
+is (almost by definition) a bug: please open a GitHub issue to request
+a correction.
+
+== TODO notes ==
+
+Any use of the string "TODO" in upper/lower/mixed case, anywhere in
+the code, is a reminder signal of unfinished work.
+
+== Pointers to Other Machi Documentation ==
+
+
+- If you are viewing this document locally, please look in the
+ `../doc/' directory,
+
+- If you are viewing this document via the Web, please find the
+ documentation via this link:
+ [http://github.com/basho/machi/tree/master/doc/]
+ Please be aware that this link points to the `master' branch
+ of the Machi source repository and therefore may be
+ out-of-sync with non-`master' branch code.
+
+
+
diff --git a/include/machi_chain_manager.hrl b/include/machi_chain_manager.hrl
new file mode 100644
index 0000000..9382fa6
--- /dev/null
+++ b/include/machi_chain_manager.hrl
@@ -0,0 +1,41 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-include("machi_projection.hrl").
+
+-define(NOT_FLAPPING, {0,0,0}).
+
+-type projection() :: #projection_v1{}.
+
+-record(ch_mgr, {
+ name :: pv1_server(),
+ flap_limit :: non_neg_integer(),
+ proj :: projection(),
+ %%
+ timer :: 'undefined' | timer:tref(),
+ proj_history :: queue:queue(),
+ flaps=0 :: integer(),
+ flap_start = ?NOT_FLAPPING
+ :: erlang:timestamp(),
+ runenv :: list(), %proplist()
+ opts :: list(), %proplist()
+ members_dict :: p_srvr_dict(),
+ proxies_dict :: orddict:orddict()
+ }).
diff --git a/include/machi_projection.hrl b/include/machi_projection.hrl
index 2e35aed..59baf03 100644
--- a/include/machi_projection.hrl
+++ b/include/machi_projection.hrl
@@ -18,30 +18,15 @@
%%
%% -------------------------------------------------------------------
+-ifndef(MACHI_PROJECTION_HRL).
+-define(MACHI_PROJECTION_HRL, true).
+
-type pv1_csum() :: binary().
-type pv1_epoch() :: {pv1_epoch_n(), pv1_csum()}.
-type pv1_epoch_n() :: non_neg_integer().
-type pv1_server() :: atom() | binary().
-type pv1_timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
--define(DUMMY_PV1_EPOCH, {0,<<0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0>>}).
-
--record(projection_v1, {
- epoch_number :: pv1_epoch_n(),
- epoch_csum :: pv1_csum(),
- all_members :: [pv1_server()],
- member_dict :: orddict:orddict(),
- down :: [pv1_server()],
- creation_time :: pv1_timestamp(),
- author_server :: pv1_server(),
- upi :: [pv1_server()],
- repairing :: [pv1_server()],
- dbg :: list(), %proplist(), is checksummed
- dbg2 :: list() %proplist(), is not checksummed
- }).
-
--define(MACHI_DEFAULT_TCP_PORT, 50000).
-
-record(p_srvr, {
name :: pv1_server(),
proto = 'ipv4' :: 'ipv4' | 'disterl', % disterl? Hrm.
@@ -50,4 +35,27 @@
props = [] :: list() % proplist for other related info
}).
+-type p_srvr() :: #p_srvr{}.
+-type p_srvr_dict() :: orddict:orddict().
+
+-define(DUMMY_PV1_EPOCH, {0,<<0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0>>}).
+
+-record(projection_v1, {
+ epoch_number :: pv1_epoch_n(),
+ epoch_csum :: pv1_csum(),
+ author_server :: pv1_server(),
+ creation_time :: pv1_timestamp(),
+ all_members :: [pv1_server()],
+ down :: [pv1_server()],
+ upi :: [pv1_server()],
+ repairing :: [pv1_server()],
+ dbg :: list(), %proplist(), is checksummed
+ dbg2 :: list(), %proplist(), is not checksummed
+ members_dict :: p_srvr_dict()
+ }).
+
+-define(MACHI_DEFAULT_TCP_PORT, 50000).
+
-define(SHA_MAX, (1 bsl (20*8))).
+
+-endif. % !MACHI_PROJECTION_HRL
diff --git a/rebar b/rebar
index 03c9be6..146c9aa 100755
Binary files a/rebar and b/rebar differ
diff --git a/rebar.config b/rebar.config
index 5b3cfa2..88f9c3d 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,5 +1,8 @@
+{require_otp_vsn, "17"}.
+
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
+{edoc_opts, [{dir, "./edoc"}]}.
{deps, [
{lager, ".*", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}}
diff --git a/src/machi_admin_util.erl b/src/machi_admin_util.erl
index 990d948..f0db9d0 100644
--- a/src/machi_admin_util.erl
+++ b/src/machi_admin_util.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Machi chain replication administration utilities.
+
-module(machi_admin_util).
%% TODO Move these types to a common header file? (also machi_flu1_client.erl?)
@@ -114,7 +116,7 @@ verify_chunk_checksum(File, ReadChunk) ->
fun({Offset, Size, CSum}, Acc) ->
case ReadChunk(File, Offset, Size) of
{ok, Chunk} ->
- CSum2 = machi_util:checksum(Chunk),
+ CSum2 = machi_util:checksum_chunk(Chunk),
if CSum == CSum2 ->
Acc;
true ->
diff --git a/src/machi_app.erl b/src/machi_app.erl
index 6dfddf7..2701f60 100644
--- a/src/machi_app.erl
+++ b/src/machi_app.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Top-level supervisor for the Machi application.
+
-module(machi_app).
-behaviour(application).
diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl
new file mode 100644
index 0000000..90ca727
--- /dev/null
+++ b/src/machi_chain_manager1.erl
@@ -0,0 +1,1609 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc The Machi chain manager, Guardian of all things related to
+%% Chain Replication state, status, and data replica safety.
+%%
+%% The Chain Manager is responsible for managing Machi's
+%% "Chain Replication" state. This role is roughly analogous to the
+%% "Riak Core" application inside of Riak, which takes care of
+%% coordinating replica placement and replica repair.
+%%
+%% For each primitive data server in the cluster, a Machi FLU, there
+%% is a Chain Manager process that manages its FLU's role within the
+%% Machi cluster's Chain Replication scheme. Each Chain Manager
+%% process executes locally and independently to manage the
+%% distributed state of a single Machi Chain Replication chain.
+%%
+%% Machi's Chain Manager process performs similar tasks as Riak Core's
+%% claimant. However, Machi has several active Chain Manager
+%% processes, one per FLU server, instead of a single active process
+%% like Core's claimant. Each Chain Manager process acts
+%% independently; each is constrained so that it will reach consensus
+%% via independent computation & action.
+
+-module(machi_chain_manager1).
+
+%% TODO: I am going to sever the connection between the flowchart and the
+%% code. That diagram is really valuable, but it also takes a long time
+%% to make any kind of edit; the process is too slow. This is a todo
+%% item a reminder that the flowchart is important documentation and
+%% must be brought back into sync with the code soon.
+
+-behaviour(gen_server).
+
+-include("machi_projection.hrl").
+-include("machi_chain_manager.hrl").
+
+-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
+-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+
+-define(FLU_C, machi_flu1_client).
+-define(FLU_PC, machi_proxy_flu1_client).
+-define(TO, (2*1000)). % default timeout
+
+%% Keep a history of our flowchart execution in the process dictionary.
+-define(REACT(T), put(react, [T|get(react)])).
+
+%% API
+-export([start_link/2, start_link/3, stop/1, ping/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([projection_transitions_are_sane/2]).
+
+-ifdef(TEST).
+
+-export([test_calc_projection/2,
+ test_write_public_projection/2,
+ test_read_latest_public_projection/2,
+ test_set_active/2,
+ test_react_to_env/1,
+ get_all_hosed/1]).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-endif.
+-ifdef(PULSE).
+-compile({parse_transform, pulse_instrument}).
+-endif.
+
+-include_lib("eunit/include/eunit.hrl").
+-compile(export_all).
+-endif. %TEST
+
+start_link(MyName, MembersDict) ->
+ start_link(MyName, MembersDict, []).
+
+start_link(MyName, MembersDict, MgrOpts) ->
+ gen_server:start_link(?MODULE, {MyName, MembersDict, MgrOpts}, []).
+
+stop(Pid) ->
+ gen_server:call(Pid, {stop}, infinity).
+
+ping(Pid) ->
+ gen_server:call(Pid, {ping}, infinity).
+
+-ifdef(TEST).
+
+%% Test/debugging code only.
+
+test_write_public_projection(Pid, Proj) ->
+ gen_server:call(Pid, {test_write_public_projection, Proj}, infinity).
+
+%% Calculate a projection and return it to us.
+%% If KeepRunenvP is true, the server will retain its change in its
+%% runtime environment, e.g., changes in simulated network partitions.
+test_calc_projection(Pid, KeepRunenvP) ->
+ gen_server:call(Pid, {test_calc_projection, KeepRunenvP}, infinity).
+
+test_read_latest_public_projection(Pid, ReadRepairP) ->
+ gen_server:call(Pid, {test_read_latest_public_projection, ReadRepairP},
+ infinity).
+
+test_set_active(Pid, Boolean) when Boolean == true; Boolean == false ->
+ gen_server:call(Pid, {test_set_active, Boolean}, infinity).
+
+test_react_to_env(Pid) ->
+ gen_server:call(Pid, {test_react_to_env}, infinity).
+
+-endif. % TEST
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init({MyName, MembersDict, MgrOpts}) ->
+ All_list = [P#p_srvr.name || {_, P} <- orddict:to_list(MembersDict)],
+ Opt = fun(Key, Default) -> proplists:get_value(Key, MgrOpts, Default) end,
+ RunEnv = [{seed, Opt(seed, now())},
+ {network_partitions, Opt(network_partitions, [])},
+ {network_islands, Opt(network_islands, [])},
+ {flapping_i, Opt(flapping, [])},
+ {up_nodes, Opt(up_nodes, not_init_yet)}],
+ ActiveP = Opt(active_mode, true),
+ Down_list = All_list -- [MyName],
+ UPI_list = [MyName],
+ NoneProj = machi_projection:new(MyName, MembersDict,
+ Down_list, UPI_list, [], []),
+ Proxies = orddict:fold(
+ fun(K, P, Acc) ->
+ {ok, Pid} = ?FLU_PC:start_link(P),
+ [{K, Pid}|Acc]
+ end, [], MembersDict),
+ S = #ch_mgr{name=MyName,
+ %% TODO 2015-03-04: revisit, should this constant be bigger?
+ %% Yes, this should be bigger, but it's a hack. There is
+ %% no guarantee that all parties will advance to a minimum
+ %% flap awareness in the amount of time that this mgr will.
+ flap_limit=length(All_list) + 50,
+ proj=NoneProj,
+ timer='undefined',
+ proj_history=queue:new(),
+ runenv=RunEnv,
+ opts=MgrOpts,
+ members_dict=MembersDict,
+ proxies_dict=orddict:from_list(Proxies)},
+ S2 = if ActiveP == false ->
+ S;
+ ActiveP == true ->
+ set_active_timer(S)
+ end,
+ {ok, S2}.
+
+handle_call({ping}, _From, S) ->
+ {reply, pong, S};
+handle_call({stop}, _From, S) ->
+ {stop, normal, ok, S};
+handle_call({test_calc_projection, KeepRunenvP}, _From,
+ #ch_mgr{name=MyName}=S) ->
+ RelativeToServer = MyName,
+ {P, S2} = calc_projection(S, RelativeToServer),
+ {reply, {ok, P}, if KeepRunenvP -> S2;
+ true -> S
+ end};
+handle_call({test_write_public_projection, Proj}, _From, S) ->
+ {Res, S2} = do_cl_write_public_proj(Proj, S),
+ {reply, Res, S2};
+handle_call({test_read_latest_public_projection, ReadRepairP}, _From, S) ->
+ {Perhaps, Val, ExtraInfo, S2} =
+ do_cl_read_latest_public_projection(ReadRepairP, S),
+ Res = {Perhaps, Val, ExtraInfo},
+ {reply, Res, S2};
+handle_call({test_set_active, Boolean}, _From, #ch_mgr{timer=TRef}=S) ->
+ case {Boolean, TRef} of
+ {true, undefined} ->
+ S2 = set_active_timer(S),
+ {reply, ok, S2};
+ {false, _} ->
+ (catch timer:cancel(TRef)),
+ {reply, ok, S#ch_mgr{timer=undefined}};
+ _ ->
+ {reply, error, S}
+ end;
+handle_call({test_react_to_env}, _From, S) ->
+ {TODOtodo, S2} = do_react_to_env(S),
+ {reply, TODOtodo, S2};
+handle_call(_Call, _From, S) ->
+ {reply, whaaaaaaaaaa, S}.
+
+handle_cast(_Cast, S) ->
+ ?D({cast_whaaaaaaaaaaa, _Cast}),
+ {noreply, S}.
+
+handle_info(Msg, S) ->
+ exit({bummer, Msg}),
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+set_active_timer(#ch_mgr{name=MyName, members_dict=MembersDict}=S) ->
+ FLU_list = [P#p_srvr.name || {_,P} <- orddict:to_list(MembersDict)],
+ USec = calc_sleep_ranked_order(1000, 2000, MyName, FLU_list),
+ {ok, TRef} = timer:send_interval(USec, yo_yo_yo),
+ S#ch_mgr{timer=TRef}.
+
+do_cl_write_public_proj(Proj, S) ->
+ #projection_v1{epoch_number=Epoch} = Proj,
+ cl_write_public_proj(Epoch, Proj, S).
+
+cl_write_public_proj(Epoch, Proj, S) ->
+ cl_write_public_proj(Epoch, Proj, false, S).
+
+cl_write_public_proj_skip_local_error(Epoch, Proj, S) ->
+ cl_write_public_proj(Epoch, Proj, true, S).
+
+cl_write_public_proj(Epoch, Proj, SkipLocalWriteErrorP, S) ->
+ %% Write to local public projection store first, and if it succeeds,
+ %% then write to all remote public projection stores.
+ cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP, S).
+
+cl_write_public_proj_local(Epoch, Proj, SkipLocalWriteErrorP,
+ #ch_mgr{name=MyName}=S) ->
+ {_UpNodes, Partitions, S2} = calc_up_nodes(S),
+ Res0 = perhaps_call_t(
+ S, Partitions, MyName,
+ fun(Pid) -> ?FLU_PC:write_projection(Pid, public, Proj, ?TO) end),
+ Continue = fun() ->
+ FLUs = Proj#projection_v1.all_members -- [MyName],
+ cl_write_public_proj_remote(FLUs, Partitions, Epoch, Proj, S)
+ end,
+ case Res0 of
+ ok ->
+ {XX, SS} = Continue(),
+ {{local_write_result, ok, XX}, SS};
+ Else when SkipLocalWriteErrorP ->
+ {XX, SS} = Continue(),
+ {{local_write_result, Else, XX}, SS};
+ Else when Else == error_written; Else == timeout; Else == t_timeout ->
+ {Else, S2}
+ end.
+
+cl_write_public_proj_remote(FLUs, Partitions, _Epoch, Proj, S) ->
+ %% We're going to be very care-free about this write because we'll rely
+ %% on the read side to do any read repair.
+ DoIt = fun(Pid) -> ?FLU_PC:write_projection(Pid, public, Proj, ?TO) end,
+ Rs = [{FLU, perhaps_call_t(S, Partitions, FLU, fun(Pid) -> DoIt(Pid) end)} ||
+ FLU <- FLUs],
+ {{remote_write_results, Rs}, S}.
+
+do_cl_read_latest_public_projection(ReadRepairP,
+ #ch_mgr{proj=Proj1} = S) ->
+ _Epoch1 = Proj1#projection_v1.epoch_number,
+ case cl_read_latest_projection(public, S) of
+ {needs_repair, FLUsRs, Extra, S3} ->
+ if not ReadRepairP ->
+ {not_unanimous, todoxyz, [{results, FLUsRs}|Extra], S3};
+ true ->
+ {_Status, S4} = do_read_repair(FLUsRs, Extra, S3),
+ do_cl_read_latest_public_projection(ReadRepairP, S4)
+ end;
+ {UnanimousTag, Proj2, Extra, S3}=_Else ->
+ {UnanimousTag, Proj2, Extra, S3}
+ end.
+
+read_latest_projection_call_only(ProjectionType, AllHosed,
+ #ch_mgr{proj=CurrentProj}=S) ->
+ #projection_v1{all_members=All_list} = CurrentProj,
+ All_queried_list = All_list -- AllHosed,
+
+ {_UpNodes, Partitions, S2} = calc_up_nodes(S),
+ DoIt = fun(Pid) ->
+ case ?FLU_PC:read_latest_projection(Pid, ProjectionType, ?TO) of
+ {ok, P} -> P;
+ Else -> Else
+ end
+ end,
+ Rs = [perhaps_call_t(S, Partitions, FLU, fun(Pid) -> DoIt(Pid) end) ||
+ FLU <- All_queried_list],
+ FLUsRs = lists:zip(All_queried_list, Rs),
+ {All_queried_list, FLUsRs, S2}.
+
+cl_read_latest_projection(ProjectionType, S) ->
+ AllHosed = [],
+ cl_read_latest_projection(ProjectionType, AllHosed, S).
+
+cl_read_latest_projection(ProjectionType, AllHosed, S) ->
+ {All_queried_list, FLUsRs, S2} =
+ read_latest_projection_call_only(ProjectionType, AllHosed, S),
+
+ rank_and_sort_projections_with_extra(All_queried_list, FLUsRs, S2).
+
+rank_and_sort_projections_with_extra(All_queried_list, FLUsRs,
+ #ch_mgr{proj=CurrentProj}=S) ->
+ UnwrittenRs = [x || {_, error_unwritten} <- FLUsRs],
+ Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection_v1)],
+ BadAnswerFLUs = [FLU || {FLU, Answer} <- FLUsRs,
+ not is_record(Answer, projection_v1)],
+
+ if All_queried_list == []
+ orelse
+ length(UnwrittenRs) == length(FLUsRs) ->
+ {error_unwritten, FLUsRs, [todo_fix_caller_perhaps], S};
+ UnwrittenRs /= [] ->
+ {needs_repair, FLUsRs, [flarfus], S};
+ true ->
+ [{_Rank, BestProj}|_] = rank_and_sort_projections(Ps, CurrentProj),
+ NotBestPs = [Proj || Proj <- Ps, Proj /= BestProj],
+ UnanimousTag = if NotBestPs == [] -> unanimous;
+ true -> not_unanimous
+ end,
+ Extra = [{all_members_replied, length(FLUsRs) == length(All_queried_list)}],
+ Best_FLUs = [FLU || {FLU, Projx} <- FLUsRs, Projx == BestProj],
+ TransAllHosed = lists:usort(
+ lists:flatten([get_all_hosed(P) || P <- Ps])),
+ AllFlapCounts = merge_flap_counts([get_all_flap_counts(P) ||
+ P <- Ps]),
+ Extra2 = [{all_queried_list, All_queried_list},
+ {flus_rs, FLUsRs},
+ {unanimous_flus,Best_FLUs},
+ {not_unanimous_flus, All_queried_list --
+ (Best_FLUs ++ BadAnswerFLUs)},
+ {bad_answer_flus, BadAnswerFLUs},
+ {not_unanimous_answers, NotBestPs},
+ {trans_all_hosed, TransAllHosed},
+ {trans_all_flap_counts, AllFlapCounts}|Extra],
+ {UnanimousTag, BestProj, Extra2, S}
+ end.
+
+do_read_repair(FLUsRs, _Extra, #ch_mgr{proj=CurrentProj} = S) ->
+ Unwrittens = [x || {_FLU, error_unwritten} <- FLUsRs],
+ Ps = [Proj || {_FLU, Proj} <- FLUsRs, is_record(Proj, projection_v1)],
+ if Unwrittens == [] orelse Ps == [] ->
+ {nothing_to_do, S};
+ true ->
+ %% We have at least one unwritten and also at least one proj.
+ %% Pick the best one, then spam it everywhere.
+
+ [{_Rank, BestProj}|_] = rank_and_sort_projections(Ps, CurrentProj),
+ Epoch = BestProj#projection_v1.epoch_number,
+
+ %% We're doing repair, so use the flavor that will
+ %% continue to all others even if there is an
+ %% error_written on the local FLU.
+ {_DontCare, _S2}=Res = cl_write_public_proj_skip_local_error(
+ Epoch, BestProj, S),
+ Res
+ end.
+
+calc_projection(S, RelativeToServer) ->
+ calc_projection(S, RelativeToServer, []).
+
+calc_projection(#ch_mgr{proj=LastProj, runenv=RunEnv} = S,
+ RelativeToServer, AllHosed) ->
+ Dbg = [],
+ OldThreshold = proplists:get_value(old_threshold, RunEnv),
+ NoPartitionThreshold = proplists:get_value(no_partition_threshold, RunEnv),
+ calc_projection(OldThreshold, NoPartitionThreshold, LastProj,
+ RelativeToServer, AllHosed, Dbg, S).
+
+%% OldThreshold: Percent chance of using the old/previous network partition list
+%% NoPartitionThreshold: If the network partition changes, what percent chance
+%% that there are no partitions at all?
+%% AllHosed: FLUs that we must treat as if they are down, e.g., we are
+%% in a flapping situation and wish to ignore FLUs that we
+%% believe are bad-behaving causes of our flapping.
+
+calc_projection(_OldThreshold, _NoPartitionThreshold, LastProj,
+ RelativeToServer, AllHosed, Dbg,
+ #ch_mgr{name=MyName, runenv=RunEnv1}=S) ->
+ #projection_v1{epoch_number=OldEpochNum,
+ members_dict=MembersDict,
+ upi=OldUPI_list,
+ repairing=OldRepairing_list
+ } = LastProj,
+ LastUp = lists:usort(OldUPI_list ++ OldRepairing_list),
+ AllMembers = (S#ch_mgr.proj)#projection_v1.all_members,
+ {Up0, Partitions, RunEnv2} = calc_up_nodes(MyName,
+ AllMembers, RunEnv1),
+ Up = Up0 -- AllHosed,
+
+ NewUp = Up -- LastUp,
+ Down = AllMembers -- Up,
+
+ NewUPI_list = [X || X <- OldUPI_list, lists:member(X, Up)],
+ Repairing_list2 = [X || X <- OldRepairing_list, lists:member(X, Up)],
+ {NewUPI_list3, Repairing_list3, RunEnv3} =
+ case {NewUp, Repairing_list2} of
+ {[], []} ->
+D_foo=[],
+ {NewUPI_list, [], RunEnv2};
+ {[], [H|T]} when RelativeToServer == hd(NewUPI_list) ->
+ %% The author is head of the UPI list. Let's see if
+ %% *everyone* in the UPI+repairing lists are using our
+ %% projection. This is to simulate a requirement that repair
+ %% a real repair process cannot take place until the chain is
+ %% stable, i.e. everyone is in the same epoch.
+
+ %% TODO create a real API call for fetching this info?
+ SameEpoch_p = check_latest_private_projections_same_epoch(
+ tl(NewUPI_list) ++ Repairing_list2,
+ S#ch_mgr.proj, Partitions, S),
+ if not SameEpoch_p ->
+D_foo=[],
+ {NewUPI_list, OldRepairing_list, RunEnv2};
+ true ->
+D_foo=[{repair_airquote_done, {we_agree, (S#ch_mgr.proj)#projection_v1.epoch_number}}],
+ {NewUPI_list ++ [H], T, RunEnv2}
+ end;
+ {_, _} ->
+D_foo=[],
+ {NewUPI_list, OldRepairing_list, RunEnv2}
+ end,
+ Repairing_list4 = case NewUp of
+ [] -> Repairing_list3;
+ NewUp -> Repairing_list3 ++ NewUp
+ end,
+ Repairing_list5 = Repairing_list4 -- Down,
+
+ TentativeUPI = NewUPI_list3,
+ TentativeRepairing = Repairing_list5,
+
+ {NewUPI, NewRepairing} =
+ if TentativeUPI == [] andalso TentativeRepairing /= [] ->
+ [FirstRepairing|TailRepairing] = TentativeRepairing,
+ {[FirstRepairing], TailRepairing};
+ true ->
+ {TentativeUPI, TentativeRepairing}
+ end,
+
+ P = machi_projection:new(OldEpochNum + 1,
+ MyName, MembersDict, Down, NewUPI, NewRepairing,
+ D_foo ++
+ Dbg ++ [{ps, Partitions},{nodes_up, Up}]),
+ {P, S#ch_mgr{runenv=RunEnv3}}.
+
+check_latest_private_projections_same_epoch(FLUs, MyProj, Partitions, S) ->
+ FoldFun = fun(_FLU, false) ->
+ false;
+ (FLU, true) ->
+ F = fun(Pid) ->
+ ?FLU_PC:read_latest_projection(Pid, private, ?TO)
+ end,
+ case perhaps_call_t(S, Partitions, FLU, F) of
+ {ok, RemotePrivateProj} ->
+ if MyProj#projection_v1.epoch_number ==
+ RemotePrivateProj#projection_v1.epoch_number
+ andalso
+ MyProj#projection_v1.epoch_csum ==
+ RemotePrivateProj#projection_v1.epoch_csum ->
+ true;
+ true ->
+ false
+ end;
+ _ ->
+ false
+ end
+ end,
+ lists:foldl(FoldFun, true, FLUs).
+
+calc_up_nodes(#ch_mgr{name=MyName, proj=Proj, runenv=RunEnv1}=S) ->
+ AllMembers = Proj#projection_v1.all_members,
+ {UpNodes, Partitions, RunEnv2} =
+ calc_up_nodes(MyName, AllMembers, RunEnv1),
+ {UpNodes, Partitions, S#ch_mgr{runenv=RunEnv2}}.
+
+calc_up_nodes(MyName, AllMembers, RunEnv1) ->
+ {Partitions2, Islands2} = machi_partition_simulator:get(AllMembers),
+ catch ?REACT({partitions,Partitions2}),
+ catch ?REACT({islands,Islands2}),
+ UpNodes = lists:sort(
+ [Node || Node <- AllMembers,
+ not lists:member({MyName, Node}, Partitions2),
+ not lists:member({Node, MyName}, Partitions2)]),
+ RunEnv2 = replace(RunEnv1,
+ [{network_partitions, Partitions2},
+ {network_islands, Islands2},
+ {up_nodes, UpNodes}]),
+ {UpNodes, Partitions2, RunEnv2}.
+
+replace(PropList, Items) ->
+ proplists:compact(Items ++ PropList).
+
+rank_and_sort_projections([], CurrentProj) ->
+ rank_projections([CurrentProj], CurrentProj);
+rank_and_sort_projections(Ps, CurrentProj) ->
+ Epoch = lists:max([Proj#projection_v1.epoch_number || Proj <- Ps]),
+ MaxPs = [Proj || Proj <- Ps,
+ Proj#projection_v1.epoch_number == Epoch],
+ %% Sort with highest rank first (custom sort)
+ lists:sort(fun({RankA,_}, {RankB,_}) -> RankA > RankB end,
+ rank_projections(MaxPs, CurrentProj)).
+
+%% Caller must ensure all Projs are of the same epoch number.
+%% If the caller gives us projections with different epochs, we assume
+%% that the caller is doing an OK thing.
+
+rank_projections(Projs, CurrentProj) ->
+ #projection_v1{all_members=All_list} = CurrentProj,
+ MemberRank = orddict:from_list(
+ lists:zip(All_list, lists:seq(1, length(All_list)))),
+ N = length(All_list),
+ [{rank_projection(Proj, MemberRank, N), Proj} || Proj <- Projs].
+
+rank_projection(#projection_v1{upi=[]}, _MemberRank, _N) ->
+ -100;
+rank_projection(#projection_v1{author_server=Author,
+ upi=UPI_list,
+ repairing=Repairing_list}, MemberRank, N) ->
+ AuthorRank = orddict:fetch(Author, MemberRank),
+ %% (AuthorRank-AuthorRank) + % feels unstable????
+ AuthorRank + % feels stable
+ ( N * length(Repairing_list)) +
+ (N*N * length(UPI_list)).
+
+do_react_to_env(S) ->
+ put(react, []),
+ react_to_env_A10(S).
+
+react_to_env_A10(S) ->
+ ?REACT(a10),
+ react_to_env_A20(0, S).
+
+%% State A20: read the latest public projection from the cluster, then
+%% decide whether the replies were truly unanimous across *all* of the
+%% UPI + Repairing participants before moving on to A30.
+react_to_env_A20(Retries, S) ->
+    ?REACT(a20),
+    {UnanimousTag, P_latest, ReadExtra, S2} =
+        do_cl_read_latest_public_projection(true, S),
+
+    %% The UnanimousTag isn't quite sufficient for our needs. We need
+    %% to determine if *all* of the UPI+Repairing FLUs are members of
+    %% the unanimous server replies.
+    UnanimousFLUs = lists:sort(proplists:get_value(unanimous_flus, ReadExtra)),
+    UPI_Repairing_FLUs = lists:sort(P_latest#projection_v1.upi ++
+                                    P_latest#projection_v1.repairing),
+    All_UPI_Repairing_were_unanimous = UPI_Repairing_FLUs == UnanimousFLUs,
+    %% TODO: investigate if the condition below is more correct?
+    %% All_UPI_Repairing_were_unanimous = (UPI_Repairing_FLUs -- UnanimousFLUs) == [],
+    LatestUnanimousP =
+        if UnanimousTag == unanimous
+           andalso
+           All_UPI_Repairing_were_unanimous ->
+                ?REACT({a20,?LINE}),
+                true;
+           UnanimousTag == unanimous ->
+                %% Replies agreed, but not all UPI+Repairing FLUs answered.
+                ?REACT({a20,?LINE,[{upi_repairing,UPI_Repairing_FLUs},
+                                   {unanimous,UnanimousFLUs}]}),
+                false;
+           UnanimousTag == not_unanimous ->
+                ?REACT({a20,?LINE}),
+                false;
+           true ->
+                exit({badbad, UnanimousTag})
+        end,
+    react_to_env_A30(Retries, P_latest, LatestUnanimousP, ReadExtra, S2).
+
+%% State A30: calculate our own new projection proposal, update flap
+%% bookkeeping, bump the epoch (formerly done in C300), and -- when our
+%% flap count has reached FlapLimit -- attach an "inner projection"
+%% (excluding all hosed participants) to the proposal's dbg proplist.
+react_to_env_A30(Retries, P_latest, LatestUnanimousP, _ReadExtra,
+                 #ch_mgr{name=MyName, proj=P_current,
+                         flap_limit=FlapLimit} = S) ->
+    ?REACT(a30),
+    {P_newprop1, S2} = calc_projection(S, MyName),
+    ?REACT({a30, ?LINE, [{newprop1, machi_projection:make_summary(P_newprop1)}]}),
+
+    %% Are we flapping yet?
+    {P_newprop2, S3} = calculate_flaps(P_newprop1, P_current, FlapLimit, S2),
+
+    %% Move the epoch number up ... originally done in C300.
+    #projection_v1{epoch_number=Epoch_newprop2}=P_newprop2,
+    #projection_v1{epoch_number=Epoch_latest}=P_latest,
+    NewEpoch = erlang:max(Epoch_newprop2, Epoch_latest) + 1,
+    P_newprop3 = P_newprop2#projection_v1{epoch_number=NewEpoch},
+    ?REACT({a30, ?LINE, [{newprop3, machi_projection:make_summary(P_newprop3)}]}),
+
+    {P_newprop10, S10} =
+        case get_flap_count(P_newprop3) of
+            {_, P_newprop3_flap_count} when P_newprop3_flap_count >= FlapLimit ->
+                %% We are flapping: compute an alternate "inner"
+                %% projection that excludes all hosed participants.
+                AllHosed = get_all_hosed(S3),
+                {P_i, S_i} = calc_projection(S3, MyName, AllHosed),
+                P_inner = case lists:member(MyName, AllHosed) of
+                              false ->
+                                  P_i;
+                              true ->
+                                  %% Even I am hosed: fall back to a
+                                  %% chain-of-one containing only me.
+                                  P_i#projection_v1{
+                                    upi=[MyName],
+                                    repairing=[],
+                                    down=P_i#projection_v1.all_members
+                                         -- [MyName]}
+                          end,
+
+                %% Choose the inner projection's epoch: derive it from
+                %% observed flap-count epochs when no inner projection
+                %% exists yet, otherwise advance the old inner epoch.
+                FinalInnerEpoch =
+                    case inner_projection_exists(P_current) of
+                        false ->
+                            AllFlapCounts_epk =
+                                [Epk || {{Epk,_FlTime}, _FlCount} <-
+                                            get_all_flap_counts(P_newprop3)],
+                            case AllFlapCounts_epk of
+                                [] ->
+                                    %% HRM, distrust?...
+                                    %% P_newprop3#projection_v1.epoch_number;
+                                    P_newprop3#projection_v1.epoch_number;
+                                [_|_] ->
+                                    lists:max(AllFlapCounts_epk)
+                            end;
+                        true ->
+                            P_oldinner = inner_projection_or_self(P_current),
+                            if P_oldinner#projection_v1.upi ==
+                               P_inner#projection_v1.upi
+                               andalso
+                               P_oldinner#projection_v1.repairing ==
+                               P_inner#projection_v1.repairing
+                               andalso
+                               P_oldinner#projection_v1.down ==
+                               P_inner#projection_v1.down ->
+                                    %% HRM, distrust?...
+                                    %% P_oldinner#projection_v1.epoch_number;
+                                    P_oldinner#projection_v1.epoch_number + 1;
+                               true ->
+                                    P_oldinner#projection_v1.epoch_number + 1
+                            end
+                    end,
+
+                %% Tuck the inner projection (and a summary of it) into
+                %% the outer proposal's dbg proplist for later use.
+                P_inner2 = P_inner#projection_v1{epoch_number=FinalInnerEpoch},
+                InnerInfo = [{inner_summary, machi_projection:make_summary(P_inner2)},
+                             {inner_projection, P_inner2}],
+                DbgX = replace(P_newprop3#projection_v1.dbg, InnerInfo),
+                ?REACT({a30, ?LINE, [qqqwww|DbgX]}),
+                {P_newprop3#projection_v1{dbg=DbgX}, S_i};
+            _ ->
+                {P_newprop3, S3}
+        end,
+
+    react_to_env_A40(Retries, P_newprop10, P_latest,
+                     LatestUnanimousP, S10).
+
+%% State A40: compare our proposal P_newprop against the latest public
+%% projection P_latest, then branch: mixed opinion or newer epoch ->
+%% B10; our proposal ranks higher, or latest author's info is stale or
+%% down -> C300 (write our proposal); otherwise -> A50 (no change).
+react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP,
+                 #ch_mgr{name=MyName, proj=P_current}=S) ->
+    ?REACT(a40),
+    [{Rank_newprop, _}] = rank_projections([P_newprop], P_current),
+    [{Rank_latest, _}] = rank_projections([P_latest], P_current),
+    LatestAuthorDownP = lists:member(P_latest#projection_v1.author_server,
+                                     P_newprop#projection_v1.down),
+
+    if
+        %% Epoch == 0 is reserved for first-time, just booting conditions.
+        (P_current#projection_v1.epoch_number > 0
+         andalso
+         P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number)
+        orelse
+        not LatestUnanimousP ->
+            ?REACT({a40, ?LINE,
+                    [{latest_epoch, P_latest#projection_v1.epoch_number},
+                     {current_epoch, P_current#projection_v1.epoch_number},
+                     {latest_unanimous_p, LatestUnanimousP}]}),
+
+            %% 1st clause: someone else has written a newer projection
+            %% 2nd clause: a network partition has healed, revealing a
+            %%             differing opinion.
+            react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+                             Rank_newprop, Rank_latest, S);
+
+        P_latest#projection_v1.epoch_number < P_current#projection_v1.epoch_number
+        orelse
+        P_latest /= P_current ->
+            ?REACT({a40, ?LINE,
+                    [{latest_epoch, P_latest#projection_v1.epoch_number},
+                     {current_epoch, P_current#projection_v1.epoch_number},
+                     {neq, P_latest /= P_current}]}),
+
+            %% Both of these cases are rare.  Elsewhere, the code
+            %% assumes that the local FLU's projection store is always
+            %% available, so reads & writes to it aren't going to fail
+            %% willy-nilly.  If that assumption is true, then we can
+            %% reason as follows:
+            %%
+            %% a. If we can always read from the local FLU projection
+            %% store, then the 1st clause isn't possible because
+            %% P_latest's epoch # must be at least as large as
+            %% P_current's epoch #
+            %%
+            %% b. If P_latest /= P_current, then there can't be a
+            %% unanimous reply for P_latest, so the earlier 'if'
+            %% clause would be triggered and so we could never reach
+            %% this clause.
+            %%
+            %% I'm keeping this 'if' clause just in case the local FLU
+            %% projection store assumption changes.
+            react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+                             Rank_newprop, Rank_latest, S);
+
+        %% A40a (see flowchart)
+        Rank_newprop > Rank_latest ->
+            %% NOTE(review): this trace is tagged 'b10' inside state A40 --
+            %% looks like a copy/paste slip; confirm before changing.
+            ?REACT({b10, ?LINE,
+                    [{rank_latest, Rank_latest},
+                     {rank_newprop, Rank_newprop},
+                     {latest_author, P_latest#projection_v1.author_server}]}),
+
+            %% TODO: There may be an "improvement" here.  If we're the
+            %% highest-ranking FLU in the all_members list, then if we make a
+            %% projection where our UPI list is the same as P_latest's, and
+            %% our repairing list is the same as P_latest's, then it may not
+            %% be necessary to write our projection: it doesn't "improve"
+            %% anything UPI-wise or repairing-wise.  But it isn't clear to me
+            %% if it's 100% correct to "improve" here and skip writing
+            %% P_newprop, yet.
+            react_to_env_C300(P_newprop, P_latest, S);
+
+        %% A40b (see flowchart)
+        P_latest#projection_v1.author_server == MyName
+        andalso
+        (P_newprop#projection_v1.upi /= P_latest#projection_v1.upi
+         orelse
+         P_newprop#projection_v1.repairing /= P_latest#projection_v1.repairing) ->
+            ?REACT({a40, ?LINE,
+                    [{latest_author, P_latest#projection_v1.author_server},
+                     {newprop_upi, P_newprop#projection_v1.upi},
+                     {latest_upi, P_latest#projection_v1.upi},
+                     {newprop_repairing, P_newprop#projection_v1.repairing},
+                     {latest_repairing, P_latest#projection_v1.repairing}]}),
+
+            react_to_env_C300(P_newprop, P_latest, S);
+
+        %% A40c (see flowchart)
+        LatestAuthorDownP ->
+            ?REACT({a40, ?LINE,
+                    [{latest_author, P_latest#projection_v1.author_server},
+                     {author_is_down_p, LatestAuthorDownP}]}),
+
+            %% TODO: I believe that membership in the
+            %% P_newprop#projection_v1.down is not sufficient for long
+            %% chains.  Rather, we ought to be using a full broadcast
+            %% gossip of server up status.
+            %%
+            %% Imagine 5 servers in an "Olympic Rings" style
+            %% overlapping network partition, where ring1 = upper
+            %% leftmost and ring5 = upper rightmost.  It's both
+            %% possible and desirable for ring5's projection to be
+            %% seen (public) by ring1.  Ring5's projection's rank is
+            %% definitely higher than ring1's proposed projection's
+            %% rank ... but we're in a crazy netsplit where:
+            %% * if we accept ring5's proj: only one functioning chain
+            %%   ([ring4,ring5] but stable
+            %% * if we accept ring1's proj: two functioning chains
+            %%   ([ring1,ring2] and [ring4,ring5] independently)
+            %%   but unstable: we're probably going to flap back & forth?!
+            react_to_env_C300(P_newprop, P_latest, S);
+
+        true ->
+            ?REACT({a40, ?LINE, [true]}),
+
+            FinalProps = [{throttle_seconds, 0}],
+            react_to_env_A50(P_latest, FinalProps, S)
+    end.
+
+%% State A50: terminal state for this iteration when no change is
+%% adopted.  Reports 'no_change' plus any final properties (e.g.
+%% throttle advice from B10).
+react_to_env_A50(P_latest, FinalProps, S) ->
+    ?REACT(a50),
+
+    _HH = get(react),
+% io:format(user, "HEE50s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- _HH, is_atom(X)])]),
+    %% io:format(user, "HEE50 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(_HH)]),
+
+    ?REACT({a50, ?LINE, [{latest_epoch, P_latest#projection_v1.epoch_number},
+                         {final_props, FinalProps}]}),
+    {{no_change, FinalProps, P_latest#projection_v1.epoch_number}, S}.
+
+%% State B10: resolve a mixed opinion between P_newprop and P_latest.
+%% Branches: unanimous latest -> C100 (sanity-check & maybe adopt);
+%% we are flapping -> randomized choice between throttled A50 and
+%% C300; too many retries -> C300; latest outranks ours (and isn't
+%% ours) -> C200 (defer to its author); otherwise -> C300 (write ours).
+react_to_env_B10(Retries, P_newprop, P_latest, LatestUnanimousP,
+                 Rank_newprop, Rank_latest,
+                 #ch_mgr{name=MyName, flap_limit=FlapLimit}=S)->
+    ?REACT(b10),
+
+    {_P_newprop_flap_time, P_newprop_flap_count} = get_flap_count(P_newprop),
+    _LatestAllFlapCounts = get_all_flap_counts_counts(P_latest),
+    %% Transitive flap counts are buggy: the current method to observe
+    %% them is **buggy**.
+    %% P_latest_trans_flap_count = my_find_minmost(LatestAllFlapCounts),
+
+    if
+        LatestUnanimousP ->
+            ?REACT({b10, ?LINE, [{latest_unanimous_p, LatestUnanimousP}]}),
+            put(b10_hack, false),
+
+            react_to_env_C100(P_newprop, P_latest, S);
+
+        P_newprop_flap_count >= FlapLimit ->
+            %% I am flapping ... what else do I do?
+            ?REACT({b10, ?LINE, [i_am_flapping,
+                                 {newprop_flap_count, P_newprop_flap_count},
+                                 %% {latest_trans_flap_count, P_latest_trans_flap_count},
+                                 {flap_limit, FlapLimit}]}),
+            _B10Hack = get(b10_hack),
+            %% if _B10Hack == false andalso P_newprop_flap_count - FlapLimit - 3 =< 0 -> io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]), put(b10_hack, true); true -> ok end,
+            io:format(user, "{FLAP: ~w flaps ~w}!\n", [S#ch_mgr.name, P_newprop_flap_count]),
+
+%io:format(user, "FLAP: ~w flapz ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- get(react), is_atom(X)])]),
+
+            if
+                %% So, if we noticed a flap count by some FLU X with a
+                %% count below FlapLimit, then X crashes so that X's
+                %% flap count remains below FlapLimit, then we could get
+                %% stuck forever?  Hrm, except that 'crashes' ought to be
+                %% detected by our own failure detector and get us out of
+                %% this current flapping situation, right?  TODO
+                %%
+                %% 2015-04-10: TODO Flap count detection, as it has
+                %% been attempted before now, is buggy.
+                %%
+                %% MEANWHILE, we have learned some things about this
+                %% algorithm in the past few months.  With the introduction
+                %% of the "inner projection" concept, we know that the inner
+                %% projection may be stable but the "outer" projection will
+                %% continue to be flappy for as long as there's an
+                %% asymmetric network partition somewhere.  We now know that
+                %% that flappiness is OK and that the only problem with it
+                %% is that it needs to be slowed down so that we don't have
+                %% zillions of public projection proposals written every
+                %% second.
+                %%
+                %% It doesn't matter if the FlapLimit count mechanism
+                %% doesn't give an accurate sense of global flapping state.
+                %% FlapLimit is enough to be able to tell us to slow down.
+
+                true -> %% P_latest_trans_flap_count >= FlapLimit ->
+                    %% We already know that I'm flapping.  We need to
+                    %% signal to the rest of the world that I'm writing
+                    %% and flapping and churning, so we cannot always
+                    %% go to A50 from here.
+                    %%
+                    %% If we do go to A50, then recommend that we poll less
+                    %% frequently.
+                    {X, S2} = gimme_random_uniform(100, S),
+                    if X < 80 ->
+                            %% 80% of the time: stop and throttle, with
+                            %% the throttle duration scaled by FlapLimit.
+                            ?REACT({b10, ?LINE, [flap_stop]}),
+                            ThrottleTime = if FlapLimit < 500 -> 1;
+                                              FlapLimit < 1000 -> 5;
+                                              FlapLimit < 5000 -> 10;
+                                              true -> 30
+                                           end,
+                            FinalProps = [{my_flap_limit, FlapLimit},
+                                          {throttle_seconds, ThrottleTime}],
+io:format(user, "<--x=~w-.--~w-~w-~w->", [X, MyName, P_newprop_flap_count,FlapLimit]),
+                            react_to_env_A50(P_latest, FinalProps, S2);
+                       true ->
+                            %% It is our moral imperative to write so that
+                            %% the flap cycle continues enough times so that
+                            %% everyone notices then eventually falls into
+                            %% consensus.
+                            ?REACT({b10, ?LINE, [flap_continue]}),
+io:format(user, "<--x=~w-oooo-~w-~w-~w->", [X, MyName, P_newprop_flap_count,FlapLimit]),
+                            react_to_env_C300(P_newprop, P_latest, S2)
+                    end
+            end;
+
+        Retries > 2 ->
+            ?REACT({b10, ?LINE, [{retries, Retries}]}),
+            put(b10_hack, false),
+
+            %% The author of P_latest is too slow or crashed.
+            %% Let's try to write P_newprop and see what happens!
+            react_to_env_C300(P_newprop, P_latest, S);
+
+        Rank_latest >= Rank_newprop
+        andalso
+        P_latest#projection_v1.author_server /= MyName ->
+            ?REACT({b10, ?LINE,
+                    [{rank_latest, Rank_latest},
+                     {rank_newprop, Rank_newprop},
+                     {latest_author, P_latest#projection_v1.author_server}]}),
+            put(b10_hack, false),
+
+            %% Give the author of P_latest an opportunity to write a
+            %% new projection in a new epoch to resolve this mixed
+            %% opinion.
+            react_to_env_C200(Retries, P_latest, S);
+
+        true ->
+            ?REACT({b10, ?LINE}),
+            put(b10_hack, false),
+
+            %% P_newprop is best, so let's write it.
+            react_to_env_C300(P_newprop, P_latest, S)
+    end.
+
+%% State C100: sanity-check adopting P_latest.  Checks the outer
+%% transition (P_current -> P_latest) and, when the *Y bindings differ
+%% from the incoming args, the inner-projection transition too.
+%% Sane (or epoch 0, or the repairing short-circuit) -> C110 (adopt);
+%% not sane -> C300 (write our own proposal instead).
+react_to_env_C100(P_newprop, P_latest,
+                  #ch_mgr{name=MyName, proj=P_current}=S) ->
+    ?REACT(c100),
+
+    %% TODO 2015-04-10
+    %% OK, well, we need to be checking sanity on inner projections here,
+    %% but how to do it is still a bit of a mystery.
+    %%
+    %% If the *Y bindings are identical to incoming args, then we aren't
+    %% checking at all.  That's bad, but we don't go into Infinite Loops of
+    %% ReallyReallyBad.
+
+    P_newpropY = P_newprop,
+    P_latestY = P_latest,
+    P_currentY = P_current,
+    %% P_newpropY = inner_projection_or_self(P_newprop),
+    %% P_latestY = inner_projection_or_self(P_latest),
+    %% P_currentY = inner_projection_or_self(P_current),
+
+    I_am_UPI_in_newprop_p = lists:member(MyName, P_newprop#projection_v1.upi),
+    I_am_Repairing_in_latest_p = lists:member(MyName,
+                                              P_latest#projection_v1.repairing),
+    ShortCircuit_p =
+        P_latest#projection_v1.epoch_number > P_current#projection_v1.epoch_number
+        andalso
+        I_am_UPI_in_newprop_p
+        andalso
+        I_am_Repairing_in_latest_p,
+
+    Current_sane_p = projection_transition_is_sane(P_current, P_latest,
+                                                   MyName),
+    %% With identical *Y bindings (see above) this is trivially true.
+    Inner_sane_p =
+        if P_currentY == P_current, P_latestY == P_latest ->
+                true;
+           true ->
+                projection_transition_is_sane(P_currentY, P_latestY, MyName)
+        end,
+
+    case {ShortCircuit_p, Current_sane_p} of
+        _ when P_current#projection_v1.epoch_number == 0 ->
+            %% Epoch == 0 is reserved for first-time, just booting conditions.
+            ?REACT({c100, ?LINE, [first_write]}),
+            react_to_env_C110(P_latest, S);
+        {true, _} ->
+            %% Someone else believes that I am repairing.  We assume
+            %% that nobody is being Byzantine, so we'll believe that I
+            %% am/should be repairing.  We ignore our proposal and try
+            %% to go with the latest.
+            ?REACT({c100, ?LINE, [repairing_short_circuit]}),
+            if Inner_sane_p == false -> io:format(user, "QQQ line ~p false\n", [?LINE]), timer:sleep(500); true -> ok end,
+            react_to_env_C110(P_latest, S);
+        {_, true} when Inner_sane_p ->
+            ?REACT({c100, ?LINE, [sane]}),
+            if Inner_sane_p == false -> io:format(user, "QQQ line ~p false\n", [?LINE]), timer:sleep(500); true -> ok end,
+            react_to_env_C110(P_latest, S);
+        {_, _AnyOtherReturnValue} ->
+            %% P_latest is not sane or else P_latestY is not sane.
+            %% By process of elimination, P_newprop is best,
+            %% so let's write it.
+io:format(user, "\nUrp: ~p ~p ~p ~p\n", [MyName, ShortCircuit_p, _AnyOtherReturnValue, Inner_sane_p]),
+io:format(user, "c100 P_newprop : ~w\n", [machi_projection:make_summary(P_newprop)]),
+io:format(user, "c100 P_newpropY: ~w\n", [machi_projection:make_summary(P_newpropY)]),
+io:format(user, "c100 P_latest : ~w\n", [machi_projection:make_summary(P_latest)]),
+io:format(user, "c100 P_latestY: ~w\n", [machi_projection:make_summary(P_latestY)]),
+            ?REACT({c100, ?LINE, [not_sane]}),
+            react_to_env_C300(P_newprop, P_latest, S)
+    end.
+
+%% State C110: adopt P_latest by writing it (with dbg2 forensic info
+%% appended) to our own private projection store, optionally logging
+%% verbose summaries, then finish in C120.
+react_to_env_C110(P_latest, #ch_mgr{name=MyName} = S) ->
+    ?REACT(c110),
+    %% TODO: Should we carry along any extra info that would be useful
+    %% in the dbg2 list?
+    Extra_todo = [],
+    RunEnv = S#ch_mgr.runenv,
+    Islands = proplists:get_value(network_islands, RunEnv),
+    %% NOTE: Islands--Islands is always [] -- a placeholder keeping the
+    %% dbg2 list shape while the commented alternatives are disabled.
+    P_latest2 = machi_projection:update_dbg2(
+                  P_latest,
+                  [%% {network_islands, Islands},
+                   %% {hooray, {v2, date(), time()}}
+                   Islands--Islands
+                   |Extra_todo]),
+
+    MyNamePid = proxy_pid(MyName, S),
+    %% TODO: We need to fetch the inner projection, if it exists, and
+    %% write it to the private store.  Put the outer projection
+    %% into dbg2 for forensics and perhaps re-start use?
+    ok = ?FLU_PC:write_projection(MyNamePid, private, P_latest2, ?TO),
+    case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
+        true ->
+            {_,_,C} = os:timestamp(),
+            MSec = trunc(C / 1000),
+            {HH,MM,SS} = time(),
+            case inner_projection_exists(P_latest2) of
+                false ->
+                    ok;
+                true ->
+                    P_inner = inner_projection_or_self(P_latest2),
+                    io:format(user, "\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses INNER: ~w\n",
+                              [HH,MM,SS,MSec, S#ch_mgr.name,
+                               machi_projection:make_summary(P_inner)])
+            end,
+            io:format(user, "\n~2..0w:~2..0w:~2..0w.~3..0w ~p uses: ~w\n",
+                      [HH,MM,SS,MSec, S#ch_mgr.name,
+                       machi_projection:make_summary(P_latest2)]);
+        _ ->
+            ok
+    end,
+    react_to_env_C120(P_latest, [], S).
+
+%% State C120: record P_latest in the bounded projection history queue
+%% (capped at 2x the member count) and adopt it as our current
+%% projection, reporting 'now_using' as this iteration's result.
+react_to_env_C120(P_latest, FinalProps, #ch_mgr{proj_history=H} = S) ->
+    ?REACT(c120),
+    H2 = queue:in(P_latest, H),
+    H3 = case queue:len(H2) of
+             %% TODO: revisit this constant?  Is this too long as a base?
+             %% My hunch is that it's fine and that the flap_limit needs to
+             %% be raised much higher (because it can increase several ticks
+             %% without a newer public epoch proposed anywhere).
+             X when X > length(P_latest#projection_v1.all_members) * 2 ->
+                 {_V, Hxx} = queue:out(H2),
+                 Hxx;
+             _ ->
+                 H2
+         end,
+
+    HH = get(react),
+    io:format(user, "HEE120s ~w ~w ~w\n", [S#ch_mgr.name, self(), lists:reverse([X || X <- HH, is_atom(X)])]),
+    %% io:format(user, "HEE120 ~w ~w ~p\n", [S#ch_mgr.name, self(), lists:reverse(HH)]),
+
+    ?REACT({c120, [{latest, machi_projection:make_summary(P_latest)}]}),
+    {{now_using, FinalProps, P_latest#projection_v1.epoch_number},
+     S#ch_mgr{proj=P_latest, proj_history=H3}}.
+
+%% State C200: notionally tell the author of P_latest to act, then
+%% back off (C210) and retry (C220).
+%% NOTE(review): module 'yo' appears to be a placeholder that does not
+%% exist; the catch-all below deliberately swallows its absence.
+react_to_env_C200(Retries, P_latest, S) ->
+    ?REACT(c200),
+    try
+        %% TODO: This code works "well enough" without actually
+        %% telling anybody anything.  Do we want to rip this out?
+        %% Actually implement it?  None of the above?
+        yo:tell_author_yo(P_latest#projection_v1.author_server)
+    catch _Type:_Err ->
+            %% io:format(user, "TODO: tell_author_yo is broken: ~p ~p\n",
+            %%           [_Type, _Err]),
+            ok
+    end,
+    react_to_env_C210(Retries, S).
+
+%% State C210: sleep for a duration ordered by our position among all
+%% chain members (so managers back off by rank), then go to C220.
+react_to_env_C210(Retries, #ch_mgr{name=MyName, proj=Proj} = S) ->
+    ?REACT(c210),
+    sleep_ranked_order(10, 100, MyName, Proj#projection_v1.all_members),
+    react_to_env_C220(Retries, S).
+
+%% State C220: loop back to A20 with the retry count incremented.
+react_to_env_C220(Retries, S) ->
+    ?REACT(c220),
+    react_to_env_A20(Retries + 1, S).
+
+%% State C300: refresh the proposal's checksum and proceed to write it
+%% in C310.  (The epoch bump formerly done here now happens in A30.)
+react_to_env_C300(#projection_v1{epoch_number=_Epoch_newprop}=P_newprop,
+                  #projection_v1{epoch_number=_Epoch_latest}=_P_latest, S) ->
+    ?REACT(c300),
+
+    %% This logic moved to A30.
+    %% NewEpoch = erlang:max(Epoch_newprop, Epoch_latest) + 1,
+    %% P_newprop2 = P_newprop#projection_v1{epoch_number=NewEpoch},
+    %% react_to_env_C310(update_checksum(P_newprop2), S).
+
+    react_to_env_C310(machi_projection:update_checksum(P_newprop), S).
+
+%% State C310: write the new proposal to the cluster's public
+%% projection stores (ignoring local write errors), then start a
+%% fresh iteration at A10.
+react_to_env_C310(P_newprop, S) ->
+    ?REACT(c310),
+    Epoch = P_newprop#projection_v1.epoch_number,
+    {WriteRes, S2} = cl_write_public_proj_skip_local_error(Epoch, P_newprop, S),
+    ?REACT({c310, ?LINE,
+            [{newprop, machi_projection:make_summary(P_newprop)},
+             {write_result, WriteRes}]}),
+    react_to_env_A10(S2).
+
+%% @doc Flap detection.  If our bounded projection history H holds a
+%% single unique {UPI, Repairing, Down} proposal for at least its full
+%% member-count length, we are flapping: bump our flap counter and
+%% gather peers' flap counts and hosed-server info.  Otherwise reset
+%% all flap state.  The resulting info is stored both in the proposal's
+%% dbg proplist and in the run environment.
+calculate_flaps(P_newprop, _P_current, FlapLimit,
+                #ch_mgr{name=MyName, proj_history=H, flap_start=FlapStart,
+                        flaps=Flaps, runenv=RunEnv0} = S) ->
+    RunEnv1 = replace(RunEnv0, [{flapping_i, []}]),
+    HistoryPs = queue:to_list(H),
+    Ps = HistoryPs ++ [P_newprop],
+    UniqueProposalSummaries = lists:usort([{P#projection_v1.upi,
+                                            P#projection_v1.repairing,
+                                            P#projection_v1.down} || P <- Ps]),
+
+    {_WhateverUnanimous, BestP, Props, _S} =
+        cl_read_latest_projection(private, S),
+    NotBestPs = proplists:get_value(not_unanimous_answers, Props),
+    DownUnion = lists:usort(
+                  lists:flatten(
+                    [P#projection_v1.down ||
+                        P <- [BestP|NotBestPs]])),
+    HosedTransUnion = proplists:get_value(trans_all_hosed, Props),
+    TransFlapCounts0 = proplists:get_value(trans_all_flap_counts, Props),
+
+    _Unanimous = proplists:get_value(unanimous_flus, Props),
+    _NotUnanimous = proplists:get_value(not_unanimous_flus, Props),
+    %% NOTE: bad_answer_flus are probably due to timeout or some other network
+    %%       glitch, i.e., anything other than {ok, P::projection()}
+    %%       response from machi_flu0:proj_read_latest().
+    BadFLUs = proplists:get_value(bad_answer_flus, Props),
+
+    %% Keep only peers' flap counts that show active flapping.
+    RemoteTransFlapCounts1 = lists:keydelete(MyName, 1, TransFlapCounts0),
+    RemoteTransFlapCounts =
+        [X || {_FLU, {{_FlEpk,FlTime}, _FlapCount}}=X <- RemoteTransFlapCounts1,
+              FlTime /= ?NOT_FLAPPING],
+    TempNewFlaps = Flaps + 1,
+    TempAllFlapCounts = lists:sort([{MyName, {FlapStart, TempNewFlaps}}|
+                                    RemoteTransFlapCounts]),
+    %% Sanity check.
+    true = lists:all(fun({_,{_,_}}) -> true;
+                        (_) -> false end, TempAllFlapCounts),
+
+    %% H is the bounded history of all of this manager's private
+    %% projection store writes.  If we've proposed the *same*
+    %% {UPI+Repairing, Down} combination for the entire length of our
+    %% bounded size of H, then we're flapping.
+    %%
+    %% If we're flapping, then we use our own flap counter and that of
+    %% all of our peer managers to see if we've all got flap counters
+    %% that exceed the flap_limit.  If that global condition appears
+    %% true, then we "blow the circuit breaker" by stopping our
+    %% participation in the flapping store (via the shortcut to A50).
+    %%
+    %% We reset our flap counter on any of several conditions:
+    %%
+    %% 1. If our bounded history H contains more than one proposal,
+    %%    then by definition we are not flapping.
+    %% 2. If a remote manager is flapping and has re-started a new
+    %%    flapping episode.
+    %% 3. If one of the remote managers that we saw earlier has
+    %%    stopped flapping.
+
+    ?REACT({calculate_flaps, queue:len(H), UniqueProposalSummaries}),
+    case {queue:len(H), UniqueProposalSummaries} of
+        {N, [_]} when N >= length(P_newprop#projection_v1.all_members) ->
+            NewFlaps = TempNewFlaps,
+            if element(2,FlapStart) == ?NOT_FLAPPING ->
+                    %% First tick of a new flapping episode: stamp it.
+                    NewFlapStart = {{epk,P_newprop#projection_v1.epoch_number},now()};
+               true ->
+                    NewFlapStart = FlapStart
+            end,
+
+            %% Wow, this behavior is almost spooky.
+            %%
+            %% For an example partition map [{c,a}], on the very first
+            %% time this 'if' clause is hit by FLU b, AllHosed=[a,c].
+            %% How the heck does B know that??
+            %%
+            %% If I use:
+            %% DownUnionQQQ = [{P#projection_v1.epoch_number, P#projection_v1.author_server, P#projection_v1.down} || P <- [BestP|NotBestPs]],
+            %% AllHosed = [x_1] ++ DownUnion ++ [x_2] ++ HosedTransUnion ++ [x_3] ++ BadFLUs ++ [{downunionqqq, DownUnionQQQ}];
+            %%
+            %% ... then b sees this when proposing epoch 451:
+            %%
+            %% {all_hosed,
+            %%  [x_1,a,c,x_2,x_3,
+            %%   {downunionqqq,
+            %%    [{450,a,[c]},{449,b,[]},{448,c,[a]},{441,d,[]}]}]},
+            %%
+            %% So b's working on epoch 451 at the same time that d's latest
+            %% public projection is only epoch 441.  But there's enough
+            %% lag so that b can "see" that a's bad=[c] (due to t_timeout!)
+            %% and c's bad=[a].  So voila, b magically knows about both
+            %% problem FLUs.  Weird/cool.
+
+            AllFlapCounts = TempAllFlapCounts,
+            AllHosed = lists:usort(DownUnion ++ HosedTransUnion ++ BadFLUs);
+        {_N, _} ->
+            %% Not flapping: reset all flap state.
+            NewFlaps = 0,
+            NewFlapStart = {{epk,-1},?NOT_FLAPPING},
+            AllFlapCounts = [],
+            AllHosed = []
+    end,
+
+    %% If there's at least one count in AllFlapCounts that isn't my
+    %% flap count, and if it's over the flap limit, then consider them
+    %% settled.
+    AllFlapCountsSettled = lists:keydelete(MyName, 1, AllFlapCounts) /= []
+                           andalso
+                           my_find_minmost(AllFlapCounts) >= FlapLimit,
+    FlappingI = {flapping_i, [{flap_count, {NewFlapStart, NewFlaps}},
+                              {all_hosed, AllHosed},
+                              {all_flap_counts, lists:sort(AllFlapCounts)},
+                              {all_flap_counts_settled, AllFlapCountsSettled},
+                              {bad,BadFLUs},
+                              {da_downu, DownUnion}, % debugging aid
+                              {da_hosedtu, HosedTransUnion}, % debugging aid
+                              {da_downreports, [{P#projection_v1.epoch_number, P#projection_v1.author_server, P#projection_v1.down} || P <- [BestP|NotBestPs]]} % debugging aid
+                             ]},
+    Dbg2 = [FlappingI|P_newprop#projection_v1.dbg],
+    %% SLF TODO: 2015-03-04: I'm growing increasingly suspicious of
+    %% the 'runenv' variable that's threaded through all this code.
+    %% It isn't doing what I'd originally intended.  And I think that
+    %% the flapping information that we've just constructed here is
+    %% going to get lost, and that's a shame.  Fix it.
+    RunEnv2 = replace(RunEnv1, [FlappingI]),
+    %% NOTE: Incrementing flaps here doesn't mean that someone's
+    %% public proj store has been updated.  For example, if we loop
+    %% through states C2xx a few times, we would incr flaps each time
+    %% ... but the C2xx path doesn't write a new proposal to everyone's
+    %% public proj stores, and there's no guarantee that anyone else
+    %% has written a new public proj either.
+    {machi_projection:update_checksum(P_newprop#projection_v1{dbg=Dbg2}),
+     S#ch_mgr{flaps=NewFlaps, flap_start=NewFlapStart, runenv=RunEnv2}}.
+
+%% @doc Check every adjacent pair of projections in `Ps' for a sane
+%% transition (non-retrospective mode).
+projection_transitions_are_sane(Ps, RelativeToServer) ->
+    projection_transitions_are_sane(Ps, RelativeToServer, false).
+
+-ifdef(TEST).
+%% @doc Test-only variant: pairwise sanity check in retrospective mode.
+projection_transitions_are_sane_retrospective(Ps, RelativeToServer) ->
+    projection_transitions_are_sane(Ps, RelativeToServer, true).
+-endif. % TEST
+
+%% @doc Walk the projection list pairwise.  Returns 'true' when every
+%% adjacent transition is sane; otherwise returns the first non-true
+%% result from projection_transition_is_sane/4.  Lists of length 0 or
+%% 1 are trivially sane.
+projection_transitions_are_sane([], _RelativeToServer, _RetrospectiveP) ->
+    true;
+projection_transitions_are_sane([_], _RelativeToServer, _RetrospectiveP) ->
+    true;
+projection_transitions_are_sane([P1, P2|T], RelativeToServer, RetrospectiveP) ->
+    case projection_transition_is_sane(P1, P2, RelativeToServer,
+                                       RetrospectiveP) of
+        true ->
+            projection_transitions_are_sane([P2|T], RelativeToServer,
+                                            RetrospectiveP);
+        Else ->
+            Else
+    end.
+
+%% @doc Check a single P1 -> P2 projection transition for sanity
+%% (non-retrospective mode).
+projection_transition_is_sane(P1, P2, RelativeToServer) ->
+    projection_transition_is_sane(P1, P2, RelativeToServer, false).
+
+-ifdef(TEST).
+%% @doc Test-only variant: single-transition check, retrospective mode.
+projection_transition_is_sane_retrospective(P1, P2, RelativeToServer) ->
+    projection_transition_is_sane(P1, P2, RelativeToServer, true).
+-endif. % TEST
+
+projection_transition_is_sane(
+ #projection_v1{epoch_number=Epoch1,
+ epoch_csum=CSum1,
+ creation_time=CreationTime1,
+ author_server=AuthorServer1,
+ all_members=All_list1,
+ down=Down_list1,
+ upi=UPI_list1,
+ repairing=Repairing_list1,
+ dbg=Dbg1} = P1,
+ #projection_v1{epoch_number=Epoch2,
+ epoch_csum=CSum2,
+ creation_time=CreationTime2,
+ author_server=AuthorServer2,
+ all_members=All_list2,
+ down=Down_list2,
+ upi=UPI_list2,
+ repairing=Repairing_list2,
+ dbg=Dbg2} = P2,
+ RelativeToServer, RetrospectiveP) ->
+ try
+ %% General notes:
+ %%
+ %% I'm making no attempt to be "efficient" here. All of these data
+ %% structures are small, and they're not called zillions of times per
+ %% second.
+ %%
+ %% The chain sequence/order checks at the bottom of this function aren't
+ %% as easy-to-read as they ought to be. However, I'm moderately confident
+ %% that it isn't buggy. TODO: refactor them for clarity.
+
+ true = is_integer(Epoch1) andalso is_integer(Epoch2),
+ true = is_binary(CSum1) andalso is_binary(CSum2),
+ {_,_,_} = CreationTime1,
+ {_,_,_} = CreationTime2,
+ true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo will probably change
+ true = is_list(All_list1) andalso is_list(All_list2),
+ true = is_list(Down_list1) andalso is_list(Down_list2),
+ true = is_list(UPI_list1) andalso is_list(UPI_list2),
+ true = is_list(Repairing_list1) andalso is_list(Repairing_list2),
+ true = is_list(Dbg1) andalso is_list(Dbg2),
+
+ true = Epoch2 > Epoch1,
+ All_list1 = All_list2, % todo will probably change
+
+ %% No duplicates
+ true = lists:sort(Down_list2) == lists:usort(Down_list2),
+ true = lists:sort(UPI_list2) == lists:usort(UPI_list2),
+ true = lists:sort(Repairing_list2) == lists:usort(Repairing_list2),
+
+ %% Disjoint-ness
+ true = lists:sort(All_list2) == lists:sort(Down_list2 ++ UPI_list2 ++
+ Repairing_list2),
+ [] = [X || X <- Down_list2, not lists:member(X, All_list2)],
+ [] = [X || X <- UPI_list2, not lists:member(X, All_list2)],
+ [] = [X || X <- Repairing_list2, not lists:member(X, All_list2)],
+ DownS2 = sets:from_list(Down_list2),
+ UPIS2 = sets:from_list(UPI_list2),
+ RepairingS2 = sets:from_list(Repairing_list2),
+ true = sets:is_disjoint(DownS2, UPIS2),
+ true = sets:is_disjoint(DownS2, RepairingS2),
+ true = sets:is_disjoint(UPIS2, RepairingS2),
+
+ %% The author must not be down.
+ false = lists:member(AuthorServer1, Down_list1),
+ false = lists:member(AuthorServer2, Down_list2),
+ %% The author must be in either the UPI or repairing list.
+ true = lists:member(AuthorServer1, UPI_list1 ++ Repairing_list1),
+ true = lists:member(AuthorServer2, UPI_list2 ++ Repairing_list2),
+
+ %% Additions to the UPI chain may only be at the tail
+ UPI_common_prefix = find_common_prefix(UPI_list1, UPI_list2),
+ if UPI_common_prefix == [] ->
+ if UPI_list1 == [] orelse UPI_list2 == [] ->
+ %% If the common prefix is empty, then one of the
+ %% inputs must be empty.
+ true;
+ true ->
+ %% Otherwise, we have a case of UPI changing from
+ %% one of these two situations:
+ %%
+ %% UPI_list1 -> UPI_list2
+ %% -------------------------------------------------
+ %% [d,c,b,a] -> [c,a]
+ %% [d,c,b,a] -> [c,a,repair_finished_added_to_tail].
+ NotUPI2 = (Down_list2 ++ Repairing_list2),
+ case lists:prefix(UPI_list1 -- NotUPI2, UPI_list2) of
+ true ->
+ true;
+ false ->
+ %% Here's a possible failure scenario:
+ %% UPI_list1 -> UPI_list2
+ %% Repairing_list1 -> Repairing_list2
+ %% -----------------------------------
+ %% [a,b,c] author=a -> [c,a] author=c
+ %% [] [b]
+ %%
+ %% ... where RelativeToServer=b. In this case, b
+ %% has been partitions for a while and has only
+ %% now just learned of several epoch transitions.
+ %% If the author of both is also in the UPI of
+ %% both, then those authors would not have allowed
+ %% a bad transition, so we will assume this
+ %% transition is OK.
+ lists:member(AuthorServer1, UPI_list1)
+ andalso
+ lists:member(AuthorServer2, UPI_list2)
+ end
+ end;
+ true ->
+ true
+ end,
+ true = lists:prefix(UPI_common_prefix, UPI_list1),
+ true = lists:prefix(UPI_common_prefix, UPI_list2),
+ UPI_1_suffix = UPI_list1 -- UPI_common_prefix,
+ UPI_2_suffix = UPI_list2 -- UPI_common_prefix,
+
+ MoreCheckingP =
+ RelativeToServer == undefined
+ orelse
+ not (lists:member(RelativeToServer, Down_list2) orelse
+ lists:member(RelativeToServer, Repairing_list2)),
+
+ if not MoreCheckingP ->
+ ok;
+ MoreCheckingP ->
+ %% Where did elements in UPI_2_suffix come from?
+ %% Only two sources are permitted.
+ [lists:member(X, Repairing_list1) % X added after repair done
+ orelse
+ lists:member(X, UPI_list1) % X in UPI_list1 after common pref
+ || X <- UPI_2_suffix],
+
+ %% The UPI_2_suffix must exactly be equal to: ordered items from
+ %% UPI_list1 concat'ed with ordered items from Repairing_list1.
+ %% Both temp vars below preserve relative order!
+ UPI_2_suffix_from_UPI1 = [X || X <- UPI_1_suffix,
+ lists:member(X, UPI_list2)],
+ UPI_2_suffix_from_Repairing1 = [X || X <- UPI_2_suffix,
+ lists:member(X, Repairing_list1)],
+ %% true?
+ UPI_2_concat = (UPI_2_suffix_from_UPI1 ++ UPI_2_suffix_from_Repairing1),
+ if UPI_2_suffix == UPI_2_concat ->
+ ok;
+ true ->
+ if RetrospectiveP ->
+ %% We are in retrospective mode. But there are
+ %% some transitions that are difficult to find
+ %% when standing outside of all of the FLUs and
+ %% examining their behavior. (In contrast to
+ %% this same function being called "in the path"
+ %% of a projection transition by a particular FLU
+ %% which knows exactly its prior projection and
+ %% exactly what it intends to do.) Perhaps this
+ %% exception clause here can go away with
+ %% better/more clever retrospection analysis?
+ %%
+ %% Here's a case that PULSE found:
+ %% FLU b:
+ %% E=257: UPI=[c,a], REPAIRING=[b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %% FLU a:
+ %% E=251: UPI=[c], REPAIRING=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %% FLU c:
+ %% E=282: UPI=[c], REPAIRING=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %%
+ %% From the perspective of each individual FLU,
+ %% the unanimous transition at epoch #284 is
+ %% good. The repair that is done by FLU c -> a
+ %% is likewise good.
+ %%
+ %% From a retrospective point of view (and the
+ %% current implementation), there's a bad-looking
+ %% transition from epoch #269 to #284. This is
+ %% from the point of view of the last two
+ %% unanimous private projection store epochs:
+ %%
+ %% E=269: UPI=[c], REPAIRING=[], DOWN=[a,b]
+ %% E=284: UPI=[c,a], REPAIRING=[b]
+ %%
+ %% The retrospective view by
+ %% machi_chain_manager1_pulse.erl just can't
+ %% reason correctly about this situation. We
+ %% will instead rely on the non-introspective
+ %% sanity checking that each FLU does before it
+ %% writes to its private projection store and
+ %% then adopts that projection (and unwedges
+ %% itself, etc etc).
+
+ %% io:format(user, "QQQ: RetrospectiveP ~p\n", [RetrospectiveP]),
+ %% io:format(user, "QQQ: UPI_2_suffix ~p\n", [UPI_2_suffix]),
+ %% io:format(user, "QQQ: UPI_2_suffix_from_UPI1 ~p\n", [UPI_2_suffix_from_UPI1]),
+ %% io:format(user, "QQQ: UPI_2_suffix_from_Repairing1 ~p\n", [UPI_2_suffix_from_Repairing1]),
+ io:format(user, "|~p,~p TODO revisit|",
+ [?MODULE, ?LINE]),
+ ok;
+ true ->
+ %% The following is OK: We're shifting from a
+ %% normal projection to an inner one. The old
+ %% normal has a UPI that has nothing to do with
+ %% RelativeToServer a.k.a. me.
+ %% from:
+ %% {epoch,847},{author,c},{upi,[c]},{repair,[]},{down,[a,b,d]},
+ %% to:
+ %% {epoch,848},{author,a},{upi,[a]},{repair,[]},{down,[b,c,d]},
+ if UPI_2_suffix == [AuthorServer2] ->
+ true;
+ not RetrospectiveP ->
+ exit({upi_2_suffix_error, UPI_2_suffix})
+ end
+ end
+ end
+ end,
+ true
+ catch
+ _Type:_Err ->
+ S1 = machi_projection:make_summary(P1),
+ S2 = machi_projection:make_summary(P2),
+ Trace = erlang:get_stacktrace(),
+ %% %% TODO: this history goop is useful sometimes for debugging but
+ %% %% not for any "real" use. Get rid of it, for the long term.
+ %% H = (catch [{FLUName, Type, P#projection_v1.epoch_number, machi_projection:make_summary(P)} ||
+ %% FLUName <- P1#projection_v1.all_members,
+ %% Type <- [public,private],
+ %% P <- ?FLU_PC:proj_get_all(orddict:fetch(FLUName, What?), Type)]),
+ {err, _Type, _Err, from, S1, to, S2, relative_to, RelativeToServer,
+ history, (catch lists:sort([no_history])),
+ stack, Trace}
+ end.
+
+find_common_prefix([], _) ->
+ [];
+find_common_prefix(_, []) ->
+ [];
+find_common_prefix([H|L1], [H|L2]) ->
+ [H|find_common_prefix(L1, L2)];
+find_common_prefix(_, _) ->
+ [].
+
+sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list) ->
+ USec = calc_sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list),
+ timer:sleep(USec),
+ USec.
+
+calc_sleep_ranked_order(MinSleep, MaxSleep, FLU, FLU_list) ->
+ Front = lists:takewhile(fun(X) -> X /=FLU end, FLU_list),
+ Index = length(Front) + 1,
+ NumNodes = length(FLU_list),
+ SleepIndex = NumNodes - Index,
+ SleepChunk = MaxSleep div NumNodes,
+ MinSleep + (SleepChunk * SleepIndex).
+
+my_find_minmost([]) ->
+ 0;
+my_find_minmost([{_,_}|_] = TransFlapCounts0) ->
+ lists:min([FlapCount || {_T, {_FlTime, FlapCount}} <- TransFlapCounts0]);
+my_find_minmost(TransFlapCounts0) ->
+ lists:min(TransFlapCounts0).
+
+get_raw_flapping_i(#projection_v1{dbg=Dbg}) ->
+ proplists:get_value(flapping_i, Dbg, []).
+
+get_flap_count(P) ->
+ proplists:get_value(flap_count, get_raw_flapping_i(P), 0).
+
+get_all_flap_counts(P) ->
+ proplists:get_value(all_flap_counts, get_raw_flapping_i(P), []).
+
+get_all_flap_counts_counts(P) ->
+ case get_all_flap_counts(P) of
+ [] ->
+ [];
+ [{_,{_,_}}|_] = Cs ->
+ [Count || {_FLU, {_Time, Count}} <- Cs]
+ end.
+
+get_all_hosed(P) when is_record(P, projection_v1)->
+ proplists:get_value(all_hosed, get_raw_flapping_i(P), []);
+get_all_hosed(S) when is_record(S, ch_mgr) ->
+ proplists:get_value(all_hosed,
+ proplists:get_value(flapping_i, S#ch_mgr.runenv, []),
+ []).
+
+merge_flap_counts(FlapCounts) ->
+ merge_flap_counts(FlapCounts, orddict:new()).
+
+merge_flap_counts([], D) ->
+ orddict:to_list(D);
+merge_flap_counts([FlapCount|Rest], D1) ->
+ %% We know that FlapCount is list({Actor, {{_epk,FlapStartTime},NumFlaps}}).
+ D2 = orddict:from_list(FlapCount),
+ D2 = orddict:from_list(FlapCount),
+ %% If the FlapStartTimes are identical, then pick the bigger flap count.
+ %% If the FlapStartTimes differ, then pick the larger start time tuple.
+ D3 = orddict:merge(fun(_Key, {{_,T1}, NF1}= V1, {{_,T2}, NF2}=V2)
+ when T1 == T2 ->
+ if NF1 > NF2 ->
+ V1;
+ true ->
+ V2
+ end;
+ (_Key, {{_,T1},_NF1}= V1, {{_,T2},_NF2}=V2) ->
+ if T1 > T2 ->
+ V1;
+ true ->
+ V2
+ end;
+ (_Key, V1, V2) ->
+ exit({bad_merge_2tuples,mod,?MODULE,line,?LINE,
+ _Key, V1, V2})
+ end, D1, D2),
+ merge_flap_counts(Rest, D3).
+
+proxy_pid(Name, #ch_mgr{proxies_dict=ProxiesDict}) ->
+ orddict:fetch(Name, ProxiesDict).
+
+gimme_random_uniform(N, S) ->
+ RunEnv1 = S#ch_mgr.runenv,
+ Seed1 = proplists:get_value(seed, RunEnv1),
+ {X, Seed2} = random:uniform_s(N, Seed1),
+ RunEnv2 = [{seed, Seed2}|lists:keydelete(seed, 1, RunEnv1)],
+ {X, S#ch_mgr{runenv=RunEnv2}}.
+
+inner_projection_exists(P) ->
+ case proplists:get_value(inner_projection, P#projection_v1.dbg) of
+ undefined ->
+ false;
+ _ ->
+ true
+ end.
+
+inner_projection_or_self(P) ->
+ case proplists:get_value(inner_projection, P#projection_v1.dbg) of
+ undefined ->
+ P;
+ P_inner ->
+ P_inner
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+perhaps_call_t(S, Partitions, FLU, DoIt) ->
+ try
+ perhaps_call(S, Partitions, FLU, DoIt)
+ catch
+ exit:timeout ->
+ t_timeout
+ end.
+
+perhaps_call(#ch_mgr{name=MyName}=S, Partitions, FLU, DoIt) ->
+ ProxyPid = proxy_pid(FLU, S),
+ RemoteFLU_p = FLU /= MyName,
+ case RemoteFLU_p andalso lists:member({MyName, FLU}, Partitions) of
+ false ->
+ Res = DoIt(ProxyPid),
+ case RemoteFLU_p andalso lists:member({FLU, MyName}, Partitions) of
+ false ->
+ Res;
+ _ ->
+ (catch put(react, [{timeout2,me,MyName,to,FLU,RemoteFLU_p,Partitions}|get(react)])),
+ exit(timeout)
+ end;
+ _ ->
+ (catch put(react, [{timeout1,me,MyName,to,FLU,RemoteFLU_p,Partitions}|get(react)])),
+ exit(timeout)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
diff --git a/src/machi_chash.erl b/src/machi_chash.erl
index f45473a..6ad46f3 100644
--- a/src/machi_chash.erl
+++ b/src/machi_chash.erl
@@ -16,9 +16,13 @@
%%%
%%%-------------------------------------------------------------------
-%% Consistent hashing library. Also known as "random slicing".
-%% Originally from the Hibari DB source code at https://github.com/hibari
+%% @doc Consistent hashing library. Also known as "random slicing".
%%
+%% This code was originally from the Hibari DB source code at
+%% [https://github.com/hibari]
+
+-module(machi_chash).
+
%% TODO items:
%%
%% 1. Refactor to use bigints instead of floating point numbers. The
@@ -26,8 +30,6 @@
%% much wiggle-room for making really small hashing range
%% definitions.
--module(machi_chash).
-
-define(SMALLEST_SIGNIFICANT_FLOAT_SIZE, 0.1e-12).
-define(SHA_MAX, (1 bsl (20*8))).
diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl
index 02f7925..459aad3 100644
--- a/src/machi_flu1.erl
+++ b/src/machi_flu1.erl
@@ -18,6 +18,33 @@
%%
%% -------------------------------------------------------------------
+%% @doc The Machi FLU file server + file location sequencer.
+%%
+%% This module implements only the Machi FLU file server and its
+%% implicit sequencer.
+%% Please see the EDoc "Overview" for details about the FLU as a
+%% primitive file server process vs. the larger Machi design of a FLU
+%% as a sequencer + file server + chain manager group of processes.
+%%
+%% For the moment, this module also implements a rudimentary TCP-based
+%% protocol as the sole supported access method to the server,
+%% sequencer, and projection store. Conceptually, those three
+%% services are independent and ought to have their own protocols. As
+%% a practical matter, there is no need for wire protocol
+%% compatibility. Furthermore, from the perspective of failure
+%% detection, it is very convenient that all three FLU-related
+%% services are accessed using the same single TCP port.
+%%
+%% The FLU is named after the CORFU server "FLU" or "FLash Unit" server.
+%%
+%% TODO There is one major missing feature in this FLU implementation:
+%% there is no "write-once" enforcement for any position in a Machi
+%% file. At the moment, we rely on correct behavior of the client
+%% & the sequencer to avoid overwriting data. In the Real World,
+%% however, all Machi file data is supposed to be exactly write-once
+%% to avoid problems with bugs, wire protocol corruption, malicious
+%% clients, etc.
+
-module(machi_flu1).
-include_lib("kernel/include/file.hrl").
@@ -46,7 +73,7 @@ start_link([{FluName, TcpPort, DataDir}|Rest])
stop(Pid) ->
case erlang:is_process_alive(Pid) of
true ->
- Pid ! forever,
+ Pid ! killme,
ok;
false ->
error
@@ -86,7 +113,11 @@ main2(RegName, TcpPort, DataDir, Rest) ->
put(flu_append_pid, AppendPid),
put(flu_projection_pid, ProjectionPid),
put(flu_listen_pid, ListenPid),
- receive forever -> ok end.
+ receive killme -> ok end,
+ (catch exit(AppendPid, kill)),
+ (catch exit(ProjectionPid, kill)),
+ (catch exit(ListenPid, kill)),
+ ok.
start_listen_server(S) ->
spawn_link(fun() -> run_listen_server(S) end).
@@ -214,7 +245,7 @@ do_net_server_append2(RegName, Sock, LenHex, Prefix) ->
<> = machi_util:hexstr_to_bin(LenHex),
ok = inet:setopts(Sock, [{packet, raw}]),
{ok, Chunk} = gen_tcp:recv(Sock, Len, 60*1000),
- CSum = machi_util:checksum(Chunk),
+ CSum = machi_util:checksum_chunk(Chunk),
try
RegName ! {seq_append, self(), Prefix, Chunk, CSum}
catch error:badarg ->
@@ -296,7 +327,7 @@ do_net_server_write2(Sock, OffsetHex, LenHex, FileBin, DataDir, FHc) ->
DoItFun = fun(FHd, Offset, Len) ->
ok = inet:setopts(Sock, [{packet, raw}]),
{ok, Chunk} = gen_tcp:recv(Sock, Len),
- CSum = machi_util:checksum(Chunk),
+ CSum = machi_util:checksum_chunk(Chunk),
case file:pwrite(FHd, Offset, Chunk) of
ok ->
CSumHex = machi_util:bin_to_hexstr(CSum),
@@ -494,11 +525,14 @@ run_seq_append_server2(Prefix, DataDir) ->
end.
+-spec seq_name_hack() -> string().
+seq_name_hack() ->
+ lists:flatten(io_lib:format("~.36B~.36B",
+ [element(3,now()),
+ list_to_integer(os:getpid())])).
+
seq_append_server_loop(DataDir, Prefix, FileNum) ->
- SequencerNameHack = lists:flatten(io_lib:format(
- "~.36B~.36B",
- [element(3,now()),
- list_to_integer(os:getpid())])),
+ SequencerNameHack = seq_name_hack(),
{File, FullPath} = machi_util:make_data_filename(
DataDir, Prefix, SequencerNameHack, FileNum),
{ok, FHd} = file:open(FullPath,
@@ -576,12 +610,12 @@ handle_projection_command({read_projection, ProjType, Epoch},
handle_projection_command({write_projection, ProjType, Proj},
#state{proj_store=ProjStore}) ->
machi_projection_store:write(ProjStore, ProjType, Proj);
-handle_projection_command({get_all, ProjType},
+handle_projection_command({get_all_projections, ProjType},
#state{proj_store=ProjStore}) ->
- machi_projection_store:get_all(ProjStore, ProjType);
-handle_projection_command({list_all, ProjType},
+ machi_projection_store:get_all_projections(ProjStore, ProjType);
+handle_projection_command({list_all_projections, ProjType},
#state{proj_store=ProjStore}) ->
- machi_projection_store:list_all(ProjStore, ProjType);
+ machi_projection_store:list_all_projections(ProjStore, ProjType);
handle_projection_command(Else, _S) ->
{error, unknown_cmd, Else}.
diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl
index 6dd6c65..d2dac02 100644
--- a/src/machi_flu1_client.erl
+++ b/src/machi_flu1_client.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Erlang API for the Machi FLU TCP protocol version 1.
+
-module(machi_flu1_client).
-include("machi.hrl").
@@ -35,8 +37,8 @@
read_latest_projection/2, read_latest_projection/3,
read_projection/3, read_projection/4,
write_projection/3, write_projection/4,
- get_all/2, get_all/3,
- list_all/2, list_all/3,
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3,
%% Common API
quit/1
@@ -54,7 +56,7 @@
-type chunk_pos() :: {file_offset(), chunk_size(), file_name_s()}.
-type chunk_size() :: non_neg_integer().
-type epoch_csum() :: binary().
--type epoch_num() :: non_neg_integer().
+-type epoch_num() :: -1 | non_neg_integer().
-type epoch_id() :: {epoch_num(), epoch_csum()}.
-type file_info() :: {file_size(), file_name_s()}.
-type file_name() :: binary() | list().
@@ -151,19 +153,19 @@ list_files(Host, TcpPort, EpochID) when is_integer(TcpPort) ->
catch gen_tcp:close(Sock)
end.
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
-spec get_latest_epoch(port(), projection_type()) ->
- {ok, -1|non_neg_integer()} | {error, term()}.
+ {ok, epoch_id()} | {error, term()}.
get_latest_epoch(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
get_latest_epoch2(Sock, ProjType).
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
-spec get_latest_epoch(inet_host(), inet_port(),
projection_type()) ->
- {ok, -1|non_neg_integer()} | {error, term()}.
+ {ok, epoch_id()} | {error, term()}.
get_latest_epoch(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
@@ -173,7 +175,7 @@ get_latest_epoch(Host, TcpPort, ProjType)
catch gen_tcp:close(Sock)
end.
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'
-spec read_latest_projection(port(), projection_type()) ->
{ok, projection()} | {error, not_written} | {error, term()}.
@@ -181,7 +183,7 @@ read_latest_projection(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
read_latest_projection2(Sock, ProjType).
-%% @doc Get the latest epoch number from the FLU's projection store.
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'
-spec read_latest_projection(inet_host(), inet_port(),
projection_type()) ->
@@ -243,44 +245,44 @@ write_projection(Host, TcpPort, ProjType, Proj)
%% @doc Get all projections from the FLU's projection store.
--spec get_all(port(), projection_type()) ->
+-spec get_all_projections(port(), projection_type()) ->
{ok, [projection()]} | {error, term()}.
-get_all(Sock, ProjType)
+get_all_projections(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
- get_all2(Sock, ProjType).
+ get_all_projections2(Sock, ProjType).
%% @doc Get all projections from the FLU's projection store.
--spec get_all(inet_host(), inet_port(),
+-spec get_all_projections(inet_host(), inet_port(),
projection_type()) ->
{ok, [projection()]} | {error, term()}.
-get_all(Host, TcpPort, ProjType)
+get_all_projections(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
try
- get_all2(Sock, ProjType)
+ get_all_projections2(Sock, ProjType)
after
catch gen_tcp:close(Sock)
end.
%% @doc Get all epoch numbers from the FLU's projection store.
--spec list_all(port(), projection_type()) ->
+-spec list_all_projections(port(), projection_type()) ->
{ok, [non_neg_integer()]} | {error, term()}.
-list_all(Sock, ProjType)
+list_all_projections(Sock, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
- list_all2(Sock, ProjType).
+ list_all_projections2(Sock, ProjType).
%% @doc Get all epoch numbers from the FLU's projection store.
--spec list_all(inet_host(), inet_port(),
+-spec list_all_projections(inet_host(), inet_port(),
projection_type()) ->
{ok, [non_neg_integer()]} | {error, term()}.
-list_all(Host, TcpPort, ProjType)
+list_all_projections(Host, TcpPort, ProjType)
when ProjType == 'public' orelse ProjType == 'private' ->
Sock = machi_util:connect(Host, TcpPort),
try
- list_all2(Sock, ProjType)
+ list_all_projections2(Sock, ProjType)
after
catch gen_tcp:close(Sock)
end.
@@ -365,9 +367,10 @@ trunc_hack(Host, TcpPort, EpochID, File) when is_integer(TcpPort) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%
append_chunk2(Sock, EpochID, Prefix0, Chunk0) ->
+ erase(bad_sock),
try
%% TODO: add client-side checksum to the server's protocol
- %% _ = crypto:hash(md5, Chunk),
+ %% _ = machi_util:checksum_chunk(Chunk),
Prefix = machi_util:make_binary(Prefix0),
Chunk = machi_util:make_binary(Chunk0),
Len = iolist_size(Chunk0),
@@ -391,47 +394,59 @@ append_chunk2(Sock, EpochID, Prefix0, Chunk0) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
read_chunk2(Sock, EpochID, File0, Offset, Size) ->
- {EpochNum, EpochCSum} = EpochID,
- EpochIDRaw = <>,
- File = machi_util:make_binary(File0),
- PrefixHex = machi_util:int_to_hexbin(Offset, 64),
- SizeHex = machi_util:int_to_hexbin(Size, 32),
- CmdLF = [$R, 32, EpochIDRaw, PrefixHex, SizeHex, File, 10],
- ok = gen_tcp:send(Sock, CmdLF),
- case gen_tcp:recv(Sock, 3) of
- {ok, <<"OK\n">>} ->
- {ok, _Chunk}=Res = gen_tcp:recv(Sock, Size),
- Res;
- {ok, Else} ->
- {ok, OldOpts} = inet:getopts(Sock, [packet]),
- ok = inet:setopts(Sock, [{packet, line}]),
- {ok, Else2} = gen_tcp:recv(Sock, 0),
- ok = inet:setopts(Sock, OldOpts),
- case Else of
- <<"ERA">> ->
- {error, todo_erasure_coded}; %% escript_cc_parse_ec_info(Sock, Line, Else2);
- <<"ERR">> ->
- case Else2 of
- <<"OR BAD-IO\n">> ->
- {error, no_such_file};
- <<"OR NOT-ERASURE\n">> ->
- {error, no_such_file};
- <<"OR BAD-ARG\n">> ->
- {error, bad_arg};
- <<"OR PARTIAL-READ\n">> ->
- {error, partial_read};
- _ ->
- {error, Else2}
- end;
- _ ->
- {error, {whaaa, <>}}
- end
+ erase(bad_sock),
+ try
+ {EpochNum, EpochCSum} = EpochID,
+ EpochIDRaw = <>,
+ File = machi_util:make_binary(File0),
+ PrefixHex = machi_util:int_to_hexbin(Offset, 64),
+ SizeHex = machi_util:int_to_hexbin(Size, 32),
+ CmdLF = [$R, 32, EpochIDRaw, PrefixHex, SizeHex, File, 10],
+ ok = gen_tcp:send(Sock, CmdLF),
+ case gen_tcp:recv(Sock, 3) of
+ {ok, <<"OK\n">>} ->
+ {ok, _Chunk}=Res = gen_tcp:recv(Sock, Size),
+ Res;
+ {ok, Else} ->
+ {ok, OldOpts} = inet:getopts(Sock, [packet]),
+ ok = inet:setopts(Sock, [{packet, line}]),
+ {ok, Else2} = gen_tcp:recv(Sock, 0),
+ ok = inet:setopts(Sock, OldOpts),
+ case Else of
+ <<"ERA">> ->
+ {error, todo_erasure_coded}; %% escript_cc_parse_ec_info(Sock, Line, Else2);
+ <<"ERR">> ->
+ case Else2 of
+ <<"OR BAD-IO\n">> ->
+ {error, no_such_file};
+ <<"OR NOT-ERASURE\n">> ->
+ {error, no_such_file};
+ <<"OR BAD-ARG\n">> ->
+ {error, bad_arg};
+ <<"OR PARTIAL-READ\n">> ->
+ {error, partial_read};
+ _ ->
+ {error, Else2}
+ end;
+ _ ->
+ {error, {whaaa_todo, <>}}
+ end
+ end
+ catch
+ throw:Error ->
+ put(bad_sock, Sock),
+ Error;
+ error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
+ {error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
list2(Sock, EpochID) ->
@@ -462,6 +477,7 @@ list3(Else, _Sock) ->
throw({server_protocol_error, Else}).
checksum_list2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
EpochIDRaw = <>,
@@ -484,8 +500,10 @@ checksum_list2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
@@ -515,11 +533,12 @@ checksum_list_finish(Chunks) ->
Line /= <<>>].
write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
EpochIDRaw = <>,
%% TODO: add client-side checksum to the server's protocol
- %% _ = crypto:hash(md5, Chunk),
+ %% _ = machi_util:checksum_chunk(Chunk),
File = machi_util:make_binary(File0),
true = (Offset >= ?MINIMUM_OFFSET),
OffsetHex = machi_util:int_to_hexbin(Offset, 64),
@@ -542,12 +561,15 @@ write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
delete_migration2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
EpochIDRaw = <>,
@@ -566,12 +588,15 @@ delete_migration2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
trunc_hack2(Sock, EpochID, File) ->
+ erase(bad_sock),
try
{EpochNum, EpochCSum} = EpochID,
EpochIDRaw = <>,
@@ -590,8 +615,10 @@ trunc_hack2(Sock, EpochID, File) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch}}
end.
@@ -611,15 +638,16 @@ write_projection2(Sock, ProjType, Proj) ->
ProjCmd = {write_projection, ProjType, Proj},
do_projection_common(Sock, ProjCmd).
-get_all2(Sock, ProjType) ->
- ProjCmd = {get_all, ProjType},
+get_all_projections2(Sock, ProjType) ->
+ ProjCmd = {get_all_projections, ProjType},
do_projection_common(Sock, ProjCmd).
-list_all2(Sock, ProjType) ->
- ProjCmd = {list_all, ProjType},
+list_all_projections2(Sock, ProjType) ->
+ ProjCmd = {list_all_projections, ProjType},
do_projection_common(Sock, ProjCmd).
do_projection_common(Sock, ProjCmd) ->
+ erase(bad_sock),
try
ProjCmdBin = term_to_binary(ProjCmd),
Len = iolist_size(ProjCmdBin),
@@ -641,7 +669,9 @@ do_projection_common(Sock, ProjCmd) ->
end
catch
throw:Error ->
+ put(bad_sock, Sock),
Error;
error:{badmatch,_}=BadMatch ->
+ put(bad_sock, Sock),
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
end.
diff --git a/src/machi_flu_sup.erl b/src/machi_flu_sup.erl
index 4ad26fc..ce29502 100644
--- a/src/machi_flu_sup.erl
+++ b/src/machi_flu_sup.erl
@@ -18,6 +18,9 @@
%%
%% -------------------------------------------------------------------
+%% @doc Supervisor for Machi FLU servers and their related support
+%% servers.
+
-module(machi_flu_sup).
-behaviour(supervisor).
diff --git a/src/machi_projection.erl b/src/machi_projection.erl
index d4f7e42..5f97f94 100644
--- a/src/machi_projection.erl
+++ b/src/machi_projection.erl
@@ -18,47 +18,47 @@
%%
%% -------------------------------------------------------------------
+%% @doc API for manipulating Machi projection data structures (i.e., records).
+
-module(machi_projection).
-include("machi_projection.hrl").
-export([
new/6, new/7, new/8,
- update_projection_checksum/1,
- update_projection_dbg2/2,
+ update_checksum/1,
+ update_dbg2/2,
compare/2,
- make_projection_summary/1
+ make_summary/1,
+ make_members_dict/1
]).
-new(MyName, All_list, UPI_list, Down_list, Repairing_list, Ps) ->
- new(0, MyName, All_list, Down_list, UPI_list, Repairing_list, Ps).
+%% @doc Create a new projection record.
-new(EpochNum, MyName, All_list, Down_list, UPI_list, Repairing_list, Dbg) ->
- new(EpochNum, MyName, All_list, Down_list, UPI_list, Repairing_list,
+new(MyName, MemberDict, UPI_list, Down_list, Repairing_list, Ps) ->
+ new(0, MyName, MemberDict, Down_list, UPI_list, Repairing_list, Ps).
+
+%% @doc Create a new projection record.
+
+new(EpochNum, MyName, MemberDict, Down_list, UPI_list, Repairing_list, Dbg) ->
+ new(EpochNum, MyName, MemberDict, Down_list, UPI_list, Repairing_list,
Dbg, []).
-new(EpochNum, MyName, All_list0, Down_list, UPI_list, Repairing_list,
+%% @doc Create a new projection record.
+%%
+%% The `MembersDict0' argument may be a true `p_srvr_dict()' (i.e., it
+%% is a well-formed `orddict' with the correct 2-tuple key-value form)
+%% or it may be simply `list(p_srvr())', in which case we'll convert it
+%% to a `p_srvr_dict()'.
+
+new(EpochNum, MyName, MembersDict0, Down_list, UPI_list, Repairing_list,
Dbg, Dbg2)
when is_integer(EpochNum), EpochNum >= 0,
is_atom(MyName) orelse is_binary(MyName),
- is_list(All_list0), is_list(Down_list), is_list(UPI_list),
+ is_list(MembersDict0), is_list(Down_list), is_list(UPI_list),
is_list(Repairing_list), is_list(Dbg), is_list(Dbg2) ->
- {All_list, MemberDict} =
- case lists:all(fun(P) when is_record(P, p_srvr) -> true;
- (_) -> false
- end, All_list0) of
- true ->
- All = [S#p_srvr.name || S <- All_list0],
- TmpL = [{S#p_srvr.name, S} || S <- All_list0],
- {All, orddict:from_list(TmpL)};
- false ->
- All_list1 = lists:zip(All_list0,lists:seq(0,length(All_list0)-1)),
- All_list2 = [#p_srvr{name=S, address="localhost",
- port=?MACHI_DEFAULT_TCP_PORT+I} ||
- {S, I} <- All_list1],
- TmpL = [{S#p_srvr.name, S} || S <- All_list2],
- {All_list0, orddict:from_list(TmpL)}
- end,
+ MembersDict = make_members_dict(MembersDict0),
+ All_list = [Name || {Name, _P} <- MembersDict],
true = lists:all(fun(X) when is_atom(X) orelse is_binary(X) -> true;
(_) -> false
end, All_list),
@@ -79,23 +79,34 @@ new(EpochNum, MyName, All_list0, Down_list, UPI_list, Repairing_list,
creation_time=now(),
author_server=MyName,
all_members=All_list,
- member_dict=MemberDict,
+ members_dict=MembersDict,
down=Down_list,
upi=UPI_list,
repairing=Repairing_list,
dbg=Dbg
},
- update_projection_dbg2(update_projection_checksum(P), Dbg2).
+ update_dbg2(update_checksum(P), Dbg2).
-update_projection_checksum(P) ->
+%% @doc Update the checksum element of a projection record.
+
+update_checksum(P) ->
CSum = crypto:hash(sha,
term_to_binary(P#projection_v1{epoch_csum= <<>>,
dbg2=[]})),
P#projection_v1{epoch_csum=CSum}.
-update_projection_dbg2(P, Dbg2) when is_list(Dbg2) ->
+%% @doc Update the `dbg2' element of a projection record.
+
+update_dbg2(P, Dbg2) when is_list(Dbg2) ->
P#projection_v1{dbg2=Dbg2}.
+%% @doc Compare two projection records for equality (assuming that the
+%% checksum element has been correctly calculated).
+%%
+%% The name "compare" is probably too close to "rank"? This
+%% comparison has nothing to do with projection ranking.
+%% TODO: change the name of this function?
+
-spec compare(#projection_v1{}, #projection_v1{}) ->
integer().
compare(#projection_v1{epoch_number=E1, epoch_csum=C1},
@@ -107,13 +118,48 @@ compare(#projection_v1{epoch_number=E1},
E1 > E2 -> 1
end.
-make_projection_summary(#projection_v1{epoch_number=EpochNum,
- all_members=_All_list,
- down=Down_list,
- author_server=Author,
- upi=UPI_list,
- repairing=Repairing_list,
- dbg=Dbg, dbg2=Dbg2}) ->
+%% @doc Create a proplist-style summary of a projection record.
+
+make_summary(#projection_v1{epoch_number=EpochNum,
+ all_members=_All_list,
+ down=Down_list,
+ author_server=Author,
+ upi=UPI_list,
+ repairing=Repairing_list,
+ dbg=Dbg, dbg2=Dbg2}) ->
[{epoch,EpochNum},{author,Author},
{upi,UPI_list},{repair,Repairing_list},{down,Down_list},
{d,Dbg}, {d2,Dbg2}].
+
+%% @doc Make a `p_srvr_dict()' out of a list of `p_srvr()' or out of a
+%% `p_srvr_dict()'.
+%%
+%% If `Ps' is a `p_srvr_dict()', then this function is usually a
+%% no-op. However, if someone has tampered with the list and screwed
+%% up its order, then we should fix it so `orddict' can work
+%% correctly.
+%%
+%% If `Ps' is simply `list(p_srvr())', then we'll convert it
+%% to a `p_srvr_dict()'.
+
+-spec make_members_dict(list(p_srvr()) | p_srvr_dict()) ->
+ p_srvr_dict().
+make_members_dict(Ps) ->
+ F_rec = fun(P) when is_record(P, p_srvr) -> true;
+ (_) -> false
+ end,
+ F_tup = fun({_K, P}) when is_record(P, p_srvr) -> true;
+ (_) -> false
+ end,
+ case lists:all(F_rec, Ps) of
+ true ->
+ orddict:from_list([{P#p_srvr.name, P} || P <- Ps]);
+ false ->
+ case lists:all(F_tup, Ps) of
+ true ->
+ orddict:from_list(Ps);
+ false ->
+ F_neither = fun(X) -> not (F_rec(X) or F_tup(X)) end,
+ exit({badarg, {make_members_dict, lists:filter(F_neither, Ps)}})
+ end
+ end.
diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl
index c88a21b..4a68aa1 100644
--- a/src/machi_projection_store.erl
+++ b/src/machi_projection_store.erl
@@ -18,6 +18,25 @@
%%
%% -------------------------------------------------------------------
+%% @doc The Machi write-once projection store service.
+%%
+%% This API is gen_server-style message passing, intended for use
+%% within a single Erlang node to glue together the projection store
+%% server with the node-local process that implements Machi's TCP
+%% client access protocol (on the "server side" of the TCP connection).
+%%
+%% All Machi client access to the projection store SHOULD NOT use this
+%% module's API.
+%%
+%% The projection store is implemented by an Erlang/OTP `gen_server'
+%% process that is associated with each FLU. Conceptually, the
+%% projection store is an array of write-once registers. For each
+%% projection store register, the key is a 2-tuple of an epoch number
+%% (`non_neg_integer()' type) and a projection type (`public' or
+%% `private' type); the value is a projection data structure
+%% (`projection_v1()' type).
+
+
-module(machi_projection_store).
-include("machi_projection.hrl").
@@ -29,52 +48,79 @@
read_latest_projection/2, read_latest_projection/3,
read/3, read/4,
write/3, write/4,
- get_all/2, get_all/3,
- list_all/2, list_all/3
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3
]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
+-define(NO_EPOCH, {-1,<<0:(20*8)/big>>}).
+
-record(state, {
public_dir = "" :: string(),
private_dir = "" :: string(),
wedged = true :: boolean(),
wedge_notify_pid :: pid() | atom(),
- max_public_epoch = -1 :: -1 | non_neg_integer(),
- max_private_epoch = -1 :: -1 | non_neg_integer()
+ max_public_epoch = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()},
+ max_private_epoch = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()}
}).
+%% @doc Start a new projection store server.
+%%
+%% The `DataDir' argument should be the same directory as specified
+%% for use by our companion FLU data server -- all file system paths
+%% used by this server are intended to be stored underneath a common
+%% file system parent directory as the FLU data server & sequencer
+%% servers.
+
start_link(RegName, DataDir, NotifyWedgeStateChanges) ->
gen_server:start_link({local, RegName},
?MODULE, [DataDir, NotifyWedgeStateChanges], []).
+%% @doc Fetch the latest epoch number + checksum for type `ProjType'.
+
get_latest_epoch(PidSpec, ProjType) ->
get_latest_epoch(PidSpec, ProjType, infinity).
+%% @doc Fetch the latest epoch number + checksum of the latest
+%% `ProjType' projection.
+
get_latest_epoch(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {get_latest_epoch, ProjType}, Timeout).
+%% @doc Fetch the latest projection record for type `ProjType'.
+
read_latest_projection(PidSpec, ProjType) ->
read_latest_projection(PidSpec, ProjType, infinity).
+%% @doc Fetch the latest projection record for type `ProjType'.
+
read_latest_projection(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
g_call(PidSpec, {read_latest_projection, ProjType}, Timeout).
+%% @doc Fetch the projection record of type `ProjType' for epoch number `Epoch'.
+
read(PidSpec, ProjType, Epoch) ->
read(PidSpec, ProjType, Epoch, infinity).
+%% @doc Fetch the projection record of type `ProjType' for epoch number `Epoch'.
+
read(PidSpec, ProjType, Epoch, Timeout)
when ProjType == 'public' orelse ProjType == 'private',
is_integer(Epoch), Epoch >= 0 ->
g_call(PidSpec, {read, ProjType, Epoch}, Timeout).
+%% @doc Write the projection record of type `ProjType' for epoch number `Epoch'.
+
write(PidSpec, ProjType, Proj) ->
write(PidSpec, ProjType, Proj, infinity).
+%% @doc Write the projection record of type `ProjType' for epoch number `Epoch'.
+
write(PidSpec, ProjType, Proj, Timeout)
when ProjType == 'public' orelse ProjType == 'private',
is_record(Proj, projection_v1),
@@ -82,19 +128,27 @@ write(PidSpec, ProjType, Proj, Timeout)
Proj#projection_v1.epoch_number >= 0 ->
g_call(PidSpec, {write, ProjType, Proj}, Timeout).
-get_all(PidSpec, ProjType) ->
- get_all(PidSpec, ProjType, infinity).
+%% @doc Fetch all projection records of type `ProjType'.
-get_all(PidSpec, ProjType, Timeout)
+get_all_projections(PidSpec, ProjType) ->
+ get_all_projections(PidSpec, ProjType, infinity).
+
+%% @doc Fetch all projection records of type `ProjType'.
+
+get_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
- g_call(PidSpec, {get_all, ProjType}, Timeout).
+ g_call(PidSpec, {get_all_projections, ProjType}, Timeout).
-list_all(PidSpec, ProjType) ->
- list_all(PidSpec, ProjType, infinity).
+%% @doc Fetch all projection epoch numbers of type `ProjType'.
-list_all(PidSpec, ProjType, Timeout)
+list_all_projections(PidSpec, ProjType) ->
+ list_all_projections(PidSpec, ProjType, infinity).
+
+%% @doc Fetch all projection epoch numbers of type `ProjType'.
+
+list_all_projections(PidSpec, ProjType, Timeout)
when ProjType == 'public' orelse ProjType == 'private' ->
- g_call(PidSpec, {list_all, ProjType}, Timeout).
+ g_call(PidSpec, {list_all_projections, ProjType}, Timeout).
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -124,16 +178,16 @@ init([DataDir, NotifyWedgeStateChanges]) ->
handle_call({{get_latest_epoch, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
- Epoch = if ProjType == public -> S#state.max_public_epoch;
- ProjType == private -> S#state.max_private_epoch
- end,
- {reply, {{ok, Epoch}, LC2}, S};
+ EpochT = if ProjType == public -> S#state.max_public_epoch;
+ ProjType == private -> S#state.max_private_epoch
+ end,
+ {reply, {{ok, EpochT}, LC2}, S};
handle_call({{read_latest_projection, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
- Epoch = if ProjType == public -> S#state.max_public_epoch;
- ProjType == private -> S#state.max_private_epoch
+ {EpochNum, _CSum} = if ProjType == public -> S#state.max_public_epoch;
+ ProjType == private -> S#state.max_private_epoch
end,
- {Reply, NewS} = do_proj_read(ProjType, Epoch, S),
+ {Reply, NewS} = do_proj_read(ProjType, EpochNum, S),
{reply, {Reply, LC2}, NewS};
handle_call({{read, ProjType, Epoch}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
@@ -143,7 +197,7 @@ handle_call({{write, ProjType, Proj}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
{Reply, NewS} = do_proj_write(ProjType, Proj, S),
{reply, {Reply, LC2}, NewS};
-handle_call({{get_all, ProjType}, LC1}, _From, S) ->
+handle_call({{get_all_projections, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
Dir = pick_path(ProjType, S),
Epochs = find_all(Dir),
@@ -152,7 +206,7 @@ handle_call({{get_all, ProjType}, LC1}, _From, S) ->
Proj
end || Epoch <- Epochs],
{reply, {{ok, All}, LC2}, S};
-handle_call({{list_all, ProjType}, LC1}, _From, S) ->
+handle_call({{list_all_projections, ProjType}, LC1}, _From, S) ->
LC2 = lclock_update(LC1),
Dir = pick_path(ProjType, S),
{reply, {{ok, find_all(Dir)}, LC2}, S};
@@ -176,17 +230,21 @@ code_change(_OldVsn, S, _Extra) ->
do_proj_read(_ProjType, Epoch, S) when Epoch < 0 ->
{{error, not_written}, S};
-do_proj_read(ProjType, Epoch, S) ->
- Dir = pick_path(ProjType, S),
+do_proj_read(ProjType, Epoch, S_or_Dir) ->
+ Dir = if is_record(S_or_Dir, state) ->
+ pick_path(ProjType, S_or_Dir);
+ is_list(S_or_Dir) ->
+ S_or_Dir
+ end,
Path = filename:join(Dir, epoch2name(Epoch)),
case file:read_file(Path) of
{ok, Bin} ->
%% TODO and if Bin is corrupt? (even if binary_to_term() succeeds)
- {{ok, binary_to_term(Bin)}, S};
+ {{ok, binary_to_term(Bin)}, S_or_Dir};
{error, enoent} ->
- {{error, not_written}, S};
+ {{error, not_written}, S_or_Dir};
{error, Else} ->
- {{error, Else}, S}
+ {{error, Else}, S_or_Dir}
end.
do_proj_write(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
@@ -201,12 +259,15 @@ do_proj_write(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
ok = file:write(FH, term_to_binary(Proj)),
ok = file:sync(FH),
ok = file:close(FH),
- NewS = if ProjType == public, Epoch > S#state.max_public_epoch ->
- io:format(user, "TODO: tell ~p we are wedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
- S#state{max_public_epoch=Epoch, wedged=true};
- ProjType == private, Epoch > S#state.max_private_epoch ->
- io:format(user, "TODO: tell ~p we are unwedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
- S#state{max_private_epoch=Epoch, wedged=false};
+ EpochT = {Epoch, Proj#projection_v1.epoch_csum},
+ NewS = if ProjType == public,
+ Epoch > element(1, S#state.max_public_epoch) ->
+ %io:format(user, "TODO: tell ~p we are wedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
+ S#state{max_public_epoch=EpochT, wedged=true};
+ ProjType == private,
+ Epoch > element(1, S#state.max_private_epoch) ->
+ %io:format(user, "TODO: tell ~p we are unwedged by epoch ~p\n", [S#state.wedge_notify_pid, Epoch]),
+ S#state{max_private_epoch=EpochT, wedged=false};
true ->
S
end,
@@ -233,9 +294,11 @@ find_all(Dir) ->
find_max_epoch(Dir) ->
Fs = lists:sort(filelib:wildcard("*", Dir)),
if Fs == [] ->
- -1;
+ ?NO_EPOCH;
true ->
- name2epoch(lists:last(Fs))
+ EpochNum = name2epoch(lists:last(Fs)),
+ {{ok, Proj}, _} = do_proj_read(proj_type_ignored, EpochNum, Dir),
+ {EpochNum, Proj}
end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl
new file mode 100644
index 0000000..ba3a3d2
--- /dev/null
+++ b/src/machi_proxy_flu1_client.erl
@@ -0,0 +1,309 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc Erlang API for the Machi FLU TCP protocol version 1, with a
+%% proxy-process style API for hiding messy details such as TCP
+%% connection/disconnection with the remote Machi server.
+%%
+%% Machi is intentionally avoiding using distributed Erlang for
+%% Machi's communication. This design decision makes Erlang-side code
+%% more difficult & complex, but it's the price to pay for some
+%% language independence. Later in Machi's life cycle, we need to
+%% (re-)implement some components in a non-Erlang/BEAM-based language.
+%%
+%% This module implements a "man in the middle" proxy between the
+%% Erlang client and Machi server (which is on the "far side" of a TCP
+%% connection to somewhere). This proxy process will always execute
+%% on the same Erlang node as the Erlang client that uses it. The
+%% proxy is intended to be a stable, long-lived process that survives
+%% TCP communication problems with the remote server.
+
+-module(machi_proxy_flu1_client).
+
+-behaviour(gen_server).
+
+-include("machi.hrl").
+-include("machi_projection.hrl").
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif. % TEST.
+
+-export([start_link/1]).
+%% FLU1 API
+-export([
+ %% File API
+ append_chunk/4, append_chunk/5,
+ read_chunk/5, read_chunk/6,
+ checksum_list/3, checksum_list/4,
+ list_files/2, list_files/3,
+
+ %% %% Projection API
+ get_latest_epoch/2, get_latest_epoch/3,
+ read_latest_projection/2, read_latest_projection/3,
+ read_projection/3, read_projection/4,
+ write_projection/3, write_projection/4,
+ get_all_projections/2, get_all_projections/3,
+ list_all_projections/2, list_all_projections/3,
+
+ %% Common API
+ quit/1
+ ]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-define(FLU_C, machi_flu1_client).
+
+-record(state, {
+ i :: #p_srvr{},
+ sock :: 'undefined' | port()
+ }).
+
+%% @doc Start a local, long-lived process that will be our steady
+%% & reliable communication proxy with the fickle & flaky
+%% remote Machi server.
+
+start_link(#p_srvr{}=I) ->
+ gen_server:start_link(?MODULE, [I], []).
+
+%% @doc Append a chunk (binary- or iolist-style) of data to a file
+%% with `Prefix'.
+
+append_chunk(PidSpec, EpochID, Prefix, Chunk) ->
+ append_chunk(PidSpec, EpochID, Prefix, Chunk, infinity).
+
+%% @doc Append a chunk (binary- or iolist-style) of data to a file
+%% with `Prefix'.
+
+append_chunk(PidSpec, EpochID, Prefix, Chunk, Timeout) ->
+ gen_server:call(PidSpec, {req, {append_chunk, EpochID, Prefix, Chunk}},
+ Timeout).
+
+%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
+
+read_chunk(PidSpec, EpochID, File, Offset, Size) ->
+ read_chunk(PidSpec, EpochID, File, Offset, Size, infinity).
+
+%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
+
+read_chunk(PidSpec, EpochID, File, Offset, Size, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_chunk, EpochID, File, Offset, Size}},
+ Timeout).
+
+%% @doc Fetch the list of chunk checksums for `File'.
+
+checksum_list(PidSpec, EpochID, File) ->
+ checksum_list(PidSpec, EpochID, File, infinity).
+
+%% @doc Fetch the list of chunk checksums for `File'.
+
+checksum_list(PidSpec, EpochID, File, Timeout) ->
+ gen_server:call(PidSpec, {req, {checksum_list, EpochID, File}},
+ Timeout).
+
+%% @doc Fetch the list of all files on the remote FLU.
+
+list_files(PidSpec, EpochID) ->
+ list_files(PidSpec, EpochID, infinity).
+
+%% @doc Fetch the list of all files on the remote FLU.
+
+list_files(PidSpec, EpochID, Timeout) ->
+ gen_server:call(PidSpec, {req, {list_files, EpochID}},
+ Timeout).
+
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
+
+get_latest_epoch(PidSpec, ProjType) ->
+ get_latest_epoch(PidSpec, ProjType, infinity).
+
+%% @doc Get the latest epoch number + checksum from the FLU's projection store.
+
+get_latest_epoch(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {get_latest_epoch, ProjType}},
+ Timeout).
+
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
+
+read_latest_projection(PidSpec, ProjType) ->
+ read_latest_projection(PidSpec, ProjType, infinity).
+
+%% @doc Get the latest projection from the FLU's projection store for `ProjType'.
+
+read_latest_projection(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_latest_projection, ProjType}},
+ Timeout).
+
+%% @doc Read a projection `Proj' of type `ProjType'.
+
+read_projection(PidSpec, ProjType, Epoch) ->
+ read_projection(PidSpec, ProjType, Epoch, infinity).
+
+%% @doc Read a projection `Proj' of type `ProjType'.
+
+read_projection(PidSpec, ProjType, Epoch, Timeout) ->
+ gen_server:call(PidSpec, {req, {read_projection, ProjType, Epoch}},
+ Timeout).
+
+%% @doc Write a projection `Proj' of type `ProjType'.
+
+write_projection(PidSpec, ProjType, Proj) ->
+ write_projection(PidSpec, ProjType, Proj, infinity).
+
+%% @doc Write a projection `Proj' of type `ProjType'.
+
+write_projection(PidSpec, ProjType, Proj, Timeout) ->
+ gen_server:call(PidSpec, {req, {write_projection, ProjType, Proj}},
+ Timeout).
+
+%% @doc Get all projections from the FLU's projection store.
+
+get_all_projections(PidSpec, ProjType) ->
+ get_all_projections(PidSpec, ProjType, infinity).
+
+%% @doc Get all projections from the FLU's projection store.
+
+get_all_projections(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {get_all_projections, ProjType}},
+ Timeout).
+
+%% @doc Get all epoch numbers from the FLU's projection store.
+
+list_all_projections(PidSpec, ProjType) ->
+ list_all_projections(PidSpec, ProjType, infinity).
+
+%% @doc Get all epoch numbers from the FLU's projection store.
+
+list_all_projections(PidSpec, ProjType, Timeout) ->
+ gen_server:call(PidSpec, {req, {list_all_projections, ProjType}},
+ Timeout).
+
+%% @doc Quit & close the connection to remote FLU and stop our
+%% proxy process.
+
+quit(PidSpec) ->
+ gen_server:call(PidSpec, quit, infinity).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init([I]) ->
+ S0 = #state{i=I},
+ S1 = try_connect(S0),
+ {ok, S1}.
+
+handle_call({req, Req}, _From, S) ->
+ {Reply, NewS} = do_req(Req, S),
+ {reply, Reply, NewS};
+handle_call(quit, _From, S) ->
+ {stop, normal, ok, disconnect(S)};
+handle_call(_Request, _From, S) ->
+ Reply = ok,
+ {reply, Reply, S}.
+
+handle_cast(_Msg, S) ->
+ {noreply, S}.
+
+handle_info(_Info, S) ->
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+do_req(Req, S) ->
+ S2 = try_connect(S),
+ Fun = make_req_fun(Req, S2),
+ case connected_p(S2) of
+ true ->
+ case Fun() of
+ T when element(1, T) == ok ->
+ {T, S2};
+ Else ->
+ case get(bad_sock) of
+ Bad when Bad == S2#state.sock ->
+ {Else, disconnect(S2)};
+ _ ->
+ {Else, S2}
+ end
+ end;
+ false ->
+ {{error, not_connected}, S2}
+ end.
+
+make_req_fun({append_chunk, EpochID, Prefix, Chunk}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:append_chunk(Sock, EpochID, Prefix, Chunk) end;
+make_req_fun({read_chunk, EpochID, File, Offset, Size}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_chunk(Sock, EpochID, File, Offset, Size) end;
+make_req_fun({checksum_list, EpochID, File}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:checksum_list(Sock, EpochID, File) end;
+make_req_fun({list_files, EpochID}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:list_files(Sock, EpochID) end;
+make_req_fun({get_latest_epoch, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:get_latest_epoch(Sock, ProjType) end;
+make_req_fun({read_latest_projection, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_latest_projection(Sock, ProjType) end;
+make_req_fun({read_projection, ProjType, Epoch}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:read_projection(Sock, ProjType, Epoch) end;
+make_req_fun({write_projection, ProjType, Proj}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:write_projection(Sock, ProjType, Proj) end;
+make_req_fun({get_all_projections, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:get_all_projections(Sock, ProjType) end;
+make_req_fun({list_all_projections, ProjType}, #state{sock=Sock}) ->
+ fun() -> ?FLU_C:list_all_projections(Sock, ProjType) end.
+
+connected_p(#state{sock=SockMaybe,
+ i=#p_srvr{proto=ipv4}=_I}=_S) ->
+ is_port(SockMaybe);
+connected_p(#state{i=#p_srvr{proto=disterl,
+ name=_NodeName}=_I}=_S) ->
+ true.
+ %% case net_adm:ping(NodeName) of
+ %% ping ->
+ %% true;
+ %% _ ->
+ %% false
+ %% end.
+
+try_connect(#state{sock=undefined,
+ i=#p_srvr{proto=ipv4, address=Host, port=TcpPort}=_I}=S) ->
+ try
+ Sock = machi_util:connect(Host, TcpPort),
+ S#state{sock=Sock}
+ catch
+ _:_ ->
+ S
+ end;
+try_connect(S) ->
+ %% If we're connection-based, we're already connected.
+ %% If we're not connection-based, then there's nothing to do.
+ S.
+
+disconnect(#state{sock=Sock,
+ i=#p_srvr{proto=ipv4}=_I}=S) ->
+ (catch gen_tcp:close(Sock)),
+ S#state{sock=undefined};
+disconnect(S) ->
+ S.
diff --git a/src/machi_sequencer.erl b/src/machi_sequencer.erl
index ddd81a5..4d1116d 100644
--- a/src/machi_sequencer.erl
+++ b/src/machi_sequencer.erl
@@ -18,6 +18,9 @@
%%
%% -------------------------------------------------------------------
+%% @doc "Mothballed" sequencer code, perhaps to be reused sometime in
+%% the future?
+
-module(machi_sequencer).
-compile(export_all).
diff --git a/src/machi_sup.erl b/src/machi_sup.erl
index dcaadbe..31fcc9b 100644
--- a/src/machi_sup.erl
+++ b/src/machi_sup.erl
@@ -18,6 +18,8 @@
%%
%% -------------------------------------------------------------------
+%% @doc Top Machi application supervisor.
+
-module(machi_sup).
-behaviour(supervisor).
diff --git a/src/machi_util.erl b/src/machi_util.erl
index 1331d11..6025c25 100644
--- a/src/machi_util.erl
+++ b/src/machi_util.erl
@@ -18,20 +18,24 @@
%%
%% -------------------------------------------------------------------
+%% @doc Miscellaneous utility functions.
+
-module(machi_util).
-export([
- checksum/1,
+ checksum_chunk/1,
hexstr_to_bin/1, bin_to_hexstr/1,
hexstr_to_int/1, int_to_hexstr/2, int_to_hexbin/2,
make_binary/1, make_string/1,
make_regname/1,
- make_checksum_filename/2, make_data_filename/2,
+ make_config_filename/2,
+ make_checksum_filename/4, make_checksum_filename/2,
+ make_data_filename/4, make_data_filename/2,
make_projection_filename/2,
read_max_filenum/2, increment_max_filenum/2,
info_msg/2, verb/1, verb/2,
%% TCP protocol helpers
- connect/2
+ connect/2, connect/3
]).
-compile(export_all).
@@ -39,33 +43,54 @@
-include("machi_projection.hrl").
-include_lib("kernel/include/file.hrl").
-append(Server, Prefix, Chunk) when is_binary(Prefix), is_binary(Chunk) ->
- CSum = checksum(Chunk),
- Server ! {seq_append, self(), Prefix, Chunk, CSum},
- receive
- {assignment, Offset, File} ->
- {Offset, File}
- after 10*1000 ->
- bummer
- end.
+%% @doc Create a registered name atom for FLU sequencer internal
+%% rendezvous/message passing use.
+-spec make_regname(binary()|list()) ->
+ atom().
make_regname(Prefix) when is_binary(Prefix) ->
erlang:binary_to_atom(Prefix, latin1);
make_regname(Prefix) when is_list(Prefix) ->
erlang:list_to_atom(Prefix).
+%% @doc Calculate a config file path, by common convention.
+
+-spec make_config_filename(string(), string()) ->
+ string().
make_config_filename(DataDir, Prefix) ->
lists:flatten(io_lib:format("~s/config/~s", [DataDir, Prefix])).
+%% @doc Calculate a checksum file path, by common convention.
+
+-spec make_checksum_filename(string(), string(), atom()|string()|binary(), integer()) ->
+ string().
make_checksum_filename(DataDir, Prefix, SequencerName, FileNum) ->
lists:flatten(io_lib:format("~s/config/~s.~s.~w.csum",
[DataDir, Prefix, SequencerName, FileNum])).
+%% @doc Calculate a checksum file path, by common convention.
+
+-spec make_checksum_filename(string(), [] | string() | binary()) ->
+ string().
make_checksum_filename(DataDir, "") ->
lists:flatten(io_lib:format("~s/config", [DataDir]));
make_checksum_filename(DataDir, FileName) ->
lists:flatten(io_lib:format("~s/config/~s.csum", [DataDir, FileName])).
+%% @doc Calculate a file data file path, by common convention.
+
+-spec make_data_filename(string(), string(), atom()|string()|binary(), integer()) ->
+ {binary(), string()}.
+make_data_filename(DataDir, Prefix, SequencerName, FileNum) ->
+ File = erlang:iolist_to_binary(io_lib:format("~s.~s.~w",
+ [Prefix, SequencerName, FileNum])),
+ FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
+ {File, FullPath}.
+
+%% @doc Calculate a file data file path, by common convention.
+
+-spec make_data_filename(string(), [] | string() | binary()) ->
+ {binary(), string()}.
make_data_filename(DataDir, "") ->
FullPath = lists:flatten(io_lib:format("~s/data", [DataDir])),
{"", FullPath};
@@ -73,17 +98,20 @@ make_data_filename(DataDir, File) ->
FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
{File, FullPath}.
-make_data_filename(DataDir, Prefix, SequencerName, FileNum) ->
- File = erlang:iolist_to_binary(io_lib:format("~s.~s.~w",
- [Prefix, SequencerName, FileNum])),
- FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
- {File, FullPath}.
+%% @doc Calculate a projection store file path, by common convention.
+-spec make_projection_filename(string(), [] | string()) ->
+ string().
make_projection_filename(DataDir, "") ->
lists:flatten(io_lib:format("~s/projection", [DataDir]));
make_projection_filename(DataDir, File) ->
lists:flatten(io_lib:format("~s/projection/~s", [DataDir, File])).
+%% @doc Read the file size of a config file, which is used as the
+%% basis for a minimum sequence number.
+
+-spec read_max_filenum(string(), string()) ->
+ non_neg_integer().
read_max_filenum(DataDir, Prefix) ->
case file:read_file_info(make_config_filename(DataDir, Prefix)) of
{error, enoent} ->
@@ -92,6 +120,11 @@ read_max_filenum(DataDir, Prefix) ->
FI#file_info.size
end.
+%% @doc Increase the file size of a config file, which is used as the
+%% basis for a minimum sequence number.
+
+-spec increment_max_filenum(string(), string()) ->
+ ok | {error, term()}.
increment_max_filenum(DataDir, Prefix) ->
try
{ok, FH} = file:open(make_config_filename(DataDir, Prefix), [append]),
@@ -100,9 +133,13 @@ increment_max_filenum(DataDir, Prefix) ->
ok = file:close(FH)
catch
error:{badmatch,_}=Error ->
- {error, Error, erlang:get_stacktrace()}
+ {error, {Error, erlang:get_stacktrace()}}
end.
+%% @doc Convert a hexadecimal string to a `binary()'.
+
+-spec hexstr_to_bin(string() | binary()) ->
+ binary().
hexstr_to_bin(S) when is_list(S) ->
hexstr_to_bin(S, []);
hexstr_to_bin(B) when is_binary(B) ->
@@ -114,6 +151,10 @@ hexstr_to_bin([X,Y|T], Acc) ->
{ok, [V], []} = io_lib:fread("~16u", [X,Y]),
hexstr_to_bin(T, [V | Acc]).
+%% @doc Convert a `binary()' to a hexadecimal string.
+
+-spec bin_to_hexstr(binary()) ->
+ string().
bin_to_hexstr(<<>>) ->
[];
bin_to_hexstr(<>) ->
@@ -124,40 +165,75 @@ hex_digit(X) when X < 10 ->
hex_digit(X) ->
X - 10 + $a.
+%% @doc Convert a compatible Erlang data type into a `binary()' equivalent.
+
+-spec make_binary(binary() | iolist()) ->
+ binary().
make_binary(X) when is_binary(X) ->
X;
make_binary(X) when is_list(X) ->
iolist_to_binary(X).
+%% @doc Convert a compatible Erlang data type into a `string()' equivalent.
+
+-spec make_string(binary() | iolist()) ->
+ string().
make_string(X) when is_list(X) ->
lists:flatten(X);
make_string(X) when is_binary(X) ->
binary_to_list(X).
+%% @doc Convert a hexadecimal string to an integer.
+
+-spec hexstr_to_int(string() | binary()) ->
+ non_neg_integer().
hexstr_to_int(X) ->
B = hexstr_to_bin(X),
B_size = byte_size(B) * 8,
<> = B,
I.
+%% @doc Convert an integer into a hexadecimal string whose length is
+%% based on `I_size'.
+
+-spec int_to_hexstr(non_neg_integer(), non_neg_integer()) ->
+ string().
int_to_hexstr(I, I_size) ->
bin_to_hexstr(<>).
+%% @doc Convert an integer into a hexadecimal string (in `binary()'
+%% form) whose length is based on `I_size'.
+
+-spec int_to_hexbin(non_neg_integer(), non_neg_integer()) ->
+ binary().
int_to_hexbin(I, I_size) ->
list_to_binary(int_to_hexstr(I, I_size)).
-checksum(Bin) when is_binary(Bin) ->
- crypto:hash(md5, Bin).
+%% @doc Calculate a checksum for a chunk of file data.
+-spec checksum_chunk(binary() | iolist()) ->
+ binary().
+checksum_chunk(Chunk) when is_binary(Chunk); is_list(Chunk) ->
+ crypto:hash(sha, Chunk).
+
+%% @doc Log a verbose message.
+
+-spec verb(string()) -> term().
verb(Fmt) ->
verb(Fmt, []).
+%% @doc Log a verbose message.
+
+-spec verb(string(), list()) -> term().
verb(Fmt, Args) ->
case application:get_env(kernel, verbose) of
{ok, true} -> io:format(Fmt, Args);
_ -> ok
end.
+%% @doc Log an 'info' level message.
+
+-spec info_msg(string(), list()) -> term().
info_msg(Fmt, Args) ->
case application:get_env(kernel, verbose) of {ok, false} -> ok;
_ -> error_logger:info_msg(Fmt, Args)
@@ -165,16 +241,26 @@ info_msg(Fmt, Args) ->
%%%%%%%%%%%%%%%%%
+%% @doc Create a TCP connection to a remote Machi server.
+
-spec connect(inet:ip_address() | inet:hostname(), inet:port_number()) ->
port().
connect(Host, Port) ->
- escript_connect(Host, Port).
+ escript_connect(Host, Port, 4500).
-escript_connect(Host, PortStr) when is_list(PortStr) ->
+%% @doc Create a TCP connection to a remote Machi server.
+
+-spec connect(inet:ip_address() | inet:hostname(), inet:port_number(),
+ timeout()) ->
+ port().
+connect(Host, Port, Timeout) ->
+ escript_connect(Host, Port, Timeout).
+
+escript_connect(Host, PortStr, Timeout) when is_list(PortStr) ->
Port = list_to_integer(PortStr),
- escript_connect(Host, Port);
-escript_connect(Host, Port) when is_integer(Port) ->
+ escript_connect(Host, Port, Timeout);
+escript_connect(Host, Port, Timeout) when is_integer(Port) ->
{ok, Sock} = gen_tcp:connect(Host, Port, [{active,false}, {mode,binary},
- {packet, raw}]),
+ {packet, raw}], Timeout),
Sock.
diff --git a/test/machi_chain_manager1_converge_demo.erl b/test/machi_chain_manager1_converge_demo.erl
new file mode 100644
index 0000000..1d2c537
--- /dev/null
+++ b/test/machi_chain_manager1_converge_demo.erl
@@ -0,0 +1,447 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1_converge_demo).
+
+-include("machi.hrl").
+-include("machi_projection.hrl").
+
+-define(MGR, machi_chain_manager1).
+
+-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
+-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+-define(FLU_C, machi_flu1_client).
+-define(FLU_PC, machi_proxy_flu1_client).
+
+-compile(export_all).
+
+-ifdef(TEST).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+%% -include_lib("eqc/include/eqc_statem.hrl").
+-define(QC_OUT(P),
+ eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
+-endif.
+
+-include_lib("eunit/include/eunit.hrl").
+
+short_doc() ->
+"
+A visualization of the convergence behavior of the chain self-management
+algorithm for Machi.
+ 1. Set up 4 FLUs and chain manager pairs.
+ 2. Create a number of different network partition scenarios, where
+ (simulated) partitions may be symmetric or asymmetric. Then halt changing
+ the partitions and keep the simulated network stable and broken.
+ 3. Run a number of iterations of the algorithm in parallel by poking each
+ of the manager processes on a random'ish basis.
+ 4. Afterward, fetch the chain transition changes made by each FLU and
+ verify that no transition was unsafe.
+
+During the iteration periods, the following is a cheatsheet for the output.
+See the internal source for interpreting the rest of the output.
+
+ 'Let loose the dogs of war!' Network instability
+ 'SET partitions = ' Network stability (but broken)
+ 'x uses:' The FLU x has made an internal state transition. The rest of
+ the line is a dump of internal state.
+ '{t}' This is a tick event which triggers one of the manager processes
+ to evaluate its environment and perhaps make a state transition.
+
+A long chain of '{t}{t}{t}{t}' means that the chain state has settled
+to a stable configuration, which is the goal of the algorithm.
+Press control-c to interrupt....".
+
+long_doc() ->
+ "
+'Let loose the dogs of war!'
+
+ The simulated network is very unstable for a few seconds.
+
+'x uses'
+
+ After a single iteration, server x has determined that the chain
+ should be defined by the upi, repair, and down list in this record.
+ If all participants reach the same conclusion at the same epoch
+ number (and checksum, see next item below), then the chain is
+ stable, fully configured, and can provide full service.
+
+'epoch,E'
+
+ The epoch number for this decision is E. The checksum of the full
+ record is not shown. For purposes of the protocol, a server will
+ 'wedge' itself and refuse service (until a new config is chosen)
+ whenever: a). it sees a bigger epoch number mentioned somewhere, or
+ b). it sees the same epoch number but a different checksum. In case
+ of b), there was a network partition that has healed, and both sides
+ had chosen to operate with an identical epoch number but different
+ chain configs.
+
+'upi', 'repair', and 'down'
+
+ Members in the chain that are fully in sync and thus preserving the
+ Update Propagation Invariant, up but under repair (simulated), and
+ down, respectively.
+
+'ps,[some list]'
+
+ The list of asymmetric network partitions. {a,b} means that a
+ cannot send to b, but b can send to a.
+
+ This partition list is recorded for debugging purposes but is *not*
+ used by the algorithm. The algorithm only 'feels' its effects via
+ simulated timeout whenever there's a partition in one of the
+ messaging directions.
+
+'nodes_up,[list]'
+
+ The best guess right now of which nodes are up, relative to the
+ author node, specified by '{author,X}'
+
+'SET partitions = [some list]'
+
+ All subsequent iterations should have a stable list of partitions,
+ i.e. the 'ps' list described should be stable.
+
+'{FLAP: x flaps n}!'
+
+ Server x has detected that it's flapping/oscillating after iteration
+ n of a naive/1st draft detection algorithm.
+".
+
+%% convergence_demo_test_() ->
+%% {timeout, 98*300, fun() -> convergence_demo_testfun() end}.
+
+%% convergence_demo_testfun() ->
+%% convergence_demo_testfun(3).
+
+t() ->
+ t(3).
+
+t(N) ->
+ convergence_demo_testfun(N).
+
+convergence_demo_testfun(NumFLUs) ->
+ timer:sleep(100),
+ %% Faster test startup, commented: io:format(user, short_doc(), []),
+ %% Faster test startup, commented: timer:sleep(3000),
+
+ TcpPort = 62877,
+ FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"},
+ {c,TcpPort+2,"./data.c"}, {d,TcpPort+3,"./data.d"},
+ {e,TcpPort+4,"./data.e"}, {f,TcpPort+5,"./data.f"}],
+ FLU_biglist = [X || {X,_,_} <- FluInfo],
+ All_list = lists:sublist(FLU_biglist, NumFLUs),
+ io:format(user, "\nSET # of FLus = ~w members ~w).\n",
+ [NumFLUs, All_list]),
+ machi_partition_simulator:start_link({111,222,33}, 0, 100),
+ _ = machi_partition_simulator:get(All_list),
+
+ Ps = [#p_srvr{name=Name,address="localhost",port=Port} ||
+ {Name,Port,_Dir} <- lists:sublist(FluInfo, NumFLUs)],
+ PsDirs = lists:zip(Ps,
+ [Dir || {_,_,Dir} <- lists:sublist(FluInfo, NumFLUs)]),
+ FLU_pids = [machi_flu1_test:setup_test_flu(Name, Port, Dir) ||
+ {#p_srvr{name=Name,port=Port}, Dir} <- PsDirs],
+ Namez = [begin
+ {ok, PPid} = ?FLU_PC:start_link(P),
+ {Name, PPid}
+ end || {#p_srvr{name=Name}=P, _Dir} <- PsDirs],
+ MembersDict = machi_projection:make_members_dict(Ps),
+ MgrOpts = [private_write_verbose, {active_mode,false}],
+ MgrNamez =
+ [begin
+ {ok, MPid} = ?MGR:start_link(P#p_srvr.name, MembersDict, MgrOpts),
+ {P#p_srvr.name, MPid}
+ end || P <- Ps],
+
+ try
+ [{_, Ma}|_] = MgrNamez,
+ {ok, P1} = ?MGR:test_calc_projection(Ma, false),
+ [ok = ?FLU_PC:write_projection(FLUPid, public, P1) ||
+ {_, FLUPid} <- Namez, FLUPid /= Ma],
+
+ machi_partition_simulator:reset_thresholds(10, 50),
+ _ = machi_partition_simulator:get(All_list),
+
+ Parent = self(),
+ DoIt = fun(Iters, S_min, S_max) ->
+ io:format(user, "\nDoIt: top\n\n", []),
+ Pids = [spawn(fun() ->
+ random:seed(now()),
+ [begin
+ erlang:yield(),
+ S_max_rand = random:uniform(
+ S_max + 1),
+ io:format(user, "{t}", []),
+ Elapsed =
+ ?MGR:sleep_ranked_order(
+ S_min, S_max_rand,
+ M_name, All_list),
+ _ = ?MGR:test_react_to_env(MMM),
+ %% if M_name == d ->
+ %% [_ = ?MGR:test_react_to_env(MMM) ||
+ %% _ <- lists:seq(1,3)],
+ %% superunfair;
+ %% true ->
+ %% ok
+ %% end,
+ %% Be more unfair by not
+ %% sleeping here.
+ %% timer:sleep(S_max - Elapsed),
+ Elapsed
+ end || _ <- lists:seq(1, Iters)],
+ Parent ! done
+ end) || {M_name, MMM} <- MgrNamez ],
+ [receive
+ done ->
+ ok
+ after 995000 ->
+ exit(icky_timeout)
+ end || _ <- Pids]
+ end,
+
+ _XandYs1 = [[{X,Y}] || X <- All_list, Y <- All_list, X /= Y],
+ _XandYs2 = [[{X,Y}, {A,B}] || X <- All_list, Y <- All_list, X /= Y,
+ A <- All_list, B <- All_list, A /= B,
+ X /= A],
+ _XandYs3 = [[{X,Y}, {A,B}, {C,D}] || X <- All_list, Y <- All_list, X /= Y,
+ A <- All_list, B <- All_list, A /= B,
+ C <- All_list, D <- All_list, C /= D,
+ X /= A, X /= C, A /= C],
+ %% AllPartitionCombinations = _XandYs1 ++ _XandYs2,
+ %% AllPartitionCombinations = _XandYs3,
+ AllPartitionCombinations = _XandYs1 ++ _XandYs2 ++ _XandYs3,
+ ?D({?LINE, length(AllPartitionCombinations)}),
+
+ machi_partition_simulator:reset_thresholds(10, 50),
+ io:format(user, "\nLet loose the dogs of war!\n", []),
+ DoIt(30, 0, 0),
+ [begin
+ %% io:format(user, "\nSET partitions = ~w.\n", [ [] ]),machi_partition_simulator:no_partitions(),
+ %% [DoIt(50, 10, 100) || _ <- [1,2,3]],
+ io:format(user, "\nLet loose the dogs of war!\n", []),
+ DoIt(30, 0, 0),
+ io:format(user, "\nSET partitions = ~w.\n", [ [] ]),machi_partition_simulator:no_partitions(),
+ [DoIt(10, 10, 100) || _ <- [1]],
+
+ %% machi_partition_simulator:reset_thresholds(10, 50),
+ %% io:format(user, "\nLet loose the dogs of war!\n", []),
+ %% DoIt(30, 0, 0),
+
+ machi_partition_simulator:always_these_partitions(Partition),
+ io:format(user, "\nSET partitions = ~w.\n", [Partition]),
+ [DoIt(50, 10, 100) || _ <- [1,2,3,4] ],
+ _PPP =
+ [begin
+ {ok, PPPallPubs} = ?FLU_PC:list_all_projections(FLU,public),
+ [begin
+ {ok, Pr} = todo_why_does_this_crash_sometimes(
+ FLUName, FLU, PPPepoch),
+ {Pr#projection_v1.epoch_number, FLUName, Pr}
+ end || PPPepoch <- PPPallPubs]
+ end || {FLUName, FLU} <- Namez],
+ %% io:format(user, "PPP ~p\n", [lists:sort(lists:append(_PPP))]),
+
+ %%%%%%%% {stable,true} = {stable,private_projections_are_stable(Namez, DoIt)},
+ {hosed_ok,true} = {hosed_ok,all_hosed_lists_are_identical(Namez, Partition)},
+ io:format(user, "\nSweet, all_hosed are identical-or-islands-inconclusive.\n", []),
+ timer:sleep(1000),
+ ok
+ %% end || Partition <- AllPartitionCombinations
+ %% end || Partition <- [ [{a,b},{b,d},{c,b}],
+ %% [{a,b},{b,d},{c,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
+ %% %% [{a,b},{b,d},{c,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
+ %% [{a,b},{b,d},{c,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
+ %% [{a,b},{b,d},{c,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ]
+ end || Partition <- [ [{a,b}, {b,c}],
+ [{a,b}, {c,b}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}] ] %% hosed-not-equal @ 3 FLUs
+ %% end || Partition <- [ [{b,d}] ]
+ %% end || Partition <- [ [{a,b}, {b,a}] ]
+ %% end || Partition <- [ [{a,b}, {b,a}, {a,c},{c,a}] ]
+ %% end || Partition <- [ [{a,b}],
+ %% [{b,a}] ]
+ %% end || Partition <- [ [{a,b}, {c,b}],
+ %% [{a,b}, {b,c}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}, {c,d}],
+ %% [{a,b}, {b,c},{b,d}, {c,d}],
+ %% [{b,a}, {b,c}, {c,d}],
+ %% [{a,b}, {c,b}, {c,d}],
+ %% [{a,b}, {b,c}, {d,c}] ]
+ %% end || Partition <- [ [{a,b}, {b,c}, {c,d}, {d,e}],
+ %% [{b,a}, {b,c}, {c,d}, {d,e}],
+ %% [{a,b}, {c,b}, {c,d}, {d,e}],
+ %% [{a,b}, {b,c}, {d,c}, {d,e}],
+ %% [{a,b}, {b,c}, {c,d}, {e,d}] ]
+ %% end || Partition <- [ [{c,a}] ]
+ %% end || Partition <- [ [{c,a}], [{c,b}, {a, b}] ]
+ %% end || Partition <- [ [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}],
+ %% [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {b,c}],
+ %% [{a,b},{b,a}, {a,c},{c,a}, {a,d},{d,a}, {c,d}] ]
+ %% end || Partition <- [ [{a,b}],
+ %% [{a,b}, {a,b},{b,a},{a,c},{c,a},{a,d},{d,a}],
+ %% [{a,b}, {b,a},{a,b},{b,c},{c,b},{b,d},{d,b}],
+ %% [{a,b}, {c,a},{a,c},{c,b},{b,c},{c,d},{d,c}],
+ %% [{a,b}, {d,a},{a,d},{d,b},{b,d},{d,c},{c,d}] ]
+ ],
+ %% exit(end_experiment),
+
+ io:format(user, "\nSET partitions = []\n", []),
+ io:format(user, "We should see convergence to 1 correct chain.\n", []),
+ machi_partition_simulator:no_partitions(),
+ [DoIt(50, 10, 100) || _ <- [1]],
+ io:format(user, "Sweet, finishing early\n", []), exit(yoyoyo_testing_hack),
+ %% WARNING: In asymmetric partitions, private_projections_are_stable()
+ %% will never be true; code beyond this point on the -exp3
+ %% branch is bit-rotted, sorry!
+ true = private_projections_are_stable(Namez, DoIt),
+ io:format(user, "~s\n", [os:cmd("date")]),
+
+ %% We are stable now ... analyze it.
+
+ %% Create a report where at least one FLU has written a
+ %% private projection.
+ Report = machi_chain_manager1_test:unanimous_report(Namez),
+ %% ?D(Report),
+
+ %% Report is ordered by Epoch. For each private projection
+ %% written during any given epoch, confirm that all chain
+ %% members appear in only one unique chain, i.e., the sets of
+ %% unique chains are disjoint.
+ true = machi_chain_manager1_test:all_reports_are_disjoint(Report),
+
+ %% Given the report, we flip it around so that we observe the
+ %% sets of chain transitions relative to each FLU.
+ R_Chains = [machi_chain_manager1_test:extract_chains_relative_to_flu(
+ FLU, Report) || FLU <- All_list],
+ %% ?D(R_Chains),
+ R_Projs = [{FLU, [machi_chain_manager1_test:chain_to_projection(
+ FLU, Epoch, UPI, Repairing, All_list) ||
+ {Epoch, UPI, Repairing} <- E_Chains]} ||
+ {FLU, E_Chains} <- R_Chains],
+
+ %% For each chain transition experienced by a particular FLU,
+ %% confirm that each state transition is OK.
+ try
+ [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane(Psx, FLU)} ||
+ {FLU, Psx} <- R_Projs],
+ io:format(user, "\nAll sanity checks pass, hooray!\n", [])
+ catch _Err:_What ->
+ io:format(user, "Report ~p\n", [Report]),
+ exit({line, ?LINE, _Err, _What})
+ end,
+ %% ?D(R_Projs),
+
+ ok
+ catch
+ XX:YY ->
+ io:format(user, "BUMMER ~p ~p @ ~p\n",
+ [XX, YY, erlang:get_stacktrace()]),
+ exit({bummer,XX,YY})
+ after
+ [ok = ?MGR:stop(MgrPid) || {_, MgrPid} <- MgrNamez],
+ [ok = ?FLU_PC:quit(PPid) || {_, PPid} <- Namez],
+ [ok = machi_flu1:stop(FLUPid) || FLUPid <- FLU_pids],
+ ok = machi_partition_simulator:stop()
+ end.
+
+todo_why_does_this_crash_sometimes(FLUName, FLU, PPPepoch) ->
+ try
+ {ok, _}=Res = ?FLU_PC:read_projection(FLU, public, PPPepoch),
+ Res
+ catch _:_ ->
+ io:format(user, "QQQ Whoa, it crashed this time for ~p at epoch ~p\n",
+ [FLUName, PPPepoch]),
+ timer:sleep(1000),
+ ?FLU_PC:read_projection(FLU, public, PPPepoch)
+ end.
+
+private_projections_are_stable(Namez, PollFunc) ->
+ Private1 = [?FLU_PC:get_latest_epoch(FLU, private) ||
+ {_Name, FLU} <- Namez],
+ PollFunc(5, 1, 10),
+ Private2 = [?FLU_PC:get_latest_epoch(FLU, private) ||
+ {_Name, FLU} <- Namez],
+ true = (Private1 == Private2).
+
+all_hosed_lists_are_identical(Namez, Partition0) ->
+ Partition = lists:usort(Partition0),
+ Ps = [element(2,?FLU_PC:read_latest_projection(FLU, private)) ||
+ {_Name, FLU} <- Namez],
+ UniqueAllHoseds = lists:usort([machi_chain_manager1:get_all_hosed(P) ||
+ {ok, P} <- Ps]),
+ Members = [M || {M, _Pid} <- Namez],
+ Islands = machi_partition_simulator:partitions2num_islands(
+ Members, Partition),
+ %% io:format(user, "all_hosed_lists_are_identical:\n", []),
+ %% io:format(user, " Uniques = ~p Islands ~p\n Partition ~p\n",
+ %% [Uniques, Islands, Partition]),
+ case length(UniqueAllHoseds) of
+ 1 ->
+ true;
+ %% TODO: With the addition of the digraph stuff below, the clause
+ %% below probably isn't necessary anymore, since the
+ %% digraph calculation should catch complete partition islands?
+ _ when Islands == 'many' ->
+ %% There are at least two partitions, so yes, it's quite
+ %% possible that the all_hosed lists may differ.
+ %% TODO Fix this up to be smarter about fully-isolated
+ %% islands of partition.
+ true;
+ _ ->
+ DG = digraph:new(),
+ Connection = machi_partition_simulator:partition2connection(
+ Members, Partition),
+ [digraph:add_vertex(DG, X) || X <- Members],
+ [digraph:add_edge(DG, X, Y) || {X,Y} <- Connection],
+ Any =
+ lists:any(
+ fun(X) ->
+ NotX = Members -- [X],
+ lists:any(
+ fun(Y) ->
+ %% There must be a shortest path of length
+ %% two in both directions, otherwise
+ %% the read projection call will fail.
+ %% And it's that failure that we're
+ %% interested in here.
+ XtoY = digraph:get_short_path(DG, X, Y),
+ YtoX = digraph:get_short_path(DG, Y, X),
+ (XtoY == false orelse
+ length(XtoY) > 2)
+ orelse
+ (YtoX == false orelse
+ length(YtoX) > 2)
+ end, NotX)
+ end, Members),
+ digraph:delete(DG),
+ if Any == true ->
+ %% There's a missing path of length 2 between some
+ %% two FLUs, so yes, there's going to be
+ %% non-identical all_hosed lists.
+ true;
+ true ->
+ false % There's no excuse, buddy
+ end
+ end.
+-endif. % TEST
diff --git a/test/machi_chain_manager1_pulse.erl b/test/machi_chain_manager1_pulse.erl
new file mode 100644
index 0000000..b95cf00
--- /dev/null
+++ b/test/machi_chain_manager1_pulse.erl
@@ -0,0 +1,379 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1_pulse).
+
+%% The whole module is ifdef:ed, rebar should set PULSE
+-ifdef(PULSE).
+
+-compile(export_all).
+
+-include_lib("eqc/include/eqc.hrl").
+-include_lib("eqc/include/eqc_statem.hrl").
+
+-include("machi.hrl").
+
+-include_lib("eunit/include/eunit.hrl").
+
+-compile({parse_transform, pulse_instrument}).
+-compile({pulse_replace_module, [{application, pulse_application}]}).
+%% The following functions contain side effects but are run outside
+%% PULSE, i.e. PULSE needs to leave them alone
+-compile({pulse_skip,[{prop_pulse_test_,0}]}).
+-compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}).
+
+%% Used for output within EUnit...
+-define(QC_FMT(Fmt, Args),
+ io:format(user, Fmt, Args)).
+
+%% And to force EUnit to output QuickCheck output...
+-define(QC_OUT(P),
+ eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)).
+
+-define(MGR, machi_chain_manager1).
+-define(MGRTEST, machi_chain_manager1_test).
+
+-record(state, {
+ step=0,
+ num_pids,
+ pids,
+ dump_state
+ }).
+
+initial_state() ->
+ #state{}.
+
+gen_num_pids() ->
+ choose(2, 5).
+
+gen_seed() ->
+ noshrink({choose(1, 10000), choose(1, 10000), choose(1, 10000)}).
+
+gen_old_threshold() ->
+ noshrink(choose(1, 100)).
+
+gen_no_partition_threshold() ->
+ noshrink(choose(1, 100)).
+
+command(#state{step=0}) ->
+ {call, ?MODULE, setup, [gen_num_pids(), gen_seed()]};
+command(S) ->
+ frequency([
+ { 1, {call, ?MODULE, change_partitions,
+ [gen_old_threshold(), gen_no_partition_threshold()]}},
+ {50, {call, ?MODULE, do_ticks,
+ [choose(5, 100), S#state.pids,
+ gen_old_threshold(), gen_no_partition_threshold()]}}
+ ]).
+
+precondition(_S, _) ->
+ true.
+
+next_state(#state{step=Step}=S, Res, Call) ->
+ next_state2(S#state{step=Step + 1}, Res, Call).
+
+next_state2(S, Res, {call, _, setup, [NumPids, _Seed]}) ->
+ S#state{num_pids=NumPids, pids=Res};
+next_state2(S, Res, {call, _, dump_state, _Args}) ->
+ S#state{dump_state=Res};
+next_state2(S, _Res, {call, _, _Func, _Args}) ->
+ S.
+
+postcondition(_S, {call, _, _Func, _Args}, _Res) ->
+ true.
+
+all_list() ->
+ [a,b,c].
+ %% [a,b,c,d,e].
+
+setup(_Num, Seed) ->
+ ?QC_FMT("\nsetup,", []),
+ All_list = all_list(),
+ _ = machi_partition_simulator:start_link(Seed, 0, 100),
+ _Partitions = machi_partition_simulator:get(All_list),
+
+ FLU_pids = [begin
+ {ok, FLUPid} = machi_flu0:start_link(Name),
+ _ = machi_flu0:get_epoch(FLUPid),
+ FLUPid
+ end || Name <- All_list],
+ Namez = lists:zip(All_list, FLU_pids),
+ Mgr_pids = [begin
+ {ok, Mgr} = ?MGR:start_link(Name, All_list, FLU_pid),
+ Mgr
+ end || {Name, FLU_pid} <- Namez],
+ timer:sleep(1),
+ {ok, P1} = ?MGR:test_calc_projection(hd(Mgr_pids), false),
+ P1Epoch = P1#projection.epoch_number,
+ [ok = machi_flu0:proj_write(FLU, P1Epoch, public, P1) || FLU <- FLU_pids],
+ [?MGR:test_react_to_env(Mgr) || Mgr <- Mgr_pids],
+
+ Res = {FLU_pids, Mgr_pids},
+ put(manager_pids_hack, Res),
+ Res.
+
+change_partitions(OldThreshold, NoPartitionThreshold) ->
+ machi_partition_simulator:reset_thresholds(OldThreshold,
+ NoPartitionThreshold).
+
+always_last_partitions() ->
+ machi_partition_simulator:always_last_partitions().
+
+private_stable_check(FLUs) ->
+ {_FLU_pids, Mgr_pids} = get(manager_pids_hack),
+ Res = private_projections_are_stable_check(FLUs, Mgr_pids),
+ if not Res ->
+ io:format(user, "BUMMER: private stable check failed!\n", []);
+ true ->
+ ok
+ end,
+ Res.
+
+do_ticks(Num, PidsMaybe, OldThreshold, NoPartitionThreshold) ->
+ io:format(user, "~p,~p,~p|", [Num, OldThreshold, NoPartitionThreshold]),
+ {_FLU_pids, Mgr_pids} = case PidsMaybe of
+ undefined -> get(manager_pids_hack);
+ _ -> PidsMaybe
+ end,
+ if is_integer(OldThreshold) ->
+ machi_partition_simulator:reset_thresholds(OldThreshold,
+ NoPartitionThreshold);
+ true ->
+ ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number()]),
+ machi_partition_simulator:no_partitions()
+ end,
+ Res = exec_ticks(Num, Mgr_pids),
+ if not is_integer(OldThreshold) ->
+ ?QC_FMT("{e=~w},", [get_biggest_private_epoch_number()]);
+ true ->
+ ok
+ end,
+ Res.
+
+get_biggest_private_epoch_number() ->
+ lists:last(
+ lists:usort(
+ lists:flatten(
+ [machi_flu0:proj_list_all(FLU, private) ||
+ FLU <- all_list()]))).
+
+dump_state() ->
+ try
+ ?QC_FMT("dump_state(", []),
+ {FLU_pids, _Mgr_pids} = get(manager_pids_hack),
+ Namez = zip(all_list(), FLU_pids),
+ Report = ?MGRTEST:unanimous_report(Namez),
+ %% ?QC_FMT("Report ~p\n", [Report]),
+
+ Diag1 = [begin
+ Ps = machi_flu0:proj_get_all(FLU, Type),
+ [io_lib:format("~p ~p ~p: ~w\n", [FLUName, Type, P#projection.epoch_number, ?MGR:make_projection_summary(P)]) || P <- Ps]
+ end || {FLUName, FLU} <- Namez,
+ Type <- [public] ],
+
+ UniquePrivateEs =
+ lists:usort(lists:flatten(
+ [machi_flu0:proj_list_all(FLU, private) ||
+ {_FLUName, FLU} <- Namez])),
+ P_lists0 = [{FLUName, Type, machi_flu0:proj_get_all(FLUPid, Type)} ||
+ {FLUName, FLUPid} <- Namez, Type <- [public,private]],
+ P_lists = [{FLUName, Type, P} || {FLUName, Type, Ps} <- P_lists0,
+ P <- Ps],
+ AllDict = lists:foldl(fun({FLU, Type, P}, D) ->
+ K = {FLU, Type, P#projection.epoch_number},
+ dict:store(K, P, D)
+ end, dict:new(), lists:flatten(P_lists)),
+ DumbFinderBackward =
+ fun(FLUName) ->
+ fun(E, error_unwritten) ->
+ case dict:find({FLUName, private, E}, AllDict) of
+ {ok, T} -> T;
+ error -> error_unwritten
+ end;
+ %% case machi_flu0:proj_read(FLU, E, private) of
+ %% {ok, T} -> T;
+ %% Else -> Else
+ %% end;
+ (_E, Acc) ->
+ Acc
+ end
+ end,
+ Diag2 = [[
+ io_lib:format("~p private: ~w\n",
+ [FLUName,
+ ?MGR:make_projection_summary(
+ lists:foldl(DumbFinderBackward(FLUName),
+ error_unwritten,
+ lists:seq(Epoch, 0, -1)))])
+ || {FLUName, _FLU} <- Namez]
+ || Epoch <- UniquePrivateEs],
+
+ ?QC_FMT(")", []),
+ {Report, lists:flatten([Diag1, Diag2])}
+ catch XX:YY ->
+ ?QC_FMT("OUCH: ~p ~p @ ~p\n", [XX, YY, erlang:get_stacktrace()])
+ end.
+
+prop_pulse() ->
+ ?FORALL({Cmds0, Seed}, {non_empty(commands(?MODULE)), pulse:seed()},
+ ?IMPLIES(1 < length(Cmds0) andalso length(Cmds0) < 5,
+ begin
+ ok = shutdown_hard(),
+ %% PULSE can be really unfair, of course, including having exec_ticks
+ %% run where all of FLU a does its ticks then FLU b. Such a situation
+ %% doesn't always allow unanimous private projection store values:
+ %% FLU a might need one more tick to write its private projection, but
+ %% it isn't given a chance at the end of the PULSE run. So we cheat
+ Stabilize1 = [{set,{var,99999995},
+ {call, ?MODULE, always_last_partitions, []}}],
+ Stabilize2 = [{set,{var,99999996},
+ {call, ?MODULE, private_stable_check, [all_list()]}}],
+ LastTriggerTicks = {set,{var,99999997},
+ {call, ?MODULE, do_ticks, [25, undefined, no, no]}},
+ Cmds1 = lists:duplicate(2, LastTriggerTicks),
+ %% Cmds1 = lists:duplicate(length(all_list())*2, LastTriggerTicks),
+ Cmds = Cmds0 ++
+ Stabilize1 ++
+ Cmds1 ++
+ Stabilize2 ++
+ [{set,{var,99999999}, {call, ?MODULE, dump_state, []}}],
+ {_H2, S2, Res} = pulse:run(
+ fun() ->
+ {_H, _S, _R} = run_commands(?MODULE, Cmds)
+ end, [{seed, Seed},
+ {strategy, unfair}]),
+ ok = shutdown_hard(),
+
+ {Report, Diag} = S2#state.dump_state,
+
+ %% Report is ordered by Epoch. For each private projection
+ %% written during any given epoch, confirm that all chain
+ %% members appear in only one unique chain, i.e., the sets of
+ %% unique chains are disjoint.
+ AllDisjointP = ?MGRTEST:all_reports_are_disjoint(Report),
+
+ %% Given the report, we flip it around so that we observe the
+ %% sets of chain transitions relative to each FLU.
+ R_Chains = [?MGRTEST:extract_chains_relative_to_flu(FLU, Report) ||
+ FLU <- all_list()],
+ R_Projs = [{FLU, [?MGRTEST:chain_to_projection(
+ FLU, Epoch, UPI, Repairing, all_list()) ||
+ {Epoch, UPI, Repairing} <- E_Chains]} ||
+ {FLU, E_Chains} <- R_Chains],
+
+ %% For each chain transition experienced by a particular FLU,
+ %% confirm that each state transition is OK.
+ Sane =
+ [{FLU,_SaneRes} = {FLU,?MGR:projection_transitions_are_sane_retrospective(
+ Ps, FLU)} ||
+ {FLU, Ps} <- R_Projs],
+ SaneP = lists:all(fun({_FLU, SaneRes}) -> SaneRes == true end, Sane),
+
+ %% The final report item should say that all are agreed_membership.
+ {_LastEpoch, {ok_disjoint, LastRepXs}} = lists:last(Report),
+ AgreedOrNot = lists:usort([element(1, X) || X <- LastRepXs]),
+
+ %% TODO: Check that we've converged to a single chain with no repairs.
+ SingleChainNoRepair = case LastRepXs of
+ [{agreed_membership,{_UPI,[]}}] ->
+ true;
+ _ ->
+ LastRepXs
+ end,
+
+ ?WHENFAIL(
+ begin
+ ?QC_FMT("Res = ~p\n", [Res]),
+ ?QC_FMT("Diag = ~s\n", [Diag]),
+ ?QC_FMT("Report = ~p\n", [Report]),
+ ?QC_FMT("Sane = ~p\n", [Sane]),
+ ?QC_FMT("SingleChainNoRepair failure =\n ~p\n", [SingleChainNoRepair])
+ end,
+ conjunction([{res, Res == true orelse Res == ok},
+ {all_disjoint, AllDisjointP},
+ {sane, SaneP},
+ {all_agreed_at_end, AgreedOrNot == [agreed_membership]},
+ {single_chain_no_repair, SingleChainNoRepair}
+ ]))
+ end)).
+
+prop_pulse_test_() ->
+ Timeout = case os:getenv("PULSE_TIME") of
+ false -> 60;
+ Val -> list_to_integer(Val)
+ end,
+ ExtraTO = case os:getenv("PULSE_SHRINK_TIME") of
+ false -> 0;
+ Val2 -> list_to_integer(Val2)
+ end,
+ {timeout, (Timeout+ExtraTO+300), % 300 = a bit more fudge time
+ fun() ->
+ ?assert(eqc:quickcheck(eqc:testing_time(Timeout,
+ ?QC_OUT(prop_pulse()))))
+ end}.
+
+shutdown_hard() ->
+ (catch machi_partition_simulator:stop()),
+ [(catch machi_flu0:stop(X)) || X <- all_list()],
+ timer:sleep(1),
+ (catch exit(whereis(machi_partition_simulator), kill)),
+ [(catch exit(whereis(X), kill)) || X <- all_list()],
+ erlang:yield(),
+ ok.
+
+exec_ticks(Num, Mgr_pids) ->
+ Parent = self(),
+ Pids = [spawn_link(fun() ->
+ [begin
+ erlang:yield(),
+ Max = 10,
+ Elapsed =
+ ?MGR:sleep_ranked_order(1, Max, M_name, all_list()),
+ Res = ?MGR:test_react_to_env(MMM),
+ timer:sleep(erlang:max(0, Max - Elapsed)),
+ Res=Res %% ?D({self(), Res})
+ end || _ <- lists:seq(1,Num)],
+ Parent ! done
+ end) || {M_name, MMM} <- lists:zip(all_list(), Mgr_pids) ],
+ [receive
+ done ->
+ ok
+ after 5000 ->
+ exit(icky_timeout)
+ end || _ <- Pids],
+ ok.
+
+private_projections_are_stable_check(All_list, Mgr_pids) ->
+ %% TODO: extend the check to look not only for latest num, but
+ %% also check for flapping, and if yes, to see if all_hosed are
+ %% all exactly equal.
+
+ _ = exec_ticks(40, Mgr_pids),
+ Private1 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ FLU <- All_list],
+ _ = exec_ticks(5, Mgr_pids),
+ Private2 = [machi_flu0:proj_get_latest_num(FLU, private) ||
+ FLU <- All_list],
+
+ (Private1 == Private2).
+
+
+-endif. % PULSE
diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl
new file mode 100644
index 0000000..89f586a
--- /dev/null
+++ b/test/machi_chain_manager1_test.erl
@@ -0,0 +1,259 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_chain_manager1_test).
+
+-include("machi.hrl").
+-include("machi_projection.hrl").
+
+-define(MGR, machi_chain_manager1).
+
+-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
+-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
+-define(FLU_C, machi_flu1_client).
+-define(FLU_PC, machi_proxy_flu1_client).
+
+-export([]).
+
+-ifdef(TEST).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+%% -include_lib("eqc/include/eqc_statem.hrl").
+-define(QC_OUT(P),
+ eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
+-endif.
+
+-include_lib("eunit/include/eunit.hrl").
+-compile(export_all).
+
+unanimous_report(Namez) ->
+ UniquePrivateEs =
+ lists:usort(lists:flatten(
+ [element(2, ?FLU_PC:list_all_projections(FLU, private)) ||
+ {_FLUName, FLU} <- Namez])),
+ [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
+
+unanimous_report(Epoch, Namez) ->
+ Projs = [{FLUName, case ?FLU_PC:read_projection(FLU, private, Epoch) of
+ {ok, T} -> T;
+ _Else -> not_in_this_epoch
+ end} || {FLUName, FLU} <- Namez],
+ UPI_R_Sums = [{Proj#projection_v1.upi, Proj#projection_v1.repairing,
+ Proj#projection_v1.epoch_csum} ||
+ {_FLUname, Proj} <- Projs,
+ is_record(Proj, projection_v1)],
+ UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
+ Res =
+ [begin
+ case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
+ U == UPI]) of
+ [_1CSum] ->
+ %% Yay, there's only 1 checksum. Let's check
+ %% that all FLUs are in agreement.
+ {UPI, Repairing, _CSum} =
+ lists:keyfind(UPI, 1, UPI_R_Sums),
+ %% TODO: make certain that this subtlety doesn't get
+ %% lost in later implementations.
+
+ %% So, this is a bit of a tricky thing. If we're at
+ %% upi=[c] and repairing=[a,b], then the transition
+ %% (eventually!) to upi=[c,a] does not currently depend
+ %% on b being an active participant in the repair.
+ %%
+ %% Yes, b's state is very important for making certain
+ %% that all repair operations succeed both to a & b.
+ %% However, in this simulation, we only consider that
+ %% the head(Repairing) is sane. Therefore, we use only
+ %% the "HeadOfRepairing" in our considerations here.
+ HeadOfRepairing = case Repairing of
+ [H_Rep|_] ->
+ [H_Rep];
+ _ ->
+ []
+ end,
+ Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
+ P when is_record(P, projection_v1) ->
+ P#projection_v1.epoch_csum;
+ Else ->
+ Else
+ end} || FLU <- UPI ++ HeadOfRepairing],
+ case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
+ [_] ->
+ {agreed_membership, {UPI, Repairing}};
+ Else2 ->
+ {not_agreed, {UPI, Repairing}, Else2}
+ end;
+ _Else ->
+ {UPI, not_unique, Epoch, _Else}
+ end
+ end || UPI <- UniqueUPIs],
+ AgreedResUPI_Rs = [UPI++Repairing ||
+ {agreed_membership, {UPI, Repairing}} <- Res],
+ Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
+ lists:sort(lists:flatten(AgreedResUPI_Rs)) of
+ true ->
+ ok_disjoint;
+ false ->
+ bummer_NOT_DISJOINT
+ end,
+ {Epoch, {Tag, Res}}.
+
+all_reports_are_disjoint(Report) ->
+ [] == [X || {_Epoch, Tuple}=X <- Report,
+ element(1, Tuple) /= ok_disjoint].
+
+extract_chains_relative_to_flu(FLU, Report) ->
+ {FLU, [{Epoch, UPI, Repairing} ||
+ {Epoch, {ok_disjoint, Es}} <- Report,
+ {agreed_membership, {UPI, Repairing}} <- Es,
+ lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
+
+chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
+ exit({todo_broken_fixme,?MODULE,?LINE}),
+ machi_projection:new(Epoch, MyName, All_list,
+ All_list -- (UPI_list ++ Repairing_list),
+ UPI_list, Repairing_list, []).
+
+-ifndef(PULSE).
+
+smoke0_test() ->
+ {ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
+ Host = "localhost",
+ TcpPort = 6623,
+ {ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]),
+ Pa = #p_srvr{name=a, proto=ipv4, address=Host, port=TcpPort},
+ Members_Dict = machi_projection:make_members_dict([Pa]),
+ %% Egadz, more racing on startup, yay. TODO fix.
+ timer:sleep(1),
+ {ok, FLUaP} = ?FLU_PC:start_link(Pa),
+ {ok, M0} = ?MGR:start_link(a, Members_Dict, [{active_mode, false}]),
+ _SockA = machi_util:connect(Host, TcpPort),
+ try
+ pong = ?MGR:ping(M0)
+ after
+ ok = ?MGR:stop(M0),
+ ok = machi_flu1:stop(FLUa),
+ ok = ?FLU_PC:quit(FLUaP),
+ ok = machi_partition_simulator:stop()
+ end.
+
+smoke1_test() ->
+ machi_partition_simulator:start_link({1,2,3}, 100, 0),
+ TcpPort = 62777,
+ FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}, {c,TcpPort+2,"./data.c"}],
+ P_s = [#p_srvr{name=Name, address="localhost", port=Port} ||
+ {Name,Port,_Dir} <- FluInfo],
+
+ [machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo],
+ FLUs = [element(2, machi_flu1:start_link([{Name,Port,Dir}])) ||
+ {Name,Port,Dir} <- FluInfo],
+ MembersDict = machi_projection:make_members_dict(P_s),
+ {ok, M0} = ?MGR:start_link(a, MembersDict, [{active_mode,false}]),
+ try
+ {ok, P1} = ?MGR:test_calc_projection(M0, false),
+ {local_write_result, ok,
+ {remote_write_results, [{b,ok},{c,ok}]}} =
+ ?MGR:test_write_public_projection(M0, P1),
+ {unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(M0, false),
+
+ ok
+ after
+ ok = ?MGR:stop(M0),
+ [ok = machi_flu1:stop(X) || X <- FLUs],
+ ok = machi_partition_simulator:stop()
+ end.
+
+nonunanimous_setup_and_fix_test() ->
+ %% TODO attack list:
+ %% __ Add start option to chain manager to be "passive" only, i.e.,
+ %% not immediately go to work on
+ %% 1. Start FLUs with full complement of FLU+proj+chmgr.
+ %% 2. Put each of them under a supervisor?
+ %% - Sup proc could be a created-specifically-for-test thing, perhaps?
+ %% Rather than relying on a supervisor with reg name + OTP app started
+ %% plus plus more more yaddayadda?
+ %% 3. Add projection catalog/orddict of #p_srvr records??
+ %% 4. Fix this test, etc etc.
+ machi_partition_simulator:start_link({1,2,3}, 100, 0),
+ TcpPort = 62877,
+ FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}],
+ P_s = [#p_srvr{name=Name, address="localhost", port=Port} ||
+ {Name,Port,_Dir} <- FluInfo],
+
+ [machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo],
+ FLUs = [element(2, machi_flu1:start_link([{Name,Port,Dir}])) ||
+ {Name,Port,Dir} <- FluInfo],
+ [Proxy_a, Proxy_b] = Proxies =
+ [element(2,?FLU_PC:start_link(P)) || P <- P_s],
+ MembersDict = machi_projection:make_members_dict(P_s),
+ XX = [],
+ %% XX = [{private_write_verbose,true}],
+ {ok, Ma} = ?MGR:start_link(a, MembersDict, [{active_mode, false}]++XX),
+ {ok, Mb} = ?MGR:start_link(b, MembersDict, [{active_mode, false}]++XX),
+ try
+ {ok, P1} = ?MGR:test_calc_projection(Ma, false),
+
+ P1a = machi_projection:update_checksum(
+ P1#projection_v1{down=[b], upi=[a], dbg=[{hackhack, ?LINE}]}),
+ P1b = machi_projection:update_checksum(
+ P1#projection_v1{author_server=b, creation_time=now(),
+ down=[a], upi=[b], dbg=[{hackhack, ?LINE}]}),
+ %% Scribble different projections
+ ok = ?FLU_PC:write_projection(Proxy_a, public, P1a),
+ ok = ?FLU_PC:write_projection(Proxy_b, public, P1b),
+
+ %% ?D(x),
+ {not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
+ %% ?Dw(_XX),
+ {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+ %% The read repair here doesn't automatically trigger the creation of
+ %% a new projection (to try to create a unanimous projection). So
+ %% we expect nothing to change when called again.
+ {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
+
+ {now_using, _, EpochNum_a} = ?MGR:test_react_to_env(Ma),
+ {no_change, _, EpochNum_a} = ?MGR:test_react_to_env(Ma),
+ {unanimous,P2,_E2} = ?MGR:test_read_latest_public_projection(Ma, false),
+ {ok, P2pa} = ?FLU_PC:read_latest_projection(Proxy_a, private),
+ P2 = P2pa#projection_v1{dbg2=[]},
+
+ %% FLUb should have nothing written to private because it hasn't
+ %% reacted yet.
+ {error, not_written} = ?FLU_PC:read_latest_projection(Proxy_b, private),
+
+ %% Poke FLUb to react ... should be using the same private proj
+ %% as FLUa.
+ {now_using, _, EpochNum_a} = ?MGR:test_react_to_env(Mb),
+ {ok, P2pb} = ?FLU_PC:read_latest_projection(Proxy_b, private),
+ P2 = P2pb#projection_v1{dbg2=[]},
+
+ ok
+ after
+ ok = ?MGR:stop(Ma),
+ ok = ?MGR:stop(Mb),
+ [ok = ?FLU_PC:quit(X) || X <- Proxies],
+ [ok = machi_flu1:stop(X) || X <- FLUs],
+ ok = machi_partition_simulator:stop()
+ end.
+
+-endif. % not PULSE
+-endif. % TEST
diff --git a/test/machi_flu1_test.erl b/test/machi_flu1_test.erl
index 136d6d0..b4580e2 100644
--- a/test/machi_flu1_test.erl
+++ b/test/machi_flu1_test.erl
@@ -33,7 +33,12 @@ setup_test_flu(RegName, TcpPort, DataDir) ->
setup_test_flu(RegName, TcpPort, DataDir, []).
setup_test_flu(RegName, TcpPort, DataDir, DbgProps) ->
- clean_up_data_dir(DataDir),
+ case proplists:get_value(save_data_dir, DbgProps) of
+ true ->
+ ok;
+ _ ->
+ clean_up_data_dir(DataDir)
+ end,
{ok, FLU1} = ?FLU:start_link([{RegName, TcpPort, DataDir},
{dbg, DbgProps}]),
@@ -125,19 +130,21 @@ flu_projection_smoke_test() ->
FLU1 = setup_test_flu(projection_test_flu, TcpPort, DataDir),
try
[begin
- {ok, -1} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
+ {ok, {-1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{error, not_written} =
?FLU_C:read_latest_projection(Host, TcpPort, T),
- {ok, []} = ?FLU_C:list_all(Host, TcpPort, T),
- {ok, []} = ?FLU_C:get_all(Host, TcpPort, T),
+ {ok, []} = ?FLU_C:list_all_projections(Host, TcpPort, T),
+ {ok, []} = ?FLU_C:get_all_projections(Host, TcpPort, T),
- P1 = machi_projection:new(1, a, [a], [], [a], [], []),
+ P_a = #p_srvr{name=a},
+ P1 = machi_projection:new(1, a, [P_a], [], [a], [], []),
ok = ?FLU_C:write_projection(Host, TcpPort, T, P1),
{error, written} = ?FLU_C:write_projection(Host, TcpPort, T, P1),
{ok, P1} = ?FLU_C:read_projection(Host, TcpPort, T, 1),
+ {ok, {1,_}} = ?FLU_C:get_latest_epoch(Host, TcpPort, T),
{ok, P1} = ?FLU_C:read_latest_projection(Host, TcpPort, T),
- {ok, [1]} = ?FLU_C:list_all(Host, TcpPort, T),
- {ok, [P1]} = ?FLU_C:get_all(Host, TcpPort, T),
+ {ok, [1]} = ?FLU_C:list_all_projections(Host, TcpPort, T),
+ {ok, [P1]} = ?FLU_C:get_all_projections(Host, TcpPort, T),
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort, T, 2)
end || T <- [public, private] ]
after
diff --git a/test/machi_partition_simulator.erl b/test/machi_partition_simulator.erl
new file mode 100644
index 0000000..fbbdcb9
--- /dev/null
+++ b/test/machi_partition_simulator.erl
@@ -0,0 +1,240 @@
+%% -------------------------------------------------------------------
+%%
+%% Machi: a small village of replicated files
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(machi_partition_simulator).
+
+-behaviour(gen_server).
+
+-ifdef(TEST).
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-endif.
+-ifdef(PULSE).
+-compile({parse_transform, pulse_instrument}).
+-endif.
+
+-export([start_link/3, stop/0,
+ get/1, reset_thresholds/2,
+ no_partitions/0, always_last_partitions/0, always_these_partitions/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([islands2partitions/1,
+ partition2connection/2,
+ connection2partition/2,
+ partitions2num_islands/2,
+ partition_list_is_symmetric_p/2]).
+
+-define(TAB, ?MODULE).
+
+-record(state, {
+ seed,
+ old_partitions,
+ old_threshold,
+ no_partition_threshold,
+ method=oneway_partitions :: 'island' | 'oneway_partitions'
+ }).
+
+start_link(Seed, OldThreshold, NoPartitionThreshold) ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE,
+ {Seed, OldThreshold, NoPartitionThreshold}, []).
+
+stop() ->
+ gen_server:call(?MODULE, {stop}, infinity).
+
+get(Nodes) ->
+ gen_server:call(?MODULE, {get, Nodes}, infinity).
+
+reset_thresholds(OldThreshold, NoPartitionThreshold) ->
+ gen_server:call(?MODULE, {reset_thresholds, OldThreshold, NoPartitionThreshold}, infinity).
+
+no_partitions() ->
+ reset_thresholds(-999, 999).
+
+always_last_partitions() ->
+ reset_thresholds(999, 0).
+
+always_these_partitions(Parts) ->
+ reset_thresholds(999, 0),
+ gen_server:call(?MODULE, {always_these_partitions, Parts}, infinity).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init({Seed, OldThreshold, NoPartitionThreshold}) ->
+ {ok, #state{seed=Seed,
+ old_partitions={[],[[]]},
+ old_threshold=OldThreshold,
+ no_partition_threshold=NoPartitionThreshold}}.
+
+handle_call({get, Nodes}, _From, S) ->
+ {Seed2, Partitions} =
+ calc_network_partitions(S#state.method,
+ Nodes,
+ S#state.seed,
+ S#state.old_partitions,
+ S#state.old_threshold,
+ S#state.no_partition_threshold),
+ {reply, Partitions, S#state{seed=Seed2,
+ old_partitions=Partitions}};
+handle_call({reset_thresholds, OldThreshold, NoPartitionThreshold}, _From, S) ->
+ {reply, ok, S#state{old_threshold=OldThreshold,
+ no_partition_threshold=NoPartitionThreshold}};
+handle_call({always_these_partitions, Parts}, _From, S) ->
+ {reply, ok, S#state{old_partitions={Parts,[na_reset_by_always]}}};
+handle_call({stop}, _From, S) ->
+ {stop, normal, ok, S}.
+
+handle_cast(_Cast, S) ->
+ {noreply, S}.
+
+handle_info(_Info, S) ->
+ {noreply, S}.
+
+terminate(_Reason, _S) ->
+ ok.
+
+code_change(_OldVsn, S, _Extra) ->
+ {ok, S}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+calc_network_partitions(Method, Nodes, Seed1, OldPartition,
+ OldThreshold, NoPartitionThreshold) ->
+ {Cutoff2, Seed2} = random:uniform_s(100, Seed1),
+ if Cutoff2 < OldThreshold ->
+ {Seed2, OldPartition};
+ true ->
+ {Cutoff3, Seed3} = random:uniform_s(100, Seed1),
+ if Cutoff3 < NoPartitionThreshold ->
+ {Seed3, {[], [Nodes]}};
+ true ->
+ make_network_partition_locations(Method, Nodes, Seed3)
+ end
+ end.
+
+make_network_partition_locations(island=_Method, Nodes, Seed1) ->
+ Num = length(Nodes),
+ {Seed2, WeightsNodes} = lists:foldl(
+ fun(Node, {Seeda, Acc}) ->
+ {Cutoff0, Seedb} =
+ random:uniform_s(100, Seeda),
+ Cutoff = erlang:max(
+ 2, if Cutoff0 rem 4 == 0 ->
+ 0;
+ true ->
+ Cutoff0
+ end),
+ {Seedb, [{Cutoff, Node}|Acc]}
+ end, {Seed1, []}, Nodes),
+ IslandSep = 100 div Num,
+ Islands = [
+ lists:sort([Nd || {Weight, Nd} <- WeightsNodes,
+ (Max - IslandSep) =< Weight, Weight < Max])
+ || Max <- lists:seq(IslandSep + 1, 105, IslandSep)],
+ {Seed2, {lists:usort(islands2partitions(Islands)), lists:sort(Islands)}};
+make_network_partition_locations(oneway_partitions=_Method, Nodes, Seed1) ->
+ Pairs = make_all_pairs(Nodes),
+ Num = length(Pairs),
+ {Seed2, Weights} = lists:foldl(
+ fun(_, {Seeda, Acc}) ->
+ {Cutoff, Seedb} = random:uniform_s(100, Seeda),
+ {Seedb, [Cutoff|Acc]}
+ end, {Seed1, []}, lists:seq(1, Num)),
+ {Cutoff3, Seed3} = random:uniform_s(100, Seed2),
+ {Seed3, {[X || {Weight, X} <- lists:zip(Weights, Pairs),
+ Weight < Cutoff3], [islands_not_supported]}}.
+
+make_all_pairs(L) ->
+ lists:flatten(make_all_pairs2(lists:usort(L))).
+
+make_all_pairs2([]) ->
+ [];
+make_all_pairs2([_]) ->
+ [];
+make_all_pairs2([H1|T]) ->
+ [[{H1, X}, {X, H1}] || X <- T] ++ make_all_pairs(T).
+
+islands2partitions([]) ->
+ [];
+islands2partitions([Island|Rest]) ->
+ [{X,Y} || X <- Island,
+ Y <- lists:append(Rest), X /= Y]
+ ++
+ [{Y,X} || X <- Island,
+ Y <- lists:append(Rest), X /= Y]
+ ++
+ islands2partitions(Rest).
+
+partition2connection(Members0, Partition0) ->
+ p2c_invert(lists:usort(Members0), lists:usort(Partition0)).
+
+connection2partition(Members0, Partition0) ->
+ p2c_invert(lists:usort(Members0), lists:usort(Partition0)).
+
+p2c_invert(Members, Partition_list_Or_Connection_list) ->
+ All = [{X,Y} || X <- Members, Y <- Members, X /= Y],
+ All -- Partition_list_Or_Connection_list.
+
+partitions2num_islands(Members0, Partition0) ->
+ %% Ignore duplicates in either arg, if any.
+ Members = lists:usort(Members0),
+ Partition = lists:usort(Partition0),
+
+ Connections = partition2connection(Members, Partition),
+ Cs = [lists:member({X,Y}, Connections)
+ orelse
+ lists:member({Y,X}, Connections) || X <- Members, Y <- Members,
+ X /= Y],
+ case lists:usort(Cs) of
+ [true] -> 1;
+ [false] -> many;
+ [false, true] -> many % TODO too lazy to finish
+ end.
+
+partition_list_is_symmetric_p(Members0, Partition0) ->
+    %% Ignore duplicates in either arg, if any.
+ Members = lists:usort(Members0),
+ NumMembers = length(Members),
+ Partition = lists:usort(Partition0),
+
+ NewDict = lists:foldl(
+ fun({A,B}, Dict) ->
+ Key = if A > B -> {A,B};
+ true -> {B,A}
+ end,
+ orddict:update_counter(Key, 1, Dict)
+ end, orddict:new(), Partition),
+ AllOddP = orddict:fold(
+ fun(_Key, Count, true) when Count rem 2 == 0 ->
+ true;
+ (_, _, _) ->
+ false
+ end, true, NewDict),
+ if not AllOddP ->
+ false;
+ true ->
+ TwosCount = [Key || {Key, Count} <- orddict:to_list(NewDict),
+ Count == 2],
+ length(TwosCount) >= (NumMembers - 1)
+ end.
+
+-endif. % TEST
diff --git a/test/machi_projection_test.erl b/test/machi_projection_test.erl
index f30411a..3f2d59a 100644
--- a/test/machi_projection_test.erl
+++ b/test/machi_projection_test.erl
@@ -25,36 +25,45 @@
-include("machi_projection.hrl").
+new_fake(Name) ->
+ #p_srvr{name=Name}.
+
+%% Bleh, hey QuickCheck ... except that any model probably equals
+%% code under test, bleh.
+
new_test() ->
- %% Bleh, hey QuickCheck ... except that any model probably equals
- %% code under test, bleh.
- true = try_it(a, [a,b,c], [a,b], [], [c], []),
- true = try_it(<<"a">>, [<<"a">>,b,c], [<<"a">>,b], [], [c], []),
- Servers = [#p_srvr{name=a}, #p_srvr{name=b}, #p_srvr{name=c}],
- Servers_bad1 = [#p_srvr{name= <<"a">>}, #p_srvr{name=b}, #p_srvr{name=c}],
- Servers_bad2 = [#p_srvr{name=z}, #p_srvr{name=b}, #p_srvr{name=c}],
+ All0 = [new_fake(X) || X <- [a,b,c]],
+ All_binA = [new_fake(<<"a">>)] ++ [new_fake(X) || X <- [b,c]],
+
+ true = try_it(a, All0, [a,b], [], [c], []),
+ true = try_it(<<"a">>, All_binA, [<<"a">>,b], [], [c], []),
+ Servers = All0,
+ Servers_bad1 = [new_fake(X) || X <- [<<"a">>,b,c]],
+ Servers_bad2 = [new_fake(X) || X <- [z,b,c]],
true = try_it(a, Servers, [a,b], [], [c], []),
false = try_it(a, not_list, [a,b], [], [c], []),
- false = try_it(a, [a,b,c], not_list, [], [c], []),
- false = try_it(a, [a,b,c], [a,b], not_list, [c], []),
- false = try_it(a, [a,b,c], [a,b], [], not_list, []),
- false = try_it(a, [a,b,c], [a,b], [], [c], not_list),
+ false = try_it(a, All0, not_list, [], [c], []),
+ false = try_it(a, All0, [a,b], not_list, [c], []),
+ false = try_it(a, All0, [a,b], [], not_list, []),
+ false = try_it(a, All0, [a,b], [], [c], not_list),
- false = try_it(<<"x">>, [a,b,c], [a,b], [], [c], []),
- false = try_it(a, [a,b,c], [a,b,c], [], [c], []),
- false = try_it(a, [a,b,c], [a,b], [c], [c], []),
- false = try_it(a, [a,b,c], [a,b], [], [c,c], []),
+ false = try_it(<<"x">>, All0, [a,b], [], [c], []),
+ false = try_it(a, All0, [a,b,c], [], [c], []),
+ false = try_it(a, All0, [a,b], [c], [c], []),
+ false = try_it(a, All0, [a,b], [], [c,c], []),
false = try_it(a, Servers_bad1, [a,b], [], [c], []),
false = try_it(a, Servers_bad2, [a,b], [], [c], []),
ok.
compare_test() ->
- P0 = machi_projection:new(0, a, [a,b,c], [a,b], [], [c], []),
- P1a = machi_projection:new(1, a, [a,b,c], [a,b], [], [c], []),
- P1b = machi_projection:new(1, b, [a,b,c], [a,b], [], [c], []),
- P2 = machi_projection:new(2, a, [a,b,c], [a,b], [], [c], []),
+ All0 = [new_fake(X) || X <- [a,b,c]],
+
+ P0 = machi_projection:new(0, a, All0, [a,b], [], [c], []),
+ P1a = machi_projection:new(1, a, All0, [a,b], [], [c], []),
+ P1b = machi_projection:new(1, b, All0, [a,b], [], [c], []),
+ P2 = machi_projection:new(2, a, All0, [a,b], [], [c], []),
0 = machi_projection:compare(P0, P0),
-1 = machi_projection:compare(P0, P1a),
diff --git a/test/machi_proxy_flu1_client_test.erl b/test/machi_proxy_flu1_client_test.erl
new file mode 100644
index 0000000..a22456e
--- /dev/null
+++ b/test/machi_proxy_flu1_client_test.erl
@@ -0,0 +1,87 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(machi_proxy_flu1_client_test).
+-compile(export_all).
+
+-include("machi_projection.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+-define(MUT, machi_proxy_flu1_client).
+
+-ifdef(TEST).
+
+api_smoke_test() ->
+ RegName = api_smoke_flu,
+ Host = "localhost",
+ TcpPort = 57124,
+ DataDir = "./data.api_smoke_flu",
+ FLU1 = machi_flu1_test:setup_test_flu(RegName, TcpPort, DataDir),
+ erase(flu_pid),
+
+ try
+ I = #p_srvr{name=RegName, proto=ipv4, address=Host, port=TcpPort},
+ {ok, Prox1} = ?MUT:start_link(I),
+ try
+ FakeEpoch = {-1, <<0:(20*8)/big>>},
+ [{ok, {_,_,_}} = ?MUT:append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity) || _ <- lists:seq(1,5)],
+ %% Stop the FLU, what happens?
+ machi_flu1:stop(FLU1),
+ {error,_} = ?MUT:append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity),
+ {error,not_connected} = ?MUT:append_chunk(Prox1,
+ FakeEpoch, <<"prefix">>, <<"data">>,
+ infinity),
+ %% Start the FLU again, we should be able to do stuff immediately
+ FLU1b = machi_flu1_test:setup_test_flu(RegName, TcpPort, DataDir,
+ [save_data_dir]),
+ put(flu_pid, FLU1b),
+ MyChunk = <<"my chunk data">>,
+ {ok, {MyOff,MySize,MyFile}} =
+ ?MUT:append_chunk(Prox1, FakeEpoch, <<"prefix">>, MyChunk,
+ infinity),
+ {ok, MyChunk} = ?MUT:read_chunk(Prox1, FakeEpoch, MyFile, MyOff, MySize),
+
+ %% Alright, now for the rest of the API, whee
+ BadFile = <<"no-such-file">>,
+ {error, no_such_file} = ?MUT:checksum_list(Prox1, FakeEpoch, BadFile),
+ {ok, [_]} = ?MUT:list_files(Prox1, FakeEpoch),
+ {ok, FakeEpoch} = ?MUT:get_latest_epoch(Prox1, public),
+ {error, not_written} = ?MUT:read_latest_projection(Prox1, public),
+ {error, not_written} = ?MUT:read_projection(Prox1, public, 44),
+ P_a = #p_srvr{name=a, address="localhost", port=6622},
+ P1 = machi_projection:new(1, a, [P_a], [], [a], [], []),
+ ok = ?MUT:write_projection(Prox1, public, P1),
+ {ok, P1} = ?MUT:read_projection(Prox1, public, 1),
+ {ok, [P1]} = ?MUT:get_all_projections(Prox1, public),
+ {ok, [1]} = ?MUT:list_all_projections(Prox1, public),
+ ok
+ after
+ _ = (catch ?MUT:quit(Prox1))
+ end
+ after
+ (catch machi_flu1:stop(FLU1)),
+ (catch machi_flu1:stop(get(flu_pid)))
+ end.
+
+-endif. % TEST