Merge branch 'slf/manager-cleanup1'
This commit is contained in:
commit
e0066660ef
22 changed files with 316 additions and 25552 deletions
|
@ -12,7 +12,7 @@ permits.
|
|||
|
||||
## Initial re-porting on 'prototype' directory
|
||||
|
||||
* `chain-manager`: working on it now......
|
||||
* `chain-manager`: finished
|
||||
* `corfurl`: finished
|
||||
* `demo-day-hack`: not started
|
||||
* `tango`: finished
|
||||
|
|
|
@ -48,6 +48,8 @@ the simulator.
|
|||
|
||||
See [[https://tools.ietf.org/html/rfc7282][On Consensus and Humming in the IETF]], RFC 7282.
|
||||
|
||||
See also: [[http://www.snookles.com/slf-blog/2015/03/01/on-humming-consensus-an-allegory/][On “Humming Consensus”, an allegory]].
|
||||
|
||||
** Tunesmith?
|
||||
|
||||
A mix of orchestral conducting, music composition, humming?
|
||||
|
@ -365,7 +367,8 @@ document presents a detailed example.)
|
|||
|
||||
* Sketch of the self-management algorithm
|
||||
** Introduction
|
||||
See also, the diagram (((Diagram1.eps))), a flowchart of the
|
||||
Refer to the diagram `chain-self-management-sketch.Diagram1.pdf`, a
|
||||
flowchart of the
|
||||
algorithm. The code is structured as a state machine where function
|
||||
executing for the flowchart's state is named by the approximate
|
||||
location of the state within the flowchart. The flowchart has three
|
||||
|
|
|
@ -23,7 +23,9 @@ We will also accept fixes for bugs in the test code.
|
|||
|
||||
## The chain-manager prototype
|
||||
|
||||
TODO
|
||||
This is a very early experiment to try to create a distributed "rough
|
||||
consensus" algorithm that is sufficient & safe for managing the order
|
||||
of a Chain Replication chain, its members, and its chain order.
|
||||
|
||||
## The corfurl prototype
|
||||
|
||||
|
|
1
prototype/chain-manager/.gitignore
vendored
1
prototype/chain-manager/.gitignore
vendored
|
@ -6,3 +6,4 @@ deps
|
|||
ebin/*.beam
|
||||
ebin/*.app
|
||||
erl_crash.dump
|
||||
RUNLOG*
|
||||
|
|
|
@ -23,7 +23,7 @@ eunit:
|
|||
|
||||
pulse: compile
|
||||
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
|
||||
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
|
||||
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE -v eunit
|
||||
|
||||
CONC_ARGS = --pz ./.eunit --treat_as_normal shutdown --after_timeout 1000
|
||||
|
||||
|
@ -41,3 +41,15 @@ concuerror: deps compile
|
|||
concuerror -m machi_flu0_test -t proj_store_test $(CONC_ARGS)
|
||||
concuerror -m machi_flu0_test -t wedge_test $(CONC_ARGS)
|
||||
concuerror -m machi_flu0_test -t proj0_test $(CONC_ARGS)
|
||||
|
||||
APPS = kernel stdlib sasl erts ssl compiler eunit
|
||||
PLT = $(HOME)/.chmgr_dialyzer_plt
|
||||
|
||||
build_plt: deps compile
|
||||
dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
|
||||
|
||||
dialyzer: deps compile
|
||||
dialyzer -Wno_return --plt $(PLT) ebin
|
||||
|
||||
clean_plt:
|
||||
rm $(PLT)
|
||||
|
|
166
prototype/chain-manager/README.md
Normal file
166
prototype/chain-manager/README.md
Normal file
|
@ -0,0 +1,166 @@
|
|||
# The chain manager prototype
|
||||
|
||||
This is a very early experiment to try to create a distributed "rough
|
||||
consensus" algorithm that is sufficient & safe for managing the order
|
||||
of a Chain Replication chain, its members, and its chain order. A
|
||||
name hasn't been chosen yet, though the following are contenders:
|
||||
|
||||
* chain self-management
|
||||
* rough consensus
|
||||
* humming consensus
|
||||
* foggy consensus
|
||||
|
||||
## Code status: active!
|
||||
|
||||
Unlike the other code projects in this repository's `prototype`
|
||||
directory, the chain management code is still under active
|
||||
development. It is quite likely (as of early March 2015) that this
|
||||
code will be robust enough to move to the "real" Machi code base soon.
|
||||
|
||||
The most up-to-date documentation for this prototype will **not** be
|
||||
found in this subdirectory. Rather, please see the `doc` directory at
|
||||
the top of the Machi source repository.
|
||||
|
||||
## Testing, testing, testing
|
||||
|
||||
It's important to implement any Chain Replication chain manager as
|
||||
close to 100% bug-free as possible. Any bug can introduce the
|
||||
possibility of data loss, which is something we must avoid.
|
||||
Therefore, we will spend a large amount of effort to use as many
|
||||
robust testing tools and methods as feasible to test this code.
|
||||
|
||||
* [Concuerror](http://concuerror.com), a DPOR-based full state space
|
||||
exploration tool. Some preliminary Concuerror tests can be found in the
|
||||
`test/machi_flu0_test.erl` module.
|
||||
* [QuickCheck](http://www.quviq.com/products/erlang-quickcheck/), a
|
||||
property-based testing tool for Erlang. QuickCheck doesn't provide
|
||||
the reassurance of 100% state exploration, but it has proven quite
|
||||
effective at Basho for finding numerous subtle bugs.
|
||||
* Automatic simulation of arbitrary network partition failures. This
|
||||
code is already in progress and is used, for example, by the
|
||||
`test/machi_chain_manager1_test.erl` module.
|
||||
* TLA+ (future work), to try to create a rigorous model of the
|
||||
algorithm and its behavior
|
||||
|
||||
If you'd like to work on additional testing of this component, please
|
||||
[open a new GitHub Issue ticket](https://github.com/basho/machi) with
|
||||
any questions you have. Or just open a GitHub pull request. <tt>^_^</tt>
|
||||
|
||||
## Compilation & unit testing
|
||||
|
||||
Use `make` and `make test`. Note that the Makefile assumes that the
|
||||
`rebar` utility is available somewhere in your path.
|
||||
|
||||
Tested using Erlang/OTP R16B and Erlang/OTP 17, both on OS X.
|
||||
|
||||
If you wish to run the PULSE test in
|
||||
`test/machi_chain_manager1_pulse.erl` module, you must use Erlang
|
||||
R16B and Quviq QuickCheck 1.30.2 -- there is a known problem with
|
||||
QuickCheck 1.33.2, sorry! Also, please note that a single iteration
|
||||
of a PULSE test case in this model can run for 10s of seconds!
|
||||
|
||||
Otherwise, it ought to "just work" on other versions of Erlang and on other OS
|
||||
platforms, but sorry, I haven't tested it.
|
||||
|
||||
### Testing with simulated network partitions
|
||||
|
||||
See the `doc/chain-self-management-sketch.org` file for details of how
|
||||
the simulator works.
|
||||
|
||||
In summary, the simulator tries to emulate the effect of arbitrary
|
||||
asymmetric network partitions. For example, for two simulated nodes A
|
||||
and B, it's possible to have node A send messages to B, but B cannot
|
||||
send messages to A.
|
||||
|
||||
This kind of one-way message passing is nearly impossible to do with
|
||||
distributed Erlang, because disterl uses TCP. If a network partition
|
||||
happens at ISO Layer 2 (for example, due to a bad Ethernet cable that
|
||||
has a faulty receive wire), the entire TCP connection will hang rather
|
||||
than deliver disterl messages in only one direction.
|
||||
|
||||
### Testing simulated data "repair"
|
||||
|
||||
In the Machi documentation, "repair" is a re-synchronization of data
|
||||
between the UPI members of the chain (see below) and members which
|
||||
have been down/partitioned/gone-to-Hawaii-for-vacation for some period
|
||||
of time and may have state which is out-of-sync with the rest of the
|
||||
active-and-running-and-fully-in-sync chain members.
|
||||
|
||||
A rough-and-inaccurate-but-useful summary of state transitions are:
|
||||
|
||||
down -> repair eligible -> repairing started -> repairing finished -> upi
|
||||
|
||||
* Any state can transition back to 'down'
|
||||
* Repair interruptions might trigger a transition to
|
||||
'repair eligible' instead of 'down'.
|
||||
* UPI = Update Propagation Invariant (per the original
|
||||
Chain Replication paper) preserving members.
|
||||
|
||||
I.e., The state stored by any UPI member is fully
|
||||
in sync with all other UPI chain members, except
|
||||
for new updates which are being processed by Chain
|
||||
Replication at a particular instant in time.
|
||||
|
||||
In both the PULSE and `convergence_demo*()` tests, there is a
|
||||
simulated time when a FLU's repair state goes from "repair started" to
|
||||
"repair finished", which means that the FLU-under-repair is now
|
||||
eligible to join the UPI portion of the chain as a fully-sync'ed
|
||||
member of the chain. The simulation is based on a simple "coin
|
||||
flip"-style random choice.
|
||||
|
||||
The simulator framework is simulating repair failures when a network
|
||||
partition is detected with the repair destination FLU. In the "real
|
||||
world", other kinds of failure could also interrupt the repair
|
||||
process.
|
||||
|
||||
### The PULSE test in machi_chain_manager1_test.erl
|
||||
|
||||
As mentioned above, this test is quite slow: it can take many dozens
|
||||
of seconds to execute a single test case. However, the test really is using
|
||||
PULSE to play strange games with Erlang process scheduling.
|
||||
|
||||
Unfortunately, the PULSE framework is very slow for this test. We'd
|
||||
like something better, so I wrote the
|
||||
`machi_chain_manager1_test:convergence_demo_test()` test to use most
|
||||
of the network partition simulator to try to run many more partition
|
||||
scenarios in the same amount of time.
|
||||
|
||||
### machi_chain_manager1_test:convergence_demo1()
|
||||
|
||||
This function is intended both as a demo and as a possible
|
||||
fully-automated sanity checking function (it will throw an exception
|
||||
when a model failure happens). Its purpose is to "go faster" than
|
||||
the PULSE test described above. It meets this purpose handily.
|
||||
However, it doesn't give quite as much confidence as PULSE does that
|
||||
Erlang process scheduling cannot somehow break algorithm running
|
||||
inside the simulator.
|
||||
|
||||
To execute:
|
||||
|
||||
make test
|
||||
erl -pz ./.eunit deps/*/ebin
|
||||
ok = machi_chain_manager1_test:convergence_demo1().
|
||||
|
||||
In summary:
|
||||
|
||||
* Set up four FLUs, `[a,b,c,d]`, to be used for the test
|
||||
* Set up a set of random asymmetric network partitions, based on a
|
||||
'seed' for a pseudo-random number generator. Each call to the
|
||||
partition simulator may yield a different partition scenario ... so
|
||||
the simulated environment is very unstable.
|
||||
* Run the algorithm for a while so that it has witnessed the partition
|
||||
instability for a long time.
|
||||
* Set the partitions definition to a fixed `[{a,b}]`, meaning that FLU `a`
|
||||
cannot send messages to FLU `b`, but all other communication
|
||||
(including messages from `b -> a`) works correctly.
|
||||
* Run the algorithm, wait for everyone to settle on rough consensus.
|
||||
* Set the partition definition to wildly random again.
|
||||
* Run the algorithm for a while so that it has witnessed the partition
|
||||
instability for a long time.
|
||||
* Set the partitions definition to a fixed `[{a,c}]`.
|
||||
* Run the algorithm, wait for everyone to settle on rough consensus.
|
||||
* Set the partitions definition to a fixed `[]`, i.e., there are no
|
||||
network partitions at all.
|
||||
* Run the algorithm, wait for everyone to settle on a **unanimous value**
|
||||
of some ordering of all four FLUs.
|
||||
|
2
prototype/chain-manager/docs/README.md
Normal file
2
prototype/chain-manager/docs/README.md
Normal file
|
@ -0,0 +1,2 @@
|
|||
|
||||
Please see the `doc` directory at the top of the Machi repo.
|
|
@ -1,191 +0,0 @@
|
|||
## CORFU papers
|
||||
|
||||
I recommend the "5 pages" paper below first, to give a flavor of
|
||||
what the CORFU is about. When Scott first read the CORFU paper
|
||||
back in 2011 (and the Hyder paper), he thought it was insanity.
|
||||
He recommends waiting before judging quite so hastily. :-)
|
||||
|
||||
After that, then perhaps take a step back and skim over the
|
||||
Hyder paper. Hyder started before CORFU, but since CORFU, the
|
||||
Hyder folks at Microsoft have rewritten Hyder to use CORFU as
|
||||
the shared log underneath it. But the Hyder paper has lots of
|
||||
interesting bits about how you'd go about creating a distributed
|
||||
DB where the transaction log *is* the DB.
|
||||
|
||||
### "CORFU: A Distributed Shared LogCORFU: A Distributed Shared Log"
|
||||
|
||||
MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN
|
||||
PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI,
|
||||
University of California, San Diego, TED WOBBER, Microsoft Research
|
||||
Silicon Valley
|
||||
|
||||
Long version of introduction to CORFU (~30 pages)
|
||||
http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf
|
||||
|
||||
### "CORFU: A Shared Log Design for Flash Clusters"
|
||||
|
||||
Same authors as above
|
||||
|
||||
Short version of introduction to CORFU paper above (~12 pages)
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf
|
||||
|
||||
### "From Paxos to CORFU: A Flash-Speed Shared Log"
|
||||
|
||||
Same authors as above
|
||||
|
||||
5 pages, a short summary of CORFU basics and some trial applications
|
||||
that have been implemented on top of it.
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf
|
||||
|
||||
### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware"
|
||||
|
||||
Wei, Davis, Wobber, Balakrishnan, Malkhi
|
||||
|
||||
Summary report of implementing the CORFU server-side in
|
||||
FPGA-style hardware. (~11 pages)
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf
|
||||
|
||||
### "Tango: Distributed Data Structures over a Shared Log"
|
||||
|
||||
Balakrishnan, Malkhi, Wobber, Wu, Brabhakaran, Wei, Davis, Rao, Zou, Zuck
|
||||
|
||||
Describes a framework for developing data structures that reside
|
||||
persistently within a CORFU log: the log *is* the database/data
|
||||
structure store.
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/Tango.pdf
|
||||
|
||||
### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service"
|
||||
|
||||
Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber
|
||||
|
||||
The ZooKeeper inter-server communication is replaced with CORFU.
|
||||
Faster, fewer lines of code than ZK, and more features than the
|
||||
original ZK code base.
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf
|
||||
|
||||
### "Hyder – A Transactional Record Manager for Shared Flash"
|
||||
|
||||
Bernstein, Reid, Das
|
||||
|
||||
Describes a distributed log-based DB system where the txn log is
|
||||
treated quite oddly: a "txn intent" record is written to a
|
||||
shared common log. All participants read the shared log in
|
||||
parallel and make commit/abort decisions in parallel, based on
|
||||
what conflicts (or not) that they see in the log. Scott's first
|
||||
reading was "No way, wacky" ... and has since changed his mind.
|
||||
|
||||
http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf
|
||||
pages 9-20
|
||||
|
||||
|
||||
|
||||
## Fiddling with PULSE
|
||||
|
||||
Do the following:
|
||||
|
||||
make clean
|
||||
make
|
||||
make pulse
|
||||
|
||||
... then watch the dots go across the screen for 60 seconds. If you
|
||||
wish, you can press `Control-c` to interrupt the test. We're really
|
||||
interested in the build artifacts.
|
||||
|
||||
erl -pz .eunit deps/*/ebin
|
||||
eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
||||
|
||||
This will run the PULSE test for 5 seconds. Feel free to adjust for
|
||||
as many seconds as you wish.
|
||||
|
||||
Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
|
||||
|
||||
Eshell V5.10.3 (abort with ^G)
|
||||
1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
||||
Starting Quviq QuickCheck version 1.30.4
|
||||
(compiled at {{2014,2,7},{9,19,50}})
|
||||
Licence for Basho reserved until {{2014,2,17},{1,41,39}}
|
||||
......................................................................................
|
||||
OK, passed 86 tests
|
||||
schedule: Count: 86 Min: 2 Max: 1974 Avg: 3.2e+2 Total: 27260
|
||||
true
|
||||
2>
|
||||
|
||||
REPL interactive work can be done via:
|
||||
|
||||
1. Edit code, e.g. `corfurl_pulse.erl`.
|
||||
2. Run `env BITCASK_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP`
|
||||
to compile.
|
||||
3. Reload any recompiled modules, e.g. `l(corfurl_pulse).`
|
||||
4. Resume QuickCheck activities.
|
||||
|
||||
## Seeing an PULSE scheduler interleaving failure in action
|
||||
|
||||
1. Edit `corfurl_pulse:check_trace()` to uncomment the
|
||||
use of `conjunction()` that mentions `bogus_order_check_do_not_use_me`
|
||||
and comment out the real `conjunction()` call below it.
|
||||
2. Recompile & reload.
|
||||
3. Check.
|
||||
|
||||
For example:
|
||||
|
||||
9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
||||
.........Failed! After 9 tests.
|
||||
|
||||
Sweet! The first tuple below are the first `?FORALL()` values,
|
||||
and the 2nd is the list of commands,
|
||||
`{SequentialCommands, ListofParallelCommandLists}`. The 3rd is the
|
||||
seed used to perturb the PULSE scheduler.
|
||||
|
||||
In this case, `SequentialCommands` has two calls (to `setup()` then
|
||||
`append()`) and there are two parallel procs: one makes 1
|
||||
call to `append()` and the other makes 2 calls to `append()`.
|
||||
|
||||
{2,2,9}
|
||||
{{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}],
|
||||
[[{set,{var,3},
|
||||
{call,corfurl_pulse,append,
|
||||
[{var,1},<<231,149,226,203,10,105,54,223,147>>]}}],
|
||||
[{set,{var,2},
|
||||
{call,corfurl_pulse,append,
|
||||
[{var,1},<<7,206,146,75,249,13,154,238,110>>]}},
|
||||
{set,{var,4},
|
||||
{call,corfurl_pulse,append,
|
||||
[{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]},
|
||||
{27492,46961,4884}}
|
||||
|
||||
Here are our results:
|
||||
|
||||
simple_result: passed
|
||||
errors: passed
|
||||
events: failed
|
||||
identity: passed
|
||||
bogus_order_check_do_not_use_me: failed
|
||||
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
|
||||
|
||||
Our (bogus!) order expectation was violated. Shrinking!
|
||||
|
||||
simple_result: passed
|
||||
errors: passed
|
||||
events: failed
|
||||
identity: passed
|
||||
bogus_order_check_do_not_use_me: failed
|
||||
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
|
||||
|
||||
Shrinking was able to remove two `append()` calls and to shrink the
|
||||
size of the pages down from 9 bytes down to 1 byte.
|
||||
|
||||
Shrinking........(8 times)
|
||||
{1,1,1}
|
||||
{{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}],
|
||||
[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],
|
||||
[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]},
|
||||
{27492,46961,4884}}
|
||||
events: failed
|
||||
bogus_order_check_do_not_use_me: failed
|
||||
[{ok,2},{ok,1}] /= [{ok,1},{ok,2}]
|
||||
false
|
|
@ -1,35 +0,0 @@
|
|||
msc {
|
||||
client1, FLU1, FLU2, client2, client3;
|
||||
|
||||
client1 box client3 [label="Epoch #1: chain = FLU1 -> FLU2"];
|
||||
client1 -> FLU1 [label="{write,epoch1,<<Page YYY>>}"];
|
||||
client1 <- FLU1 [label="ok"];
|
||||
client1 box client1 [label="Client crash", textcolour="red"];
|
||||
|
||||
FLU1 box FLU1 [label="FLU crash", textcolour="red"];
|
||||
|
||||
client1 box client3 [label="Epoch #2: chain = FLU2"];
|
||||
|
||||
client2 -> FLU2 [label="{write,epoch2,<<Page ZZZ>>}"];
|
||||
client2 <- FLU2 [label="ok"];
|
||||
|
||||
client3 box client3 [label="Read repair starts", textbgcolour="aqua"];
|
||||
|
||||
client3 -> FLU2 [label="{read,epoch2}"];
|
||||
client3 <- FLU2 [label="{ok,<<Page ZZZ>>}"];
|
||||
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>}"];
|
||||
FLU1 box FLU1 [label="What do we do here? Our current value is <<Page YYY>>.", textcolour="red"] ;
|
||||
FLU1 box FLU1 [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ;
|
||||
FLU1 box FLU1 [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ;
|
||||
FLU1 -> client3 [label="I'm sorry, Dave, I cannot do that."];
|
||||
|
||||
FLU1 box FLU1 [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ;
|
||||
|
||||
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>,repair,witnesses=[FLU2]}", textbgcolour="silver"];
|
||||
FLU1 box FLU1 [label="Start an async process to ask the witness list to corroborate this repair."];
|
||||
FLU1 -> FLU2 [label="{read,epoch2}", textbgcolour="aqua"];
|
||||
FLU1 <- FLU2 [label="{ok,<<Page ZZ>>}", textbgcolour="aqua"];
|
||||
FLU1 box FLU1 [label="Overwrite local storage with repair page.", textbgcolour="silver"];
|
||||
client3 <- FLU1 [label="Async proc replies: ok", textbgcolour="silver"];
|
||||
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
|
||||
## read-repair-race.1.
|
||||
|
||||
First attempt at using "mscgen" to make some Message Sequence
|
||||
Chart (MSC) for a race found at commit 087c2605ab.
|
||||
|
||||
|
||||
## read-repair-race.2.
|
||||
|
||||
Second attempt. This is almost exactly the trace that is
|
||||
generated by this failing test case at commit 087c2605ab:
|
||||
|
||||
C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]].
|
||||
eqc:check(corfurl_pulse:prop_pulse(), C2).
|
||||
|
||||
## read-repair-race.2b.*
|
||||
|
||||
Same basic condition as read-repair-race.2, but edited
|
||||
substantially to make it clearer what is happening.
|
||||
Also for commit 087c2605ab.
|
||||
|
||||
I believe that I have a fix for the silver-colored
|
||||
`error-overwritten` ... and it was indeed added to the code soon
|
||||
afterward, but it turns out that it doesn't solve the entire problem
|
||||
of "two clients try to write the exact same data at the same time to
|
||||
the same LPN".
|
||||
|
||||
|
||||
## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN"
|
||||
|
||||
This situation is something that CORFU cannot protect against, IMO.
|
||||
|
||||
I have been struggling for a while, to try to find a way for CORFU
|
||||
clients to know *always* when there is a conflict with another
|
||||
writer. It usually works: the basic nature of write-once registers is
|
||||
very powerful. However, in the case where two clients are trying to
|
||||
write the same page data to the same LPN, it looks impossible to
|
||||
resolve.
|
||||
|
||||
How do you tell the difference between:
|
||||
|
||||
1. A race between a client A writing page P at address LPN and
|
||||
read-repair fixing P. P *is* A's data and no other's, so this race
|
||||
doesn't confuse anyone.
|
||||
|
||||
1. A race between a client A writing page P at address LPN and client
|
||||
B writing the exact same page data P at the same LPN.
|
||||
A's page P = B's page P, but clients A & B don't know that.
|
||||
|
||||
If CORFU tells both A & B that they were successful, A & B assume
|
||||
that the CORFU log has two new pages appended to it, but in truth
|
||||
only one new page was appended.
|
||||
|
||||
If we try to solve this by always avoiding the same LPN address
|
||||
conflict, we are deluding ourselves. If we assume that the sequencer
|
||||
is 100% correct in that it never assigns the same LPN twice, and if we
|
||||
assume that a client must never write a block without an assignment
|
||||
from the sequencer, then the problem is solved. But the problem has a
|
||||
_heavy_ price: the log is only available when the sequencer is
|
||||
available, and only when never more than one sequencer running at a
|
||||
time.
|
||||
|
||||
The CORFU base system promises correct operation, even if:
|
||||
|
||||
* Zero sequencers are running, and clients might choose the same LPN
|
||||
to write to.
|
||||
* Two or more sequencers are running, and different sequencers
|
||||
assign the same LPN to two different clients.
|
||||
|
||||
But CORFU's "correct" behavior does not include detecting the same
|
||||
page at the same LPN. The papers don't specifically say it, alas.
|
||||
But IMO it's impossible to guarantee, so all docs ought to explicitly
|
||||
say that it's impossible and that clients must not assume it.
|
||||
|
||||
See also
|
||||
* two-clients-race.1.png
|
||||
|
||||
## A scenario of chain repair & write-once registers
|
||||
|
||||
See:
|
||||
* 2014-02-27.chain-repair-write-twice.png
|
||||
|
||||
... for a scenario where write-once registers that are truly only
|
||||
write-once-ever-for-the-rest-of-the-future are "inconvenient" when it
|
||||
comes to chain repair. Client 3 is attempting to do chain repair ops,
|
||||
bringing FLU1 back into sync with FLU2.
|
||||
|
||||
The diagram proposes one possible idea for making overwriting a
|
||||
read-once register a bit safer: ask another node in the chain to
|
||||
verify that the page you've been asked to repair is exactly the same
|
||||
as that other FLU's page.
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
msc {
|
||||
"<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"];
|
||||
|
||||
"<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
|
||||
"<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ;
|
||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"];
|
||||
"<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
|
||||
"<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ;
|
||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ;
|
||||
"<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ;
|
||||
"<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ;
|
||||
"<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ;
|
||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"];
|
||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
||||
"<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ;
|
||||
"<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
||||
"<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
|
||||
"<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
||||
"<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
|
||||
"<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ;
|
||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ;
|
||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ;
|
||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ;
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
msc {
|
||||
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
|
||||
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
|
||||
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
||||
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ;
|
||||
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
|
||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
|
||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
|
||||
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
|
||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ;
|
||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
msc {
|
||||
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
|
||||
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
|
||||
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
||||
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
|
||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
|
||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
|
||||
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
||||
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ;
|
||||
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
|
||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
|
||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
msc {
|
||||
client1, FLU1, FLU2, client2, client3;
|
||||
|
||||
client1 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
|
||||
client1 <- FLU1 [label="ok"];
|
||||
|
||||
client3 -> FLU2 [label="{seal,epoch1}"];
|
||||
client3 <- FLU2 [label="{ok,...}"];
|
||||
client3 -> FLU1 [label="{seal,epoch1}"];
|
||||
client3 <- FLU1 [label="{ok,...}"];
|
||||
|
||||
client2 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
|
||||
client2 <- FLU1 [label="error_epoch"];
|
||||
client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"];
|
||||
client2 -> FLU1 [label="{write,epoch2,<<Not unique page>>}"];
|
||||
client2 <- FLU1 [label="error_overwritten"];
|
||||
|
||||
client1 -> FLU2 [label="{write,epoch1,<<Not unique page>>}"];
|
||||
client1 <- FLU2 [label="error_epoch"];
|
||||
client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"];
|
||||
|
||||
client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ;
|
||||
client3 -> FLU1 [label="{read,epoch2}"];
|
||||
client3 <- FLU1 [label="{ok,<<Not unique page>>}"];
|
||||
client3 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
|
||||
client3 <- FLU2 [label="ok"];
|
||||
client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ;
|
||||
client3 abox client3 [ label = "We saw <<Not unique page>>", textbgcolour="silver"] ;
|
||||
|
||||
client1 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
|
||||
client1 <- FLU2 [label="error_overwritten"];
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,109 +0,0 @@
|
|||
digraph {
|
||||
compound=true
|
||||
label="Machi chain management flowchart (sample)";
|
||||
|
||||
node[shape="box", style="rounded"]
|
||||
start;
|
||||
node[shape="box", style="rounded", label="stop1"]
|
||||
stop1;
|
||||
node[shape="box", style="rounded", label="stop2"]
|
||||
stop2;
|
||||
node[shape="box", style="rounded"]
|
||||
crash;
|
||||
|
||||
subgraph clustera {
|
||||
node[shape="parallelogram", style="", label="Set retry counter = 0"]
|
||||
a05_retry;
|
||||
node[shape="parallelogram", style="", label="Create P_newprop @ epoch E+1\nbased on P_current @ epoch E"]
|
||||
a10_create;
|
||||
node[shape="parallelogram", style="", label="Get latest public projection, P_latest"]
|
||||
a20_get;
|
||||
node[shape="diamond", style="", label="Epoch(P_latest) > Epoch(P_current)\norelse\nP_latest was not unanimous"]
|
||||
a30_epoch;
|
||||
node[shape="diamond", style="", label="Epoch(P_latest) == Epoch(P_current)"]
|
||||
a40_epochequal;
|
||||
node[shape="diamond", style="", label="P_latest == P_current"]
|
||||
a50_equal;
|
||||
}
|
||||
|
||||
subgraph clustera100 {
|
||||
node[shape="diamond", style="", label="Write P_newprop to everyone"]
|
||||
a100_write;
|
||||
}
|
||||
|
||||
subgraph clusterb {
|
||||
node[shape="diamond", style="", label="P_latest was unanimous?"]
|
||||
b10_unanimous;
|
||||
node[shape="diamond", style="", label="Retry counter too big?"]
|
||||
b20_counter;
|
||||
node[shape="diamond", style="", label="Rank(P_latest) >= Rank(P_newprop)"]
|
||||
b30_rank;
|
||||
node[shape="diamond", style="", label="P_latest.upi == P_newprop.upi\nand also\nPlatest.repairing == P_newprop.repairing"]
|
||||
b40_condc;
|
||||
node[shape="square", style="", label="P_latest author is\ntoo slow, let's try!"]
|
||||
b45_lets;
|
||||
node[shape="parallelogram", style="", label="P_newprop is better than P_latest.\nSet P_newprop.epoch = P_latest.epoch + 1."]
|
||||
b50_better;
|
||||
}
|
||||
|
||||
subgraph clusterc {
|
||||
node[shape="diamond", style="", label="Is Move(P_current, P_latest) ok?"]
|
||||
c10_move;
|
||||
node[shape="parallelogram", style="", label="Tell Author(P_latest) to rewrite\nwith a bigger epoch number"]
|
||||
c20_tell;
|
||||
}
|
||||
|
||||
subgraph clusterd {
|
||||
node[shape="diamond", style="", label="Use P_latest as the\nnew P_current"]
|
||||
d10_use;
|
||||
}
|
||||
|
||||
start -> a05_retry;
|
||||
|
||||
a05_retry -> a10_create;
|
||||
a10_create -> a20_get;
|
||||
a20_get -> a30_epoch;
|
||||
a30_epoch -> a40_epochequal[label="false"];
|
||||
a30_epoch -> b10_unanimous[label="true"];
|
||||
a40_epochequal -> a50_equal[label="true"];
|
||||
a40_epochequal -> crash[label="falseXX"];
|
||||
a50_equal -> stop1[label="true"];
|
||||
a50_equal -> b20_counter[label="false"];
|
||||
|
||||
a100_write -> a10_create;
|
||||
|
||||
b10_unanimous -> c10_move[label="yes"];
|
||||
b10_unanimous -> b20_counter[label="no"];
|
||||
b20_counter -> b45_lets[label="true"];
|
||||
b20_counter -> b30_rank[label="false"];
|
||||
b30_rank -> b40_condc[label="false"];
|
||||
b30_rank -> c20_tell[label="true"];
|
||||
b40_condc -> b50_better[label="false"];
|
||||
b40_condc -> c20_tell[label="true"];
|
||||
b45_lets -> b50_better;
|
||||
b50_better -> a100_write;
|
||||
|
||||
c10_move -> d10_use[label="yes"];
|
||||
c10_move -> a100_write[label="no"];
|
||||
c20_tell -> b50_better;
|
||||
|
||||
d10_use -> stop2;
|
||||
|
||||
{rank=same; clustera clusterb clusterc clusterd};
|
||||
|
||||
// {rank=same; a10_create b10_unanimous c10_move d10_use stop2};
|
||||
// {rank=same; a20_get b20_counter c20_tell};
|
||||
// {rank=same; a30_epoch b40_condc};
|
||||
// {rank=same; a40_epochequal b40_condc crash};
|
||||
// {rank=same; stop1 a50_equal b50_better};
|
||||
|
||||
// if_valid;
|
||||
//
|
||||
// start -> input;
|
||||
// input -> if_valid;
|
||||
// if_valid -> message[label="no"];
|
||||
// if_valid -> end[label="yes"];
|
||||
// message -> input;
|
||||
|
||||
// {rank=same; message input}
|
||||
}
|
6
prototype/chain-manager/rebar.config
Normal file
6
prototype/chain-manager/rebar.config
Normal file
|
@ -0,0 +1,6 @@
|
|||
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
|
||||
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
|
||||
{deps, [
|
||||
{lager, "2.0.1", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}}
|
||||
]}.
|
||||
|
9
prototype/chain-manager/src/foo.app.src
Normal file
9
prototype/chain-manager/src/foo.app.src
Normal file
|
@ -0,0 +1,9 @@
|
|||
{application, foo, [
|
||||
{description, "Prototype of Machi chain manager."},
|
||||
{vsn, "0.0.0"},
|
||||
{applications, [kernel, stdlib, lager]},
|
||||
{mod,{foo_unfinished_app,[]}},
|
||||
{registered, []},
|
||||
{env, [
|
||||
]}
|
||||
]}.
|
|
@ -29,6 +29,7 @@
|
|||
-export([]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-ifndef(PULSE).
|
||||
|
||||
-ifdef(EQC).
|
||||
-include_lib("eqc/include/eqc.hrl").
|
||||
|
@ -458,4 +459,5 @@ combinations(L) ->
|
|||
perms([]) -> [[]];
|
||||
perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])].
|
||||
|
||||
-endif. % ! PULSE
|
||||
-endif.
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
|
||||
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
|
||||
|
||||
-export([]).
|
||||
-export([unanimous_report/1, unanimous_report/2]).
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
|
@ -42,6 +42,8 @@
|
|||
-include_lib("eunit/include/eunit.hrl").
|
||||
-compile(export_all).
|
||||
|
||||
-ifndef(PULSE).
|
||||
|
||||
smoke0_test() ->
|
||||
machi_partition_simulator:start_link({1,2,3}, 50, 50),
|
||||
{ok, FLUa} = machi_flu0:start_link(a),
|
||||
|
@ -52,10 +54,12 @@ smoke0_test() ->
|
|||
%% If/when calculate_projection_internal_old() disappears, then
|
||||
%% get rid of the comprehension below ... start/ping/stop is
|
||||
%% good enough for smoke0.
|
||||
io:format(user, "\n\nBegin 5 lines of verbose stuff, check manually for differences\n", []),
|
||||
[begin
|
||||
Proj = ?MGR:calculate_projection_internal_old(M0),
|
||||
io:format(user, "~w\n", [?MGR:make_projection_summary(Proj)])
|
||||
end || _ <- lists:seq(1,5)]
|
||||
end || _ <- lists:seq(1,5)],
|
||||
io:format(user, "\n", [])
|
||||
after
|
||||
ok = ?MGR:stop(M0),
|
||||
ok = machi_flu0:stop(FLUa),
|
||||
|
@ -105,9 +109,9 @@ nonunanimous_setup_and_fix_test() ->
|
|||
ok = machi_flu0:proj_write(FLUa, P1Epoch, public, P1a),
|
||||
ok = machi_flu0:proj_write(FLUb, P1Epoch, public, P1b),
|
||||
|
||||
?D(x),
|
||||
%% ?D(x),
|
||||
{not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
|
||||
?Dw(_XX),
|
||||
%% ?Dw(_XX),
|
||||
{not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
|
||||
%% The read repair here doesn't automatically trigger the creation of
|
||||
%% a new projection (to try to create a unanimous projection). So
|
||||
|
@ -138,101 +142,18 @@ nonunanimous_setup_and_fix_test() ->
|
|||
ok = machi_partition_simulator:stop()
|
||||
end.
|
||||
|
||||
unanimous_report(Namez) ->
|
||||
UniquePrivateEs =
|
||||
lists:usort(lists:flatten(
|
||||
[machi_flu0:proj_list_all(FLU, private) ||
|
||||
{_FLUName, FLU} <- Namez])),
|
||||
[unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
|
||||
%% This test takes a long time and spits out a huge amount of logging
|
||||
%% cruft to the console. Comment out the EUnit fixture and run manually.
|
||||
|
||||
unanimous_report(Epoch, Namez) ->
|
||||
Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
|
||||
{ok, T} -> T;
|
||||
_Else -> not_in_this_epoch
|
||||
end} || {FLUName, FLU} <- Namez],
|
||||
UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
|
||||
Proj#projection.epoch_csum} ||
|
||||
{_FLUname, Proj} <- Projs,
|
||||
is_record(Proj, projection)],
|
||||
UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
|
||||
Res =
|
||||
[begin
|
||||
case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
|
||||
U == UPI]) of
|
||||
[_1CSum] ->
|
||||
%% Yay, there's only 1 checksum. Let's check
|
||||
%% that all FLUs are in agreement.
|
||||
{UPI, Repairing, _CSum} =
|
||||
lists:keyfind(UPI, 1, UPI_R_Sums),
|
||||
%% TODO: make certain that this subtlety doesn't get
|
||||
%% last in later implementations.
|
||||
%% convergence_demo_test_() ->
|
||||
%% {timeout, 300, fun() -> convergence_demo1() end}.
|
||||
|
||||
%% So, this is a bit of a tricky thing. If we're at
|
||||
%% upi=[c] and repairing=[a,b], then the transition
|
||||
%% (eventually!) to upi=[c,a] does not currently depend
|
||||
%% on b being an active participant in the repair.
|
||||
%%
|
||||
%% Yes, b's state is very important for making certain
|
||||
%% that all repair operations succeed both to a & b.
|
||||
%% However, in this simulation, we only consider that
|
||||
%% the head(Repairing) is sane. Therefore, we use only
|
||||
%% the "HeadOfRepairing" in our considerations here.
|
||||
HeadOfRepairing = case Repairing of
|
||||
[H_Rep|_] ->
|
||||
[H_Rep];
|
||||
_ ->
|
||||
[]
|
||||
end,
|
||||
Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
|
||||
P when is_record(P, projection) ->
|
||||
P#projection.epoch_csum;
|
||||
Else ->
|
||||
Else
|
||||
end} || FLU <- UPI ++ HeadOfRepairing],
|
||||
case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
|
||||
[_] ->
|
||||
{agreed_membership, {UPI, Repairing}};
|
||||
Else2 ->
|
||||
{not_agreed, {UPI, Repairing}, Else2}
|
||||
end;
|
||||
_Else ->
|
||||
{UPI, not_unique, Epoch, _Else}
|
||||
end
|
||||
end || UPI <- UniqueUPIs],
|
||||
AgreedResUPI_Rs = [UPI++Repairing ||
|
||||
{agreed_membership, {UPI, Repairing}} <- Res],
|
||||
Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
|
||||
lists:sort(lists:flatten(AgreedResUPI_Rs)) of
|
||||
true ->
|
||||
ok_disjoint;
|
||||
false ->
|
||||
bummer_NOT_DISJOINT
|
||||
end,
|
||||
{Epoch, {Tag, Res}}.
|
||||
|
||||
all_reports_are_disjoint(Report) ->
|
||||
[] == [X || {_Epoch, Tuple}=X <- Report,
|
||||
element(1, Tuple) /= ok_disjoint].
|
||||
|
||||
extract_chains_relative_to_flu(FLU, Report) ->
|
||||
{FLU, [{Epoch, UPI, Repairing} ||
|
||||
{Epoch, {ok_disjoint, Es}} <- Report,
|
||||
{agreed_membership, {UPI, Repairing}} <- Es,
|
||||
lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
|
||||
|
||||
chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
|
||||
?MGR:make_projection(Epoch, MyName, All_list,
|
||||
All_list -- (UPI_list ++ Repairing_list),
|
||||
UPI_list, Repairing_list, []).
|
||||
|
||||
-ifndef(PULSE).
|
||||
|
||||
convergence_demo_test_() ->
|
||||
{timeout, 300, fun() -> convergence_demo_test(x) end}.
|
||||
|
||||
convergence_demo_test(_) ->
|
||||
convergence_demo1() ->
|
||||
All_list = [a,b,c,d],
|
||||
machi_partition_simulator:start_link({111,222,33}, 0, 100),
|
||||
%% machi_partition_simulator:start_link({111,222,33}, 0, 100),
|
||||
Seed = erlang:now(),
|
||||
machi_partition_simulator:start_link(Seed, 0, 100),
|
||||
io:format(user, "convergence_demo seed = ~p\n", [Seed]),
|
||||
_ = machi_partition_simulator:get(All_list),
|
||||
|
||||
{ok, FLUa} = machi_flu0:start_link(a),
|
||||
|
@ -357,4 +278,92 @@ convergence_demo_test(_) ->
|
|||
end.
|
||||
|
||||
-endif. % not PULSE
|
||||
|
||||
unanimous_report(Namez) ->
|
||||
UniquePrivateEs =
|
||||
lists:usort(lists:flatten(
|
||||
[machi_flu0:proj_list_all(FLU, private) ||
|
||||
{_FLUName, FLU} <- Namez])),
|
||||
[unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
|
||||
|
||||
unanimous_report(Epoch, Namez) ->
|
||||
Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
|
||||
{ok, T} -> T;
|
||||
_Else -> not_in_this_epoch
|
||||
end} || {FLUName, FLU} <- Namez],
|
||||
UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
|
||||
Proj#projection.epoch_csum} ||
|
||||
{_FLUname, Proj} <- Projs,
|
||||
is_record(Proj, projection)],
|
||||
UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
|
||||
Res =
|
||||
[begin
|
||||
case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
|
||||
U == UPI]) of
|
||||
[_1CSum] ->
|
||||
%% Yay, there's only 1 checksum. Let's check
|
||||
%% that all FLUs are in agreement.
|
||||
{UPI, Repairing, _CSum} =
|
||||
lists:keyfind(UPI, 1, UPI_R_Sums),
|
||||
%% TODO: make certain that this subtlety doesn't get
|
||||
%% last in later implementations.
|
||||
|
||||
%% So, this is a bit of a tricky thing. If we're at
|
||||
%% upi=[c] and repairing=[a,b], then the transition
|
||||
%% (eventually!) to upi=[c,a] does not currently depend
|
||||
%% on b being an active participant in the repair.
|
||||
%%
|
||||
%% Yes, b's state is very important for making certain
|
||||
%% that all repair operations succeed both to a & b.
|
||||
%% However, in this simulation, we only consider that
|
||||
%% the head(Repairing) is sane. Therefore, we use only
|
||||
%% the "HeadOfRepairing" in our considerations here.
|
||||
HeadOfRepairing = case Repairing of
|
||||
[H_Rep|_] ->
|
||||
[H_Rep];
|
||||
_ ->
|
||||
[]
|
||||
end,
|
||||
Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
|
||||
P when is_record(P, projection) ->
|
||||
P#projection.epoch_csum;
|
||||
Else ->
|
||||
Else
|
||||
end} || FLU <- UPI ++ HeadOfRepairing],
|
||||
case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
|
||||
[_] ->
|
||||
{agreed_membership, {UPI, Repairing}};
|
||||
Else2 ->
|
||||
{not_agreed, {UPI, Repairing}, Else2}
|
||||
end;
|
||||
_Else ->
|
||||
{UPI, not_unique, Epoch, _Else}
|
||||
end
|
||||
end || UPI <- UniqueUPIs],
|
||||
AgreedResUPI_Rs = [UPI++Repairing ||
|
||||
{agreed_membership, {UPI, Repairing}} <- Res],
|
||||
Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
|
||||
lists:sort(lists:flatten(AgreedResUPI_Rs)) of
|
||||
true ->
|
||||
ok_disjoint;
|
||||
false ->
|
||||
bummer_NOT_DISJOINT
|
||||
end,
|
||||
{Epoch, {Tag, Res}}.
|
||||
|
||||
all_reports_are_disjoint(Report) ->
|
||||
[] == [X || {_Epoch, Tuple}=X <- Report,
|
||||
element(1, Tuple) /= ok_disjoint].
|
||||
|
||||
extract_chains_relative_to_flu(FLU, Report) ->
|
||||
{FLU, [{Epoch, UPI, Repairing} ||
|
||||
{Epoch, {ok_disjoint, Es}} <- Report,
|
||||
{agreed_membership, {UPI, Repairing}} <- Es,
|
||||
lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
|
||||
|
||||
chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
|
||||
?MGR:make_projection(Epoch, MyName, All_list,
|
||||
All_list -- (UPI_list ++ Repairing_list),
|
||||
UPI_list, Repairing_list, []).
|
||||
|
||||
-endif. % TEST
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
-endif.
|
||||
|
||||
-ifdef(TEST).
|
||||
-ifndef(PULSE).
|
||||
|
||||
repair_status_test() ->
|
||||
{ok, F} = machi_flu0:start_link(one),
|
||||
|
@ -41,7 +42,6 @@ repair_status_test() ->
|
|||
ok = machi_flu0:stop(F)
|
||||
end.
|
||||
|
||||
-ifndef(PULSE).
|
||||
|
||||
concuerror1_test() ->
|
||||
ok.
|
||||
|
@ -375,5 +375,5 @@ event_get_all() ->
|
|||
Tab = ?MODULE,
|
||||
ets:tab2list(Tab).
|
||||
|
||||
-endif.
|
||||
-endif. % ! PULSE
|
||||
-endif.
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
-export([]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-ifndef(PULSE).
|
||||
|
||||
-ifdef(EQC).
|
||||
-include_lib("eqc/include/eqc.hrl").
|
||||
|
@ -146,5 +147,6 @@ make_canonical_form2([{File, Start, End, Members}|T]) ->
|
|||
Member <- Members] ++
|
||||
make_canonical_form2(T).
|
||||
|
||||
-endif. % ! PULSE
|
||||
-endif. % TEST
|
||||
|
||||
|
|
Loading…
Reference in a new issue