Merge branch 'slf/manager-cleanup1'

2015-03-03 20:10:26 +09:00 · 2015-03-03 20:10:26 +09:00 · e0066660ef
commit e0066660ef
parent 8487d5759d 54266c4196
22 changed files with 316 additions and 25552 deletions
--- a/README.md
+++ b/README.md
@ -12,7 +12,7 @@ permits.
 ## Initial re-porting on 'prototype' directory
-* `chain-manager`: working on it now......
+* `chain-manager`: finished
 * `corfurl`: finished
 * `demo-day-hack`: not started
 * `tango`: finished
--- a/doc/chain-self-management-sketch.org
+++ b/doc/chain-self-management-sketch.org
@ -48,6 +48,8 @@ the simulator.
 See [[https://tools.ietf.org/html/rfc7282][On Consensus and Humming in the IETF]], RFC 7282.
 See also: [[http://www.snookles.com/slf-blog/2015/03/01/on-humming-consensus-an-allegory/][On “Humming Consensus”, an allegory]].
 ** Tunesmith?
 A mix of orchestral conducting, music composition, humming?
@ -365,7 +367,8 @@ document presents a detailed example.)
 * Sketch of the self-management algorithm
 ** Introduction
-See also, the diagram (((Diagram1.eps))), a flowchart of the
+Refer to the diagram `chain-self-management-sketch.Diagram1.pdf`, a
 flowchart of the 
 algorithm.  The code is structured as a state machine where function
 executing for the flowchart's state is named by the approximate
 location of the state within the flowchart.  The flowchart has three
--- a/prototype/README.md
+++ b/prototype/README.md
@ -23,7 +23,9 @@ We will also accept fixes for bugs in the test code.
 ## The chain-manager prototype
-TODO
+This is a very early experiment to try to create a distributed "rough
 consensus" algorithm that is sufficient & safe for managing the order
 of a Chain Replication chain, its members, and its chain order.
 ## The corfurl prototype
--- a/prototype/chain-manager/.gitignore
+++ b/prototype/chain-manager/.gitignore
@ -6,3 +6,4 @@ deps
 ebin/*.beam
 ebin/*.app
 erl_crash.dump
 RUNLOG*
--- a/prototype/chain-manager/Makefile
+++ b/prototype/chain-manager/Makefile
@ -23,7 +23,7 @@ eunit:
 pulse: compile
 	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
-	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
+	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE -v eunit
 CONC_ARGS = --pz ./.eunit --treat_as_normal shutdown --after_timeout 1000
@ -41,3 +41,15 @@ concuerror: deps compile
 	concuerror -m machi_flu0_test -t proj_store_test $(CONC_ARGS)
 	concuerror -m machi_flu0_test -t wedge_test $(CONC_ARGS)
 	concuerror -m machi_flu0_test -t proj0_test $(CONC_ARGS)
 APPS = kernel stdlib sasl erts ssl compiler eunit
 PLT = $(HOME)/.chmgr_dialyzer_plt
 build_plt: deps compile
 	dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
 dialyzer: deps compile
 	dialyzer -Wno_return --plt $(PLT) ebin
 clean_plt:
 	rm $(PLT)
--- a/prototype/chain-manager/README.md
+++ b/prototype/chain-manager/README.md
@ -0,0 +1,166 @@
 # The chain manager prototype
 This is a very early experiment to try to create a distributed "rough
 consensus" algorithm that is sufficient & safe for managing the order
 of a Chain Replication chain, its members, and its chain order.  A
 name hasn't been chosen yet, though the following are contenders:
 * chain self-management
 * rough consensus
 * humming consensus
 * foggy consensus
 ## Code status: active!
 Unlike the other code projects in this repository's `prototype`
 directory, the chain management code is still under active
 development.  It is quite likely (as of early March 2015) that this
 code will be robust enough to move to the "real" Machi code base soon.
 The most up-to-date documentation for this prototype will **not** be
 found in this subdirectory.  Rather, please see the `doc` directory at
 the top of the Machi source repository.
 ## Testing, testing, testing
 It's important to implement any Chain Replication chain manager as
 close to 100% bug-free as possible.  Any bug can introduce the
 possibility of data loss, which is something we must avoid.
 Therefore, we will spend a large amount of effort to use as many
 robust testing tools and methods as feasible to test this code.
 * [Concuerror](http://concuerror.com), a DPOR-based full state space
  exploration tool.  Some preliminary Concuerror tests can be found in the
  `test/machi_flu0_test.erl` module.
 * [QuickCheck](http://www.quviq.com/products/erlang-quickcheck/), a
  property-based testing tool for Erlang.  QuickCheck doesn't provide
  the reassurance of 100% state exploration, but it has proven quite
  effective at Basho for finding numerous subtle bugs.
 * Automatic simulation of arbitrary network partition failures.  This
  code is already in progress and is used, for example, by the
  `test/machi_chain_manager1_test.erl` module.
 * TLA+ (future work), to try to create a rigorous model of the
  algorithm and its behavior
 If you'd like to work on additional testing of this component, please
 [open a new GitHub Issue ticket](https://github.com/basho/machi) with
 any questions you have.  Or just open a GitHub pull request.  <tt>^_^</tt>
 ## Compilation & unit testing
 Use `make` and `make test`.  Note that the Makefile assumes that the
 `rebar` utility is available somewhere in your path.
 Tested using Erlang/OTP R16B and Erlang/OTP 17, both on OS X.
 If you wish to run the PULSE test in
 `test/machi_chain_manager1_pulse.erl` module, you must use Erlang
 R16B and Quviq QuickCheck 1.30.2 -- there is a known problem with
 QuickCheck 1.33.2, sorry!  Also, please note that a single iteration
 of a PULSE test case in this model can run for 10s of seconds!
 Otherwise, it ought to "just work" on other versions of Erlang and on other OS
 platforms, but sorry, I haven't tested it.
 ### Testing with simulated network partitions
 See the `doc/chain-self-management-sketch.org` file for details of how
 the simulator works.
 In summary, the simulator tries to emulate the effect of arbitrary
 asymmetric network partitions.  For example, for two simulated nodes A
 and B, it's possible to have node A send messages to B, but B cannot
 send messages to A.
 This kind of one-way message passing is nearly impossible to do with
 distributed Erlang, because disterl uses TCP.  If a network partition
 happens at ISO Layer 2 (for example, due to a bad Ethernet cable that
 has a faulty receive wire), the entire TCP connection will hang rather
 than deliver disterl messages in only one direction.
 ### Testing simulated data "repair"
 In the Machi documentation, "repair" is a re-synchronization of data
 between the UPI members of the chain (see below) and members which
 have been down/partitioned/gone-to-Hawaii-for-vacation for some period
 of time and may have state which is out-of-sync with the rest of the
 active-and-running-and-fully-in-sync chain members.
 A rough-and-inaccurate-but-useful summary of state transitions is:
    down -> repair eligible -> repairing started -> repairing finished -> upi
        * Any state can transition back to 'down'
        * Repair interruptions might trigger a transition to
           'repair eligible' instead of 'down'.
        * UPI = Update Propagation Invariant (per the original
                Chain Replication paper) preserving members.
                I.e., The state stored by any UPI member is fully
                in sync with all other UPI chain members, except
                for new updates which are being processed by Chain
                Replication at a particular instant in time.
 In both the PULSE and `convergence_demo*()` tests, there is a
 simulated time when a FLU's repair state goes from "repair started" to
 "repair finished", which means that the FLU-under-repair is now
 eligible to join the UPI portion of the chain as a fully-sync'ed
 member of the chain.  The simulation is based on a simple "coin
 flip"-style random choice.
 The simulator framework is simulating repair failures when a network
 partition is detected with the repair destination FLU.  In the "real
 world", other kinds of failure could also interrupt the repair
 process.
 ### The PULSE test in machi_chain_manager1_pulse.erl
 As mentioned above, this test is quite slow: it can take many dozens
 of seconds to execute a single test case.  However, the test really is using
 PULSE to play strange games with Erlang process scheduling.
 Unfortunately, the PULSE framework is very slow for this test.  We'd
 like something better, so I wrote the
 `machi_chain_manager1_test:convergence_demo_test()` test to use most
 of the network partition simulator to try to run many more partition
 scenarios in the same amount of time.
 ### machi_chain_manager1_test:convergence_demo1()
 This function is intended both as a demo and as a possible
 fully-automated sanity checking function (it will throw an exception
 when a model failure happens).  Its purpose is to "go faster" than
 the PULSE test described above.  It meets this purpose handily.
 However, it doesn't give quite as much confidence as PULSE does that
 Erlang process scheduling cannot somehow break the algorithm running
 inside the simulator.
 To execute:
    make test
    erl -pz ./.eunit deps/*/ebin
    ok = machi_chain_manager1_test:convergence_demo1().
 In summary:
 * Set up four FLUs, `[a,b,c,d]`, to be used for the test
 * Set up a set of random asymmetric network partitions, based on a
  'seed' for a pseudo-random number generator.  Each call to the
  partition simulator may yield a different partition scenario ... so
  the simulated environment is very unstable.
 * Run the algorithm for a while so that it has witnessed the partition
  instability for a long time.
 * Set the partitions definition to a fixed `[{a,b}]`, meaning that FLU `a`
  cannot send messages to FLU `b`, but all other communication
  (including messages from `b -> a`) works correctly.
 * Run the algorithm, wait for everyone to settle on rough consensus.
 * Set the partition definition to wildly random again.
 * Run the algorithm for a while so that it has witnessed the partition
  instability for a long time.
 * Set the partitions definition to a fixed `[{a,c}]`.
 * Run the algorithm, wait for everyone to settle on rough consensus.
 * Set the partitions definition to a fixed `[]`, i.e., there are no
  network partitions at all.
 * Run the algorithm, wait for everyone to settle on a **unanimous value**
  of some ordering of all four FLUs.
--- a/prototype/chain-manager/docs/README.md
+++ b/prototype/chain-manager/docs/README.md
@ -0,0 +1,2 @@
 Please see the `doc` directory at the top of the Machi repo.
--- a/prototype/chain-manager/docs/corfurl.md
+++ b/prototype/chain-manager/docs/corfurl.md
@ -1,191 +0,0 @@
 ## CORFU papers
 I recommend the "5 pages" paper below first, to give a flavor of
 what CORFU is about.  When Scott first read the CORFU paper
 back in 2011 (and the Hyder paper), he thought it was insanity.
 He recommends waiting before judging quite so hastily.  :-)
 After that, then perhaps take a step back and skim over the
 Hyder paper.  Hyder started before CORFU, but since CORFU, the
 Hyder folks at Microsoft have rewritten Hyder to use CORFU as
 the shared log underneath it.  But the Hyder paper has lots of
 interesting bits about how you'd go about creating a distributed
 DB where the transaction log *is* the DB.
 ### "CORFU: A Distributed Shared Log"
 MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN
 PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI,
 University of California, San Diego, TED WOBBER, Microsoft Research
 Silicon Valley
 Long version of introduction to CORFU (~30 pages)
 http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf
 ### "CORFU: A Shared Log Design for Flash Clusters"
 Same authors as above
 Short version of introduction to CORFU paper above (~12 pages)
 http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf
 ### "From Paxos to CORFU: A Flash-Speed Shared Log"
 Same authors as above
 5 pages, a short summary of CORFU basics and some trial applications
 that have been implemented on top of it.
 http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf
 ### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware"
 Wei, Davis, Wobber, Balakrishnan, Malkhi
 Summary report of implementing the CORFU server-side in
 FPGA-style hardware. (~11 pages)
 http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf
 ### "Tango: Distributed Data Structures over a Shared Log"
 Balakrishnan, Malkhi, Wobber, Wu, Prabhakaran, Wei, Davis, Rao, Zou, Zuck
 Describes a framework for developing data structures that reside
 persistently within a CORFU log: the log *is* the database/data
 structure store.
 http://www.snookles.com/scottmp/corfu/Tango.pdf
 ### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service"
 Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber
 The ZooKeeper inter-server communication is replaced with CORFU.
 Faster, fewer lines of code than ZK, and more features than the
 original ZK code base.
 http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf
 ### "Hyder – A Transactional Record Manager for Shared Flash"
 Bernstein, Reid, Das
 Describes a distributed log-based DB system where the txn log is
 treated quite oddly: a "txn intent" record is written to a
 shared common log.  All participants read the shared log in
 parallel and make commit/abort decisions in parallel, based on
 what conflicts (or not) that they see in the log.  Scott's first
 reading was "No way, wacky" ... and has since changed his mind.
 http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf
 pages 9-20
 ## Fiddling with PULSE
 Do the following:
    make clean
    make
    make pulse
 ... then watch the dots go across the screen for 60 seconds.  If you
 wish, you can press `Control-c` to interrupt the test.  We're really
 interested in the build artifacts.
    erl -pz .eunit deps/*/ebin
    eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
 This will run the PULSE test for 5 seconds.  Feel free to adjust for
 as many seconds as you wish.
    Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
    Eshell V5.10.3  (abort with ^G)
    1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
    Starting Quviq QuickCheck version 1.30.4
       (compiled at {{2014,2,7},{9,19,50}})
    Licence for Basho reserved until {{2014,2,17},{1,41,39}}
    ......................................................................................
    OK, passed 86 tests
    schedule:    Count: 86   Min: 2   Max: 1974   Avg: 3.2e+2   Total: 27260
    true
    2> 
 REPL interactive work can be done via:
 1. Edit code, e.g. `corfurl_pulse.erl`.
 2. Run `env BITCASK_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP`
 to compile.
 3. Reload any recompiled modules, e.g. `l(corfurl_pulse).`
 4. Resume QuickCheck activities.
 ## Seeing a PULSE scheduler interleaving failure in action
 1. Edit `corfurl_pulse:check_trace()` to uncomment the
   use of `conjunction()` that mentions `bogus_order_check_do_not_use_me`
   and comment out the real `conjunction()` call below it.
 2. Recompile & reload.
 3. Check.
 For example:
    9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
    .........Failed! After 9 tests.
 Sweet!  The first tuple below are the first `?FORALL()` values,
 and the 2nd is the list of commands,
 `{SequentialCommands, ListofParallelCommandLists}`.  The 3rd is the
 seed used to perturb the PULSE scheduler.
 In this case, `SequentialCommands` has two calls (to `setup()` then
 `append()`) and there are two parallel procs: one makes 1 call
 to `append()` and the other makes 2 calls to `append()`.
    {2,2,9}
    {{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}],
      [[{set,{var,3},
             {call,corfurl_pulse,append,
                   [{var,1},<<231,149,226,203,10,105,54,223,147>>]}}],
       [{set,{var,2},
             {call,corfurl_pulse,append,
                   [{var,1},<<7,206,146,75,249,13,154,238,110>>]}},
        {set,{var,4},
             {call,corfurl_pulse,append,
                   [{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]},
     {27492,46961,4884}}
 Here are our results:
    simple_result: passed
    errors: passed
    events: failed
    identity: passed
    bogus_order_check_do_not_use_me: failed
    [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
 Our (bogus!) order expectation was violated.  Shrinking!
    simple_result: passed
    errors: passed
    events: failed
    identity: passed
    bogus_order_check_do_not_use_me: failed
    [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
 Shrinking was able to remove two `append()` calls and to shrink the
 size of the pages down from 9 bytes down to 1 byte.
    Shrinking........(8 times)
    {1,1,1}
    {{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}],
      [[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],
       [{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]},
     {27492,46961,4884}}
    events: failed
    bogus_order_check_do_not_use_me: failed
    [{ok,2},{ok,1}] /= [{ok,1},{ok,2}]
    false
--- a/prototype/chain-manager/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen
+++ b/prototype/chain-manager/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen
@ -1,35 +0,0 @@
 msc {
    client1, FLU1, FLU2, client2, client3;
    client1 box client3  [label="Epoch #1: chain = FLU1 -> FLU2"];
    client1 -> FLU1      [label="{write,epoch1,<<Page YYY>>}"];
    client1 <- FLU1      [label="ok"];
    client1 box client1  [label="Client crash", textcolour="red"];
    FLU1 box FLU1        [label="FLU crash", textcolour="red"];
    client1 box client3  [label="Epoch #2: chain = FLU2"];
    client2 -> FLU2      [label="{write,epoch2,<<Page ZZZ>>}"];
    client2 <- FLU2      [label="ok"];
    client3 box client3  [label="Read repair starts", textbgcolour="aqua"];
    client3 -> FLU2      [label="{read,epoch2}"];
    client3 <- FLU2      [label="{ok,<<Page ZZZ>>}"];
    client3 -> FLU1      [label="{write,epoch2,<<Page ZZZ>>}"];
    FLU1 box FLU1        [label="What do we do here?  Our current value is <<Page YYY>>.", textcolour="red"] ;
    FLU1 box FLU1        [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ;
    FLU1 box FLU1        [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ;
    FLU1 -> client3      [label="I'm sorry, Dave, I cannot do that."];
    FLU1 box FLU1        [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ;
    client3 -> FLU1      [label="{write,epoch2,<<Page ZZZ>>,repair,witnesses=[FLU2]}",  textbgcolour="silver"];
    FLU1 box FLU1        [label="Start an async process to ask the witness list to corroborate this repair."];
    FLU1 -> FLU2         [label="{read,epoch2}", textbgcolour="aqua"];
    FLU1 <- FLU2         [label="{ok,<<Page ZZ>>}", textbgcolour="aqua"];
    FLU1 box FLU1        [label="Overwrite local storage with repair page.",  textbgcolour="silver"];
    client3 <- FLU1      [label="Async proc replies: ok",  textbgcolour="silver"];
 }
--- a/prototype/chain-manager/docs/corfurl/notes/README.md
+++ b/prototype/chain-manager/docs/corfurl/notes/README.md
@ -1,92 +0,0 @@
 ## read-repair-race.1.
 First attempt at using "mscgen" to make some Message Sequence
 Chart (MSC) for a race found at commit 087c2605ab.
 ## read-repair-race.2.
 Second attempt.  This is almost exactly the trace that is
 generated by this failing test case at commit 087c2605ab:
    C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]].
    eqc:check(corfurl_pulse:prop_pulse(), C2).
 ## read-repair-race.2b.*
 Same basic condition as read-repair-race.2, but edited
 substantially to make it clearer what is happening.
 Also for commit 087c2605ab.
 I believe that I have a fix for the silver-colored
 `error-overwritten` ... and it was indeed added to the code soon
 afterward, but it turns out that it doesn't solve the entire problem
 of "two clients try to write the exact same data at the same time to
 the same LPN".
 ## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN"
 This situation is something that CORFU cannot protect against, IMO.
 I have been struggling for a while, to try to find a way for CORFU
 clients to know *always* when there is a conflict with another
 writer.  It usually works: the basic nature of write-once registers is
 very powerful.  However, in the case where two clients are trying to
 write the same page data to the same LPN, it looks impossible to
 resolve.
 How do you tell the difference between:
 1. A race between a client A writing page P at address LPN and
   read-repair fixing P.  P *is* A's data and no other's, so this race
   doesn't confuse anyone.
 1. A race between a client A writing page P at address LPN and client
   B writing the exact same page data P at the same LPN.
   A's page P = B's page P, but clients A & B don't know that.
   If CORFU tells both A & B that they were successful, A & B assume
   that the CORFU log has two new pages appended to it, but in truth
   only one new page was appended.
 If we try to solve this by always avoiding the same LPN address
 conflict, we are deluding ourselves.  If we assume that the sequencer
 is 100% correct in that it never assigns the same LPN twice, and if we
 assume that a client must never write a block without an assignment
 from the sequencer, then the problem is solved.  But the problem has a
 _heavy_ price: the log is only available when the sequencer is
 available, and only when there is never more than one sequencer running at a
 time.
 The CORFU base system promises correct operation, even if:
 * Zero sequencers are running, and clients might choose the same LPN
  to write to.
 * Two or more sequencers are running, and different sequencers
  assign the same LPN to two different clients.
 But CORFU's "correct" behavior does not include detecting the same
 page at the same LPN.  The papers don't specifically say it, alas.
 But IMO it's impossible to guarantee, so all docs ought to explicitly
 say that it's impossible and that clients must not assume it.
 See also
 * two-clients-race.1.png
 ## A scenario of chain repair & write-once registers
 See:
 * 2014-02-27.chain-repair-need-write-twice.png
 ... for a scenario where write-once registers that are truly only
 write-once-ever-for-the-rest-of-the-future are "inconvenient" when it
 comes to chain repair.  Client 3 is attempting to do chain repair ops,
 bringing FLU1 back into sync with FLU2.
 The diagram proposes one possible idea for making overwriting a
 write-once register a bit safer: ask another node in the chain to
 verify that the page you've been asked to repair is exactly the same
 as that other FLU's page.
--- a/prototype/chain-manager/docs/corfurl/notes/read-repair-race.1.mscgen
+++ b/prototype/chain-manager/docs/corfurl/notes/read-repair-race.1.mscgen
@ -1,49 +0,0 @@
 msc {
  "<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"];
    "<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
    "<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ;
    "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"];
    "<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
    "<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ;
    "<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ;
    "<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ;
    "<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
    "<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ;
    "<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ;
    "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"];
    "<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
    "<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ;
    "<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ;
    "<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ;
    "<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ;
    "<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ;
    "<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
    "<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
    "<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ;
    "<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ;
    "<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ;
    "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ;
    "<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
    "<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
    "<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ;
    "<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
    "<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ;
    "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ;
    "<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ;
 }
--- a/prototype/chain-manager/docs/corfurl/notes/read-repair-race.2.mscgen
+++ b/prototype/chain-manager/docs/corfurl/notes/read-repair-race.2.mscgen
@ -1,60 +0,0 @@
 msc {
  "<0.32555.4>" [label="Client1"],     "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
    "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
    "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
    "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
    "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
    "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
 #    "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
 #    "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
    "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
    "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed.  Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
    "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ;
    "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
    "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
    "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
 }
--- a/prototype/chain-manager/docs/corfurl/notes/read-repair-race.2b.mscgen
+++ b/prototype/chain-manager/docs/corfurl/notes/read-repair-race.2b.mscgen
@ -1,57 +0,0 @@
 msc {
  "<0.32555.4>" [label="Client1"],     "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
    "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
    "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
    "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
    "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
    "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
 #    "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
 #    "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
    "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
    "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
    "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
    "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed.  Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
    "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
    "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
    "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
    "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
    "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4)", textbgcolour="yellow"] ;
    "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
    "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
    "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
    "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
 }
--- a/prototype/chain-manager/docs/corfurl/notes/two-clients-race.1.mscgen
+++ b/prototype/chain-manager/docs/corfurl/notes/two-clients-race.1.mscgen
@ -1,33 +0,0 @@
 msc {
    client1, FLU1, FLU2, client2, client3;
    client1 -> FLU1      [label="{write,epoch1,<<Not unique page>>}"];
    client1 <- FLU1      [label="ok"];
    client3 -> FLU2      [label="{seal,epoch1}"];
    client3 <- FLU2      [label="{ok,...}"];
    client3 -> FLU1      [label="{seal,epoch1}"];
    client3 <- FLU1      [label="{ok,...}"];
    client2 -> FLU1      [label="{write,epoch1,<<Not unique page>>}"];
    client2 <- FLU1      [label="error_epoch"];
    client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"];
    client2 -> FLU1      [label="{write,epoch2,<<Not unique page>>}"];
    client2 <- FLU1      [label="error_overwritten"];
    client1 -> FLU2      [label="{write,epoch1,<<Not unique page>>}"];
    client1 <- FLU2      [label="error_epoch"];
    client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"];
    client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ;
    client3 -> FLU1      [label="{read,epoch2}"];
    client3 <- FLU1      [label="{ok,<<Not unique page>>}"];
    client3 -> FLU2      [label="{write,epoch2,<<Not unique page>>}"];
    client3 <- FLU2      [label="ok"];
    client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ;
    client3 abox client3 [ label = "We saw <<Not unique page>>", textbgcolour="silver"] ;
    client1 -> FLU2      [label="{write,epoch2,<<Not unique page>>}"];
    client1 <- FLU2      [label="error_overwritten"];
 }
--- a/prototype/chain-manager/docs/machi/Diagram1.eps
+++ b/prototype/chain-manager/docs/machi/Diagram1.eps
--- a/prototype/chain-manager/docs/machi/chain-mgmt-flowchart.dot
+++ b/prototype/chain-manager/docs/machi/chain-mgmt-flowchart.dot
@ -1,109 +0,0 @@
 digraph {
    compound=true
    label="Machi chain management flowchart (sample)";
    node[shape="box", style="rounded"] 
        start;
    node[shape="box", style="rounded", label="stop1"] 
        stop1;
    node[shape="box", style="rounded", label="stop2"] 
        stop2;
    node[shape="box", style="rounded"] 
        crash;
 subgraph clustera {
    node[shape="parallelogram", style="", label="Set retry counter = 0"] 
        a05_retry;
    node[shape="parallelogram", style="", label="Create P_newprop @ epoch E+1\nbased on P_current @ epoch E"] 
        a10_create;
    node[shape="parallelogram", style="", label="Get latest public projection, P_latest"] 
        a20_get;
    node[shape="diamond", style="", label="Epoch(P_latest) > Epoch(P_current)\norelse\nP_latest was not unanimous"]
        a30_epoch;
    node[shape="diamond", style="", label="Epoch(P_latest) == Epoch(P_current)"]
        a40_epochequal;
    node[shape="diamond", style="", label="P_latest == P_current"]
        a50_equal;
 }
 subgraph clustera100 {
    node[shape="diamond", style="", label="Write P_newprop to everyone"]
        a100_write;
 }
 subgraph clusterb {
    node[shape="diamond", style="", label="P_latest was unanimous?"]
        b10_unanimous;
    node[shape="diamond", style="", label="Retry counter too big?"]
        b20_counter;
    node[shape="diamond", style="", label="Rank(P_latest) >= Rank(P_newprop)"]
        b30_rank;
    node[shape="diamond", style="", label="P_latest.upi == P_newprop.upi\nand also\nP_latest.repairing == P_newprop.repairing"]
        b40_condc;
    node[shape="square", style="", label="P_latest author is\ntoo slow, let's try!"]
        b45_lets;
    node[shape="parallelogram", style="", label="P_newprop is better than P_latest.\nSet P_newprop.epoch = P_latest.epoch + 1."]
        b50_better;
 }
 subgraph clusterc {
    node[shape="diamond", style="", label="Is Move(P_current, P_latest) ok?"]
        c10_move;
    node[shape="parallelogram", style="", label="Tell Author(P_latest) to rewrite\nwith a bigger epoch number"]
        c20_tell;
 }
 subgraph clusterd {
    node[shape="diamond", style="", label="Use P_latest as the\nnew P_current"]
        d10_use;
 }        
    start -> a05_retry;
    a05_retry -> a10_create;
    a10_create -> a20_get;
    a20_get -> a30_epoch;
    a30_epoch -> a40_epochequal[label="false"];
    a30_epoch -> b10_unanimous[label="true"];
    a40_epochequal -> a50_equal[label="true"];
    a40_epochequal -> crash[label="falseXX"];
    a50_equal -> stop1[label="true"];
    a50_equal -> b20_counter[label="false"];
    a100_write -> a10_create;
    b10_unanimous -> c10_move[label="yes"];
    b10_unanimous -> b20_counter[label="no"];
    b20_counter -> b45_lets[label="true"];
    b20_counter -> b30_rank[label="false"];
    b30_rank -> b40_condc[label="false"];
    b30_rank -> c20_tell[label="true"];
    b40_condc -> b50_better[label="false"];
    b40_condc -> c20_tell[label="true"];
    b45_lets -> b50_better;
    b50_better -> a100_write;
    c10_move -> d10_use[label="yes"];
    c10_move -> a100_write[label="no"];
    c20_tell -> b50_better;
    d10_use -> stop2;
    {rank=same; clustera clusterb clusterc clusterd};
 //    {rank=same; a10_create b10_unanimous c10_move d10_use stop2};
 //    {rank=same; a20_get b20_counter c20_tell};
 //    {rank=same; a30_epoch b40_condc};
 //    {rank=same; a40_epochequal b40_condc crash};
 //    {rank=same; stop1 a50_equal b50_better};
 //       if_valid;
 // 
 //    start -> input;
 //    input -> if_valid;
 //    if_valid -> message[label="no"];
 //    if_valid -> end[label="yes"];
 //    message -> input;     
 //    {rank=same; message input}
 }
--- a/prototype/chain-manager/rebar.config
+++ b/prototype/chain-manager/rebar.config
@ -0,0 +1,6 @@
 %%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
 {erl_opts, [{parse_transform, lager_transform}, debug_info]}.
 {deps, [
        {lager, "2.0.1", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}}
        ]}.
--- a/prototype/chain-manager/src/foo.app.src
+++ b/prototype/chain-manager/src/foo.app.src
@ -0,0 +1,9 @@
 {application, foo, [
    {description, "Prototype of Machi chain manager."},
    {vsn, "0.0.0"},
    {applications, [kernel, stdlib, lager]},
    {mod,{foo_unfinished_app,[]}},
    {registered, []},
    {env, [
    ]}
 ]}.
--- a/prototype/chain-manager/test/machi_chain_manager0_test.erl
+++ b/prototype/chain-manager/test/machi_chain_manager0_test.erl
@ -29,6 +29,7 @@
 -export([]).
 -ifdef(TEST).
 -ifndef(PULSE).
 -ifdef(EQC).
 -include_lib("eqc/include/eqc.hrl").
@ -458,4 +459,5 @@ combinations(L) ->
 perms([]) -> [[]];
 perms(L)  -> [[H|T] || H <- L, T <- perms(L--[H])].
 -endif. % ! PULSE
 -endif.
--- a/prototype/chain-manager/test/machi_chain_manager1_test.erl
+++ b/prototype/chain-manager/test/machi_chain_manager1_test.erl
@ -28,7 +28,7 @@
 -define(D(X), io:format(user, "~s ~p\n", [??X, X])).
 -define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
-export([]).
+-export([unanimous_report/1, unanimous_report/2]).
 -ifdef(TEST).
@ -42,6 +42,8 @@
 -include_lib("eunit/include/eunit.hrl").
 -compile(export_all).
 -ifndef(PULSE).
 smoke0_test() ->
    machi_partition_simulator:start_link({1,2,3}, 50, 50),
    {ok, FLUa} = machi_flu0:start_link(a),
@ -52,10 +54,12 @@ smoke0_test() ->
        %% If/when calculate_projection_internal_old() disappears, then
        %% get rid of the comprehension below ... start/ping/stop is
        %% good enough for smoke0.
        io:format(user, "\n\nBegin 5 lines of verbose stuff, check manually for differences\n", []),
        [begin
             Proj = ?MGR:calculate_projection_internal_old(M0),
             io:format(user, "~w\n", [?MGR:make_projection_summary(Proj)])
-         end || _ <- lists:seq(1,5)]
+         end || _ <- lists:seq(1,5)],
        io:format(user, "\n", [])
    after
        ok = ?MGR:stop(M0),
        ok = machi_flu0:stop(FLUa),
@ -105,9 +109,9 @@ nonunanimous_setup_and_fix_test() ->
        ok = machi_flu0:proj_write(FLUa, P1Epoch, public, P1a),
        ok = machi_flu0:proj_write(FLUb, P1Epoch, public, P1b),
-        ?D(x),
+        %% ?D(x),
        {not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
-        ?Dw(_XX),
+        %% ?Dw(_XX),
        {not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
        %% The read repair here doesn't automatically trigger the creation of
        %% a new projection (to try to create a unanimous projection).  So
@ -138,101 +142,18 @@ nonunanimous_setup_and_fix_test() ->
        ok = machi_partition_simulator:stop()
    end.
-unanimous_report(Namez) ->
+%% This test takes a long time and spits out a huge amount of logging
-    UniquePrivateEs =
+%% cruft to the console.  Comment out the EUnit fixture and run manually.
        lists:usort(lists:flatten(
                      [machi_flu0:proj_list_all(FLU, private) ||
                          {_FLUName, FLU} <- Namez])),
    [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
-unanimous_report(Epoch, Namez) ->
+%% convergence_demo_test_() ->
-    Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
+%%     {timeout, 300, fun() -> convergence_demo1() end}.
                           {ok, T} -> T;
                           _Else   -> not_in_this_epoch
                       end} || {FLUName, FLU} <- Namez],
    UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
                   Proj#projection.epoch_csum} ||
                     {_FLUname, Proj} <- Projs,
                     is_record(Proj, projection)],
    UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
    Res =
        [begin
             case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
                                       U == UPI]) of
                 [_1CSum] ->
                     %% Yay, there's only 1 checksum.  Let's check
                     %% that all FLUs are in agreement.
                     {UPI, Repairing, _CSum} =
                         lists:keyfind(UPI, 1, UPI_R_Sums),
                     %% TODO: make certain that this subtlety doesn't get
                     %%       lost in later implementations.
-                     %% So, this is a bit of a tricky thing.  If we're at
+convergence_demo1() ->
                     %% upi=[c] and repairing=[a,b], then the transition
                     %% (eventually!) to upi=[c,a] does not currently depend
                     %% on b being an active participant in the repair.
                     %%
                     %% Yes, b's state is very important for making certain
                     %% that all repair operations succeed both to a & b.
                     %% However, in this simulation, we only consider that
                     %% the head(Repairing) is sane.  Therefore, we use only
                     %% the "HeadOfRepairing" in our considerations here.
                     HeadOfRepairing = case Repairing of
                                          [H_Rep|_] ->
                                              [H_Rep];
                                          _ ->
                                              []
                                      end,
                     Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
                                      P when is_record(P, projection) ->
                                          P#projection.epoch_csum;
                                      Else ->
                                          Else
                                  end} || FLU <- UPI ++ HeadOfRepairing],
                     case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
                         [_] ->
                             {agreed_membership, {UPI, Repairing}};
                         Else2 ->
                             {not_agreed, {UPI, Repairing}, Else2}
                     end;
                 _Else ->
                     {UPI, not_unique, Epoch, _Else}
             end
         end || UPI <- UniqueUPIs],
    AgreedResUPI_Rs = [UPI++Repairing ||
                          {agreed_membership, {UPI, Repairing}} <- Res],
    Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
               lists:sort(lists:flatten(AgreedResUPI_Rs)) of
              true ->
                  ok_disjoint;
              false ->
                  bummer_NOT_DISJOINT
          end,
    {Epoch, {Tag, Res}}.
 all_reports_are_disjoint(Report) ->
    [] == [X || {_Epoch, Tuple}=X <- Report,
                element(1, Tuple) /= ok_disjoint].
 extract_chains_relative_to_flu(FLU, Report) ->
    {FLU, [{Epoch, UPI, Repairing} ||
              {Epoch, {ok_disjoint, Es}} <- Report,
              {agreed_membership, {UPI, Repairing}} <- Es,
              lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
 chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
    ?MGR:make_projection(Epoch, MyName, All_list,
                         All_list -- (UPI_list ++ Repairing_list),
                         UPI_list, Repairing_list, []).
 -ifndef(PULSE).
 convergence_demo_test_() ->
    {timeout, 300, fun() -> convergence_demo_test(x) end}.
 convergence_demo_test(_) ->
    All_list = [a,b,c,d],
-    machi_partition_simulator:start_link({111,222,33}, 0, 100),
+    %% machi_partition_simulator:start_link({111,222,33}, 0, 100),
    Seed = erlang:now(),
    machi_partition_simulator:start_link(Seed, 0, 100),
    io:format(user, "convergence_demo seed = ~p\n", [Seed]),
    _ = machi_partition_simulator:get(All_list),
    {ok, FLUa} = machi_flu0:start_link(a),
@ -357,4 +278,92 @@ convergence_demo_test(_) ->
    end.
 -endif. % not PULSE
 unanimous_report(Namez) ->
    UniquePrivateEs =
        lists:usort(lists:flatten(
                      [machi_flu0:proj_list_all(FLU, private) ||
                          {_FLUName, FLU} <- Namez])),
    [unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
 unanimous_report(Epoch, Namez) ->
    Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
                           {ok, T} -> T;
                           _Else   -> not_in_this_epoch
                       end} || {FLUName, FLU} <- Namez],
    UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
                   Proj#projection.epoch_csum} ||
                     {_FLUname, Proj} <- Projs,
                     is_record(Proj, projection)],
    UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
    Res =
        [begin
             case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
                                       U == UPI]) of
                 [_1CSum] ->
                     %% Yay, there's only 1 checksum.  Let's check
                     %% that all FLUs are in agreement.
                     {UPI, Repairing, _CSum} =
                         lists:keyfind(UPI, 1, UPI_R_Sums),
                     %% TODO: make certain that this subtlety doesn't get
                     %%       lost in later implementations.
                     %% So, this is a bit of a tricky thing.  If we're at
                     %% upi=[c] and repairing=[a,b], then the transition
                     %% (eventually!) to upi=[c,a] does not currently depend
                     %% on b being an active participant in the repair.
                     %%
                     %% Yes, b's state is very important for making certain
                     %% that all repair operations succeed both to a & b.
                     %% However, in this simulation, we only consider that
                     %% the head(Repairing) is sane.  Therefore, we use only
                     %% the "HeadOfRepairing" in our considerations here.
                     HeadOfRepairing = case Repairing of
                                          [H_Rep|_] ->
                                              [H_Rep];
                                          _ ->
                                              []
                                      end,
                     Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
                                      P when is_record(P, projection) ->
                                          P#projection.epoch_csum;
                                      Else ->
                                          Else
                                  end} || FLU <- UPI ++ HeadOfRepairing],
                     case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
                         [_] ->
                             {agreed_membership, {UPI, Repairing}};
                         Else2 ->
                             {not_agreed, {UPI, Repairing}, Else2}
                     end;
                 _Else ->
                     {UPI, not_unique, Epoch, _Else}
             end
         end || UPI <- UniqueUPIs],
    AgreedResUPI_Rs = [UPI++Repairing ||
                          {agreed_membership, {UPI, Repairing}} <- Res],
    Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
               lists:sort(lists:flatten(AgreedResUPI_Rs)) of
              true ->
                  ok_disjoint;
              false ->
                  bummer_NOT_DISJOINT
          end,
    {Epoch, {Tag, Res}}.
 all_reports_are_disjoint(Report) ->
    [] == [X || {_Epoch, Tuple}=X <- Report,
                element(1, Tuple) /= ok_disjoint].
 extract_chains_relative_to_flu(FLU, Report) ->
    {FLU, [{Epoch, UPI, Repairing} ||
              {Epoch, {ok_disjoint, Es}} <- Report,
              {agreed_membership, {UPI, Repairing}} <- Es,
              lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
 chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
    ?MGR:make_projection(Epoch, MyName, All_list,
                         All_list -- (UPI_list ++ Repairing_list),
                         UPI_list, Repairing_list, []).
 -endif. % TEST
--- a/prototype/chain-manager/test/machi_flu0_test.erl
+++ b/prototype/chain-manager/test/machi_flu0_test.erl
@ -29,6 +29,7 @@
 -endif.
 -ifdef(TEST).
 -ifndef(PULSE).
 repair_status_test() ->
    {ok, F} = machi_flu0:start_link(one),
@ -41,7 +42,6 @@ repair_status_test() ->
        ok = machi_flu0:stop(F)
    end.
 -ifndef(PULSE).
 concuerror1_test() ->
    ok.
@ -375,5 +375,5 @@ event_get_all() ->
    Tab = ?MODULE,
    ets:tab2list(Tab).
-endif.
+-endif. % ! PULSE
 -endif.
--- a/prototype/chain-manager/test/machi_util_test.erl
+++ b/prototype/chain-manager/test/machi_util_test.erl
@ -26,6 +26,7 @@
 -export([]).
 -ifdef(TEST).
 -ifndef(PULSE).
 -ifdef(EQC).
 -include_lib("eqc/include/eqc.hrl").
@ -146,5 +147,6 @@ make_canonical_form2([{File, Start, End, Members}|T]) ->
                                 Member <- Members] ++
        make_canonical_form2(T).
 -endif. % ! PULSE
 -endif. % TEST
		`@ -0,0 +1,2 @@`

							Please see the `doc` directory at the top of the Machi repo.