Merge branch 'slf/manager-cleanup1'
This commit is contained in:
commit
e0066660ef
22 changed files with 316 additions and 25552 deletions
|
@ -12,7 +12,7 @@ permits.
|
||||||
|
|
||||||
## Initial re-porting on 'prototype' directory
|
## Initial re-porting on 'prototype' directory
|
||||||
|
|
||||||
* `chain-manager`: working on it now......
|
* `chain-manager`: finished
|
||||||
* `corfurl`: finished
|
* `corfurl`: finished
|
||||||
* `demo-day-hack`: not started
|
* `demo-day-hack`: not started
|
||||||
* `tango`: finished
|
* `tango`: finished
|
||||||
|
|
|
@ -48,6 +48,8 @@ the simulator.
|
||||||
|
|
||||||
See [[https://tools.ietf.org/html/rfc7282][On Consensus and Humming in the IETF]], RFC 7282.
|
See [[https://tools.ietf.org/html/rfc7282][On Consensus and Humming in the IETF]], RFC 7282.
|
||||||
|
|
||||||
|
See also: [[http://www.snookles.com/slf-blog/2015/03/01/on-humming-consensus-an-allegory/][On “Humming Consensus”, an allegory]].
|
||||||
|
|
||||||
** Tunesmith?
|
** Tunesmith?
|
||||||
|
|
||||||
A mix of orchestral conducting, music composition, humming?
|
A mix of orchestral conducting, music composition, humming?
|
||||||
|
@ -365,7 +367,8 @@ document presents a detailed example.)
|
||||||
|
|
||||||
* Sketch of the self-management algorithm
|
* Sketch of the self-management algorithm
|
||||||
** Introduction
|
** Introduction
|
||||||
See also, the diagram (((Diagram1.eps))), a flowchart of the
|
Refer to the diagram `chain-self-management-sketch.Diagram1.pdf`, a
|
||||||
|
flowchart of the
|
||||||
algorithm. The code is structured as a state machine where function
|
algorithm. The code is structured as a state machine where function
|
||||||
executing for the flowchart's state is named by the approximate
|
executing for the flowchart's state is named by the approximate
|
||||||
location of the state within the flowchart. The flowchart has three
|
location of the state within the flowchart. The flowchart has three
|
||||||
|
|
|
@ -23,7 +23,9 @@ We will also accept fixes for bugs in the test code.
|
||||||
|
|
||||||
## The chain-manager prototype
|
## The chain-manager prototype
|
||||||
|
|
||||||
TODO
|
This is a very early experiment to try to create a distributed "rough
|
||||||
|
consensus" algorithm that is sufficient & safe for managing the order
|
||||||
|
of a Chain Replication chain, its members, and its chain order.
|
||||||
|
|
||||||
## The corfurl prototype
|
## The corfurl prototype
|
||||||
|
|
||||||
|
|
1
prototype/chain-manager/.gitignore
vendored
1
prototype/chain-manager/.gitignore
vendored
|
@ -6,3 +6,4 @@ deps
|
||||||
ebin/*.beam
|
ebin/*.beam
|
||||||
ebin/*.app
|
ebin/*.app
|
||||||
erl_crash.dump
|
erl_crash.dump
|
||||||
|
RUNLOG*
|
||||||
|
|
|
@ -23,7 +23,7 @@ eunit:
|
||||||
|
|
||||||
pulse: compile
|
pulse: compile
|
||||||
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
|
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
|
||||||
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
|
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE -v eunit
|
||||||
|
|
||||||
CONC_ARGS = --pz ./.eunit --treat_as_normal shutdown --after_timeout 1000
|
CONC_ARGS = --pz ./.eunit --treat_as_normal shutdown --after_timeout 1000
|
||||||
|
|
||||||
|
@ -41,3 +41,15 @@ concuerror: deps compile
|
||||||
concuerror -m machi_flu0_test -t proj_store_test $(CONC_ARGS)
|
concuerror -m machi_flu0_test -t proj_store_test $(CONC_ARGS)
|
||||||
concuerror -m machi_flu0_test -t wedge_test $(CONC_ARGS)
|
concuerror -m machi_flu0_test -t wedge_test $(CONC_ARGS)
|
||||||
concuerror -m machi_flu0_test -t proj0_test $(CONC_ARGS)
|
concuerror -m machi_flu0_test -t proj0_test $(CONC_ARGS)
|
||||||
|
|
||||||
|
APPS = kernel stdlib sasl erts ssl compiler eunit
|
||||||
|
PLT = $(HOME)/.chmgr_dialyzer_plt
|
||||||
|
|
||||||
|
build_plt: deps compile
|
||||||
|
dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
|
||||||
|
|
||||||
|
dialyzer: deps compile
|
||||||
|
dialyzer -Wno_return --plt $(PLT) ebin
|
||||||
|
|
||||||
|
clean_plt:
|
||||||
|
rm $(PLT)
|
||||||
|
|
166
prototype/chain-manager/README.md
Normal file
166
prototype/chain-manager/README.md
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
# The chain manager prototype
|
||||||
|
|
||||||
|
This is a very early experiment to try to create a distributed "rough
|
||||||
|
consensus" algorithm that is sufficient & safe for managing the order
|
||||||
|
of a Chain Replication chain, its members, and its chain order. A
|
||||||
|
name hasn't been chosen yet, though the following are contenders:
|
||||||
|
|
||||||
|
* chain self-management
|
||||||
|
* rough consensus
|
||||||
|
* humming consensus
|
||||||
|
* foggy consensus
|
||||||
|
|
||||||
|
## Code status: active!
|
||||||
|
|
||||||
|
Unlike the other code projects in this repository's `prototype`
|
||||||
|
directory, the chain management code is still under active
|
||||||
|
development. It is quite likely (as of early March 2015) that this
|
||||||
|
code will be robust enough to move to the "real" Machi code base soon.
|
||||||
|
|
||||||
|
The most up-to-date documentation for this prototype will **not** be
|
||||||
|
found in this subdirectory. Rather, please see the `doc` directory at
|
||||||
|
the top of the Machi source repository.
|
||||||
|
|
||||||
|
## Testing, testing, testing
|
||||||
|
|
||||||
|
It's important to implement any Chain Replication chain manager as
|
||||||
|
close to 100% bug-free as possible. Any bug can introduce the
|
||||||
|
possibility of data loss, which is something we must avoid.
|
||||||
|
Therefore, we will spend a large amount of effort to use as many
|
||||||
|
robust testing tools and methods as feasible to test this code.
|
||||||
|
|
||||||
|
* [Concuerror](http://concuerror.com), a DPOR-based full state space
|
||||||
|
exploration tool. Some preliminary Concuerror tests can be found in the
|
||||||
|
`test/machi_flu0_test.erl` module.
|
||||||
|
* [QuickCheck](http://www.quviq.com/products/erlang-quickcheck/), a
|
||||||
|
property-based testing tool for Erlang. QuickCheck doesn't provide
|
||||||
|
the reassurance of 100% state exploration, but it proven quite
|
||||||
|
effective at Basho for finding numerous subtle bugs.
|
||||||
|
* Automatic simulation of arbitrary network partition failures. This
|
||||||
|
code is already in progress and is used, for example, by the
|
||||||
|
`test/machi_chain_manager1_test.erl` module.
|
||||||
|
* TLA+ (future work), to try to create a rigorous model of the
|
||||||
|
algorithm and its behavior
|
||||||
|
|
||||||
|
If you'd like to work on additional testing of this component, please
|
||||||
|
[open a new GitHub Issue ticket](https://github.com/basho/machi) with
|
||||||
|
any questions you have. Or just open a GitHub pull request. <tt>^_^</tt>
|
||||||
|
|
||||||
|
## Compilation & unit testing
|
||||||
|
|
||||||
|
Use `make` and `make test`. Note that the Makefile assumes that the
|
||||||
|
`rebar` utility is available somewhere in your path.
|
||||||
|
|
||||||
|
Tested using Erlang/OTP R16B and Erlang/OTP 17, both on OS X.
|
||||||
|
|
||||||
|
If you wish to run the PULSE test in
|
||||||
|
`test/machi_chain_manager1_pulse.erl` module, you must use Erlang
|
||||||
|
R16B and Quviq QuickCheck 1.30.2 -- there is a known problem with
|
||||||
|
QuickCheck 1.33.2, sorry! Also, please note that a single iteration
|
||||||
|
of a PULSE test case in this model can run for 10s of seconds!
|
||||||
|
|
||||||
|
Otherwise, it ought to "just work" on other versions of Erlang and on other OS
|
||||||
|
platforms, but sorry, I haven't tested it.
|
||||||
|
|
||||||
|
### Testing with simulated network partitions
|
||||||
|
|
||||||
|
See the `doc/chain-self-management-sketch.org` file for details of how
|
||||||
|
the simulator works.
|
||||||
|
|
||||||
|
In summary, the simulator tries to emulate the effect of arbitrary
|
||||||
|
asymmetric network partitions. For example, for two simulated nodes A
|
||||||
|
and B, it's possible to have node A send messages to B, but B cannot
|
||||||
|
send messages to A.
|
||||||
|
|
||||||
|
This kind of one-way message passing is nearly impossible do with
|
||||||
|
distributed Erlang, because disterl uses TCP. If a network partition
|
||||||
|
happens at ISO Layer 2 (for example, due to a bad Ethernet cable that
|
||||||
|
has a faulty receive wire), the entire TCP connection will hang rather
|
||||||
|
than deliver disterl messages in only one direction.
|
||||||
|
|
||||||
|
### Testing simulated data "repair"
|
||||||
|
|
||||||
|
In the Machi documentation, "repair" is a re-syncronization of data
|
||||||
|
between the UPI members of the chain (see below) and members which
|
||||||
|
have been down/partitioned/gone-to-Hawaii-for-vacation for some period
|
||||||
|
of time and may have state which is out-of-sync with the rest of the
|
||||||
|
active-and-running-and-fully-in-sync chain members.
|
||||||
|
|
||||||
|
A rough-and-inaccurate-but-useful summary of state transitions are:
|
||||||
|
|
||||||
|
down -> repair eligible -> repairing started -> repairing finished -> upi
|
||||||
|
|
||||||
|
* Any state can transition back to 'down'
|
||||||
|
* Repair interruptions might trigger a transition to
|
||||||
|
'repair eligible instead of 'down'.
|
||||||
|
* UPI = Update Propagation Invariant (per the original
|
||||||
|
Chain Replication paper) preserving members.
|
||||||
|
|
||||||
|
I.e., The state stored by any UPI member is fully
|
||||||
|
in sync with all other UPI chain members, except
|
||||||
|
for new updates which are being processed by Chain
|
||||||
|
Replication at a particular instant in time.
|
||||||
|
|
||||||
|
In both the PULSE and `convergence_demo*()` tests, there is a
|
||||||
|
simulated time when a FLU's repair state goes from "repair started" to
|
||||||
|
"repair finished", which means that the FLU-under-repair is now
|
||||||
|
eligible to join the UPI portion of the chain as a fully-sync'ed
|
||||||
|
member of the chain. The simulation is based on a simple "coin
|
||||||
|
flip"-style random choice.
|
||||||
|
|
||||||
|
The simulator framework is simulating repair failures when a network
|
||||||
|
partition is detected with the repair destination FLU. In the "real
|
||||||
|
world", other kinds of failure could also interrupt the repair
|
||||||
|
process.
|
||||||
|
|
||||||
|
### The PULSE test in machi_chain_manager1_test.erl
|
||||||
|
|
||||||
|
As mentioned above, this test is quite slow: it can take many dozens
|
||||||
|
of seconds to execute a single test case. However, the test really is using
|
||||||
|
PULSE to play strange games with Erlang process scheduling.
|
||||||
|
|
||||||
|
Unfortnately, the PULSE framework is very slow for this test. We'd
|
||||||
|
like something better, so I wrote the
|
||||||
|
`machi_chain_manager1_test:convergence_demo_test()` test to use most
|
||||||
|
of the network partition simulator to try to run many more partition
|
||||||
|
scenarios in the same amount of time.
|
||||||
|
|
||||||
|
### machi_chain_manager1_test:convergence_demo1()
|
||||||
|
|
||||||
|
This function is intended both as a demo and as a possible
|
||||||
|
fully-automated sanity checking function (it will throw an exception
|
||||||
|
when a model failure happens). It's purpose is to "go faster" than
|
||||||
|
the PULSE test describe above. It meets this purpose handily.
|
||||||
|
However, it doesn't give quite as much confidence as PULSE does that
|
||||||
|
Erlang process scheduling cannot somehow break algorithm running
|
||||||
|
inside the simulator.
|
||||||
|
|
||||||
|
To execute:
|
||||||
|
|
||||||
|
make test
|
||||||
|
erl -pz ./.eunit deps/*/ebin
|
||||||
|
ok = machi_chain_manager1_test:convergence_demo1().
|
||||||
|
|
||||||
|
In summary:
|
||||||
|
|
||||||
|
* Set up four FLUs, `[a,b,c,d]`, to be used for the test
|
||||||
|
* Set up a set of random asymmetric network partitions, based on a
|
||||||
|
'seed' for a pseudo-random number generator. Each call to the
|
||||||
|
partition simulator may yield a different partition scenario ... so
|
||||||
|
the simulated environment is very unstable.
|
||||||
|
* Run the algorithm for a while so that it has witnessed the partition
|
||||||
|
instability for a long time.
|
||||||
|
* Set the partitions definition to a fixed `[{a,b}]`, meaning that FLU `a`
|
||||||
|
cannot send messages to FLU `b`, but all other communication
|
||||||
|
(including messages from `b -> a`) works correctly.
|
||||||
|
* Run the algorithm, wait for everyone to settle on rough consensus.
|
||||||
|
* Set the partition definition to wildly random again.
|
||||||
|
* Run the algorithm for a while so that it has witnessed the partition
|
||||||
|
instability for a long time.
|
||||||
|
* Set the partitions definition to a fixed `[{a,c}]`.
|
||||||
|
* Run the algorithm, wait for everyone to settle on rough consensus.
|
||||||
|
* Set the partitions definition to a fixed `[]`, i.e., there are no
|
||||||
|
network partitions at all.
|
||||||
|
* Run the algorithm, wait for everyone to settle on a **unanimous value**
|
||||||
|
of some ordering of all four FLUs.
|
||||||
|
|
2
prototype/chain-manager/docs/README.md
Normal file
2
prototype/chain-manager/docs/README.md
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
|
||||||
|
Please see the `doc` directory at the top of the Machi repo.
|
|
@ -1,191 +0,0 @@
|
||||||
## CORFU papers
|
|
||||||
|
|
||||||
I recommend the "5 pages" paper below first, to give a flavor of
|
|
||||||
what the CORFU is about. When Scott first read the CORFU paper
|
|
||||||
back in 2011 (and the Hyder paper), he thought it was insanity.
|
|
||||||
He recommends waiting before judging quite so hastily. :-)
|
|
||||||
|
|
||||||
After that, then perhaps take a step back are skim over the
|
|
||||||
Hyder paper. Hyder started before CORFU, but since CORFU, the
|
|
||||||
Hyder folks at Microsoft have rewritten Hyder to use CORFU as
|
|
||||||
the shared log underneath it. But the Hyder paper has lots of
|
|
||||||
interesting bits about how you'd go about creating a distributed
|
|
||||||
DB where the transaction log *is* the DB.
|
|
||||||
|
|
||||||
### "CORFU: A Distributed Shared LogCORFU: A Distributed Shared Log"
|
|
||||||
|
|
||||||
MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN
|
|
||||||
PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI,
|
|
||||||
University of California, San Diego, TED WOBBER, Microsoft Research
|
|
||||||
Silicon Valley
|
|
||||||
|
|
||||||
Long version of introduction to CORFU (~30 pages)
|
|
||||||
http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf
|
|
||||||
|
|
||||||
### "CORFU: A Shared Log Design for Flash Clusters"
|
|
||||||
|
|
||||||
Same authors as above
|
|
||||||
|
|
||||||
Short version of introduction to CORFU paper above (~12 pages)
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf
|
|
||||||
|
|
||||||
### "From Paxos to CORFU: A Flash-Speed Shared Log"
|
|
||||||
|
|
||||||
Same authors as above
|
|
||||||
|
|
||||||
5 pages, a short summary of CORFU basics and some trial applications
|
|
||||||
that have been implemented on top of it.
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf
|
|
||||||
|
|
||||||
### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware"
|
|
||||||
|
|
||||||
Wei, Davis, Wobber, Balakrishnan, Malkhi
|
|
||||||
|
|
||||||
Summary report of implmementing the CORFU server-side in
|
|
||||||
FPGA-style hardware. (~11 pages)
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf
|
|
||||||
|
|
||||||
### "Tango: Distributed Data Structures over a Shared Log"
|
|
||||||
|
|
||||||
Balakrishnan, Malkhi, Wobber, Wu, Brabhakaran, Wei, Davis, Rao, Zou, Zuck
|
|
||||||
|
|
||||||
Describes a framework for developing data structures that reside
|
|
||||||
persistently within a CORFU log: the log *is* the database/data
|
|
||||||
structure store.
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/Tango.pdf
|
|
||||||
|
|
||||||
### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service"
|
|
||||||
|
|
||||||
Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber
|
|
||||||
|
|
||||||
The ZooKeeper inter-server communication is replaced with CORFU.
|
|
||||||
Faster, fewer lines of code than ZK, and more features than the
|
|
||||||
original ZK code base.
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf
|
|
||||||
|
|
||||||
### "Hyder – A Transactional Record Manager for Shared Flash"
|
|
||||||
|
|
||||||
Bernstein, Reid, Das
|
|
||||||
|
|
||||||
Describes a distributed log-based DB system where the txn log is
|
|
||||||
treated quite oddly: a "txn intent" record is written to a
|
|
||||||
shared common log All participants read the shared log in
|
|
||||||
parallel and make commit/abort decisions in parallel, based on
|
|
||||||
what conflicts (or not) that they see in the log. Scott's first
|
|
||||||
reading was "No way, wacky" ... and has since changed his mind.
|
|
||||||
|
|
||||||
http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf
|
|
||||||
pages 9-20
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Fiddling with PULSE
|
|
||||||
|
|
||||||
Do the following:
|
|
||||||
|
|
||||||
make clean
|
|
||||||
make
|
|
||||||
make pulse
|
|
||||||
|
|
||||||
... then watch the dots go across the screen for 60 seconds. If you
|
|
||||||
wish, you can press `Control-c` to interrupt the test. We're really
|
|
||||||
interested in the build artifacts.
|
|
||||||
|
|
||||||
erl -pz .eunit deps/*/ebin
|
|
||||||
eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
|
||||||
|
|
||||||
This will run the PULSE test for 5 seconds. Feel free to adjust for
|
|
||||||
as many seconds as you wish.
|
|
||||||
|
|
||||||
Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
|
|
||||||
|
|
||||||
Eshell V5.10.3 (abort with ^G)
|
|
||||||
1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
|
||||||
Starting Quviq QuickCheck version 1.30.4
|
|
||||||
(compiled at {{2014,2,7},{9,19,50}})
|
|
||||||
Licence for Basho reserved until {{2014,2,17},{1,41,39}}
|
|
||||||
......................................................................................
|
|
||||||
OK, passed 86 tests
|
|
||||||
schedule: Count: 86 Min: 2 Max: 1974 Avg: 3.2e+2 Total: 27260
|
|
||||||
true
|
|
||||||
2>
|
|
||||||
|
|
||||||
REPL interactive work can be done via:
|
|
||||||
|
|
||||||
1. Edit code, e.g. `corfurl_pulse.erl`.
|
|
||||||
2. Run `env BITCASK_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP`
|
|
||||||
to compile.
|
|
||||||
3. Reload any recompiled modules, e.g. `l(corfurl_pulse).`
|
|
||||||
4. Resume QuickCheck activities.
|
|
||||||
|
|
||||||
## Seeing an PULSE scheduler interleaving failure in action
|
|
||||||
|
|
||||||
1. Edit `corfurl_pulse:check_trace()` to uncomment the
|
|
||||||
use of `conjunction()` that mentions `bogus_order_check_do_not_use_me`
|
|
||||||
and comment out the real `conjunction()` call below it.
|
|
||||||
2. Recompile & reload.
|
|
||||||
3. Check.
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
|
|
||||||
.........Failed! After 9 tests.
|
|
||||||
|
|
||||||
Sweet! The first tuple below are the first `?FORALL()` values,
|
|
||||||
and the 2nd is the list of commands,
|
|
||||||
`{SequentialCommands, ListofParallelCommandLists}`. The 3rd is the
|
|
||||||
seed used to perturb the PULSE scheduler.
|
|
||||||
|
|
||||||
In this case, `SequentialCommands` has two calls (to `setup()` then
|
|
||||||
`append()`) and there are two parallel procs: one makes 1 call
|
|
||||||
call to `append()` and the other makes 2 calls to `append()`.
|
|
||||||
|
|
||||||
{2,2,9}
|
|
||||||
{{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}],
|
|
||||||
[[{set,{var,3},
|
|
||||||
{call,corfurl_pulse,append,
|
|
||||||
[{var,1},<<231,149,226,203,10,105,54,223,147>>]}}],
|
|
||||||
[{set,{var,2},
|
|
||||||
{call,corfurl_pulse,append,
|
|
||||||
[{var,1},<<7,206,146,75,249,13,154,238,110>>]}},
|
|
||||||
{set,{var,4},
|
|
||||||
{call,corfurl_pulse,append,
|
|
||||||
[{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]},
|
|
||||||
{27492,46961,4884}}
|
|
||||||
|
|
||||||
Here are our results:
|
|
||||||
|
|
||||||
simple_result: passed
|
|
||||||
errors: passed
|
|
||||||
events: failed
|
|
||||||
identity: passed
|
|
||||||
bogus_order_check_do_not_use_me: failed
|
|
||||||
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
|
|
||||||
|
|
||||||
Our (bogus!) order expectation was violated. Shrinking!
|
|
||||||
|
|
||||||
simple_result: passed
|
|
||||||
errors: passed
|
|
||||||
events: failed
|
|
||||||
identity: passed
|
|
||||||
bogus_order_check_do_not_use_me: failed
|
|
||||||
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
|
|
||||||
|
|
||||||
Shrinking was able to remove two `append()` calls and to shrink the
|
|
||||||
size of the pages down from 9 bytes down to 1 byte.
|
|
||||||
|
|
||||||
Shrinking........(8 times)
|
|
||||||
{1,1,1}
|
|
||||||
{{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}],
|
|
||||||
[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],
|
|
||||||
[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]},
|
|
||||||
{27492,46961,4884}}
|
|
||||||
events: failed
|
|
||||||
bogus_order_check_do_not_use_me: failed
|
|
||||||
[{ok,2},{ok,1}] /= [{ok,1},{ok,2}]
|
|
||||||
false
|
|
|
@ -1,35 +0,0 @@
|
||||||
msc {
|
|
||||||
client1, FLU1, FLU2, client2, client3;
|
|
||||||
|
|
||||||
client1 box client3 [label="Epoch #1: chain = FLU1 -> FLU2"];
|
|
||||||
client1 -> FLU1 [label="{write,epoch1,<<Page YYY>>}"];
|
|
||||||
client1 <- FLU1 [label="ok"];
|
|
||||||
client1 box client1 [label="Client crash", textcolour="red"];
|
|
||||||
|
|
||||||
FLU1 box FLU1 [label="FLU crash", textcolour="red"];
|
|
||||||
|
|
||||||
client1 box client3 [label="Epoch #2: chain = FLU2"];
|
|
||||||
|
|
||||||
client2 -> FLU2 [label="{write,epoch2,<<Page ZZZ>>}"];
|
|
||||||
client2 <- FLU2 [label="ok"];
|
|
||||||
|
|
||||||
client3 box client3 [label="Read repair starts", textbgcolour="aqua"];
|
|
||||||
|
|
||||||
client3 -> FLU2 [label="{read,epoch2}"];
|
|
||||||
client3 <- FLU2 [label="{ok,<<Page ZZZ>>}"];
|
|
||||||
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>}"];
|
|
||||||
FLU1 box FLU1 [label="What do we do here? Our current value is <<Page YYY>>.", textcolour="red"] ;
|
|
||||||
FLU1 box FLU1 [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ;
|
|
||||||
FLU1 box FLU1 [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ;
|
|
||||||
FLU1 -> client3 [label="I'm sorry, Dave, I cannot do that."];
|
|
||||||
|
|
||||||
FLU1 box FLU1 [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ;
|
|
||||||
|
|
||||||
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>,repair,witnesses=[FLU2]}", textbgcolour="silver"];
|
|
||||||
FLU1 box FLU1 [label="Start an async process to ask the witness list to corroborate this repair."];
|
|
||||||
FLU1 -> FLU2 [label="{read,epoch2}", textbgcolour="aqua"];
|
|
||||||
FLU1 <- FLU2 [label="{ok,<<Page ZZ>>}", textbgcolour="aqua"];
|
|
||||||
FLU1 box FLU1 [label="Overwrite local storage with repair page.", textbgcolour="silver"];
|
|
||||||
client3 <- FLU1 [label="Async proc replies: ok", textbgcolour="silver"];
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,92 +0,0 @@
|
||||||
|
|
||||||
## read-repair-race.1.
|
|
||||||
|
|
||||||
First attempt at using "mscgen" to make some Message Sequence
|
|
||||||
Chart (MSC) for a race found at commit 087c2605ab.
|
|
||||||
|
|
||||||
|
|
||||||
## read-repair-race.2.
|
|
||||||
|
|
||||||
Second attempt. This is almost exactly the trace that is
|
|
||||||
generated by this failing test case at commit 087c2605ab:
|
|
||||||
|
|
||||||
C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]].
|
|
||||||
eqc:check(corfurl_pulse:prop_pulse(), C2).
|
|
||||||
|
|
||||||
## read-repair-race.2b.*
|
|
||||||
|
|
||||||
Same basic condition as read-repair-race.2, but edited
|
|
||||||
substantially to make it clearer what is happening.
|
|
||||||
Also for commit 087c2605ab.
|
|
||||||
|
|
||||||
I believe that I have a fix for the silver-colored
|
|
||||||
`error-overwritten` ... and it was indeed added to the code soon
|
|
||||||
afterward, but it turns out that it doesn't solve the entire problem
|
|
||||||
of "two clients try to write the exact same data at the same time to
|
|
||||||
the same LPN".
|
|
||||||
|
|
||||||
|
|
||||||
## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN"
|
|
||||||
|
|
||||||
This situation is something that CORFU cannot protect against, IMO.
|
|
||||||
|
|
||||||
I have been struggling for a while, to try to find a way for CORFU
|
|
||||||
clients to know *always* when there is a conflict with another
|
|
||||||
writer. It usually works: the basic nature of write-once registers is
|
|
||||||
very powerful. However, in the case where two clients are trying to
|
|
||||||
write the same page data to the same LPN, it looks impossible to
|
|
||||||
resolve.
|
|
||||||
|
|
||||||
How do you tell the difference between:
|
|
||||||
|
|
||||||
1. A race between a client A writing page P at address LPN and
|
|
||||||
read-repair fixing P. P *is* A's data and no other's, so this race
|
|
||||||
doesn't confuse anyone.
|
|
||||||
|
|
||||||
1. A race between a client A writing page P at address LPN and client
|
|
||||||
B writing the exact same page data P at the same LPN.
|
|
||||||
A's page P = B's page P, but clients A & B don't know that.
|
|
||||||
|
|
||||||
If CORFU tells both A & B that they were successful, A & B assume
|
|
||||||
that the CORFU log has two new pages appended to it, but in truth
|
|
||||||
only one new page was appended.
|
|
||||||
|
|
||||||
If we try to solve this by always avoiding the same LPN address
|
|
||||||
conflict, we are deluding ourselves. If we assume that the sequencer
|
|
||||||
is 100% correct in that it never assigns the same LPN twice, and if we
|
|
||||||
assume that a client must never write a block without an assignment
|
|
||||||
from the sequencer, then the problem is solved. But the problem has a
|
|
||||||
_heavy_ price: the log is only available when the sequencer is
|
|
||||||
available, and only when never more than one sequencer running at a
|
|
||||||
time.
|
|
||||||
|
|
||||||
The CORFU base system promises correct operation, even if:
|
|
||||||
|
|
||||||
* Zero sequencers are running, and clients might choose the same LPN
|
|
||||||
to write to.
|
|
||||||
* Two more more sequencers are running, and different sequencers
|
|
||||||
assign the same LPN to two different clients.
|
|
||||||
|
|
||||||
But CORFU's "correct" behavior does not include detecting the same
|
|
||||||
page at the same LPN. The papers don't specifically say it, alas.
|
|
||||||
But IMO it's impossible to guarantee, so all docs ought to explicitly
|
|
||||||
say that it's impossible and that clients must not assume it.
|
|
||||||
|
|
||||||
See also
|
|
||||||
* two-clients-race.1.png
|
|
||||||
|
|
||||||
## A scenario of chain repair & write-once registers
|
|
||||||
|
|
||||||
See:
|
|
||||||
* 2014-02-27.chain-repair-write-twice.png
|
|
||||||
|
|
||||||
... for a scenario where write-once registers that are truly only
|
|
||||||
write-once-ever-for-the-rest-of-the-future are "inconvenient" when it
|
|
||||||
comes to chain repair. Client 3 is attempting to do chain repair ops,
|
|
||||||
bringing FLU1 back into sync with FLU2.
|
|
||||||
|
|
||||||
The diagram proposes one possible idea for making overwriting a
|
|
||||||
read-once register a bit safer: ask another node in the chain to
|
|
||||||
verify that the page you've been asked to repair is exactly the same
|
|
||||||
as that other FLU's page.
|
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
msc {
|
|
||||||
"<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"];
|
|
||||||
|
|
||||||
"<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ;
|
|
||||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"];
|
|
||||||
"<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ;
|
|
||||||
"<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"];
|
|
||||||
"<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
|
||||||
"<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ;
|
|
||||||
"<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ;
|
|
||||||
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ;
|
|
||||||
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ;
|
|
||||||
"<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ;
|
|
||||||
}
|
|
|
@ -1,60 +0,0 @@
|
||||||
msc {
|
|
||||||
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
|
|
||||||
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
|
|
||||||
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ;
|
|
||||||
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
|
|
||||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
|
|
||||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
|
|
||||||
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ;
|
|
||||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
|
|
||||||
}
|
|
|
@ -1,57 +0,0 @@
|
||||||
msc {
|
|
||||||
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
|
|
||||||
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
|
|
||||||
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
|
|
||||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
|
|
||||||
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
|
|
||||||
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ;
|
|
||||||
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
|
|
||||||
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
|
||||||
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
|
|
||||||
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
|
|
||||||
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
|
|
||||||
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
|
|
||||||
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
|
|
||||||
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
msc {
|
|
||||||
client1, FLU1, FLU2, client2, client3;
|
|
||||||
|
|
||||||
client1 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
|
|
||||||
client1 <- FLU1 [label="ok"];
|
|
||||||
|
|
||||||
client3 -> FLU2 [label="{seal,epoch1}"];
|
|
||||||
client3 <- FLU2 [label="{ok,...}"];
|
|
||||||
client3 -> FLU1 [label="{seal,epoch1}"];
|
|
||||||
client3 <- FLU1 [label="{ok,...}"];
|
|
||||||
|
|
||||||
client2 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
|
|
||||||
client2 <- FLU1 [label="error_epoch"];
|
|
||||||
client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"];
|
|
||||||
client2 -> FLU1 [label="{write,epoch2,<<Not unique page>>}"];
|
|
||||||
client2 <- FLU1 [label="error_overwritten"];
|
|
||||||
|
|
||||||
client1 -> FLU2 [label="{write,epoch1,<<Not unique page>>}"];
|
|
||||||
client1 <- FLU2 [label="error_epoch"];
|
|
||||||
client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"];
|
|
||||||
|
|
||||||
client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ;
|
|
||||||
client3 -> FLU1 [label="{read,epoch2}"];
|
|
||||||
client3 <- FLU1 [label="{ok,<<Not unique page>>}"];
|
|
||||||
client3 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
|
|
||||||
client3 <- FLU2 [label="ok"];
|
|
||||||
client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ;
|
|
||||||
client3 abox client3 [ label = "We saw <<Not unique page>>", textbgcolour="silver"] ;
|
|
||||||
|
|
||||||
client1 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
|
|
||||||
client1 <- FLU2 [label="error_overwritten"];
|
|
||||||
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,109 +0,0 @@
|
||||||
digraph {
|
|
||||||
compound=true
|
|
||||||
label="Machi chain management flowchart (sample)";
|
|
||||||
|
|
||||||
node[shape="box", style="rounded"]
|
|
||||||
start;
|
|
||||||
node[shape="box", style="rounded", label="stop1"]
|
|
||||||
stop1;
|
|
||||||
node[shape="box", style="rounded", label="stop2"]
|
|
||||||
stop2;
|
|
||||||
node[shape="box", style="rounded"]
|
|
||||||
crash;
|
|
||||||
|
|
||||||
subgraph clustera {
|
|
||||||
node[shape="parallelogram", style="", label="Set retry counter = 0"]
|
|
||||||
a05_retry;
|
|
||||||
node[shape="parallelogram", style="", label="Create P_newprop @ epoch E+1\nbased on P_current @ epoch E"]
|
|
||||||
a10_create;
|
|
||||||
node[shape="parallelogram", style="", label="Get latest public projection, P_latest"]
|
|
||||||
a20_get;
|
|
||||||
node[shape="diamond", style="", label="Epoch(P_latest) > Epoch(P_current)\norelse\nP_latest was not unanimous"]
|
|
||||||
a30_epoch;
|
|
||||||
node[shape="diamond", style="", label="Epoch(P_latest) == Epoch(P_current)"]
|
|
||||||
a40_epochequal;
|
|
||||||
node[shape="diamond", style="", label="P_latest == P_current"]
|
|
||||||
a50_equal;
|
|
||||||
}
|
|
||||||
|
|
||||||
subgraph clustera100 {
|
|
||||||
node[shape="diamond", style="", label="Write P_newprop to everyone"]
|
|
||||||
a100_write;
|
|
||||||
}
|
|
||||||
|
|
||||||
subgraph clusterb {
|
|
||||||
node[shape="diamond", style="", label="P_latest was unanimous?"]
|
|
||||||
b10_unanimous;
|
|
||||||
node[shape="diamond", style="", label="Retry counter too big?"]
|
|
||||||
b20_counter;
|
|
||||||
node[shape="diamond", style="", label="Rank(P_latest) >= Rank(P_newprop)"]
|
|
||||||
b30_rank;
|
|
||||||
node[shape="diamond", style="", label="P_latest.upi == P_newprop.upi\nand also\nPlatest.repairing == P_newprop.repairing"]
|
|
||||||
b40_condc;
|
|
||||||
node[shape="square", style="", label="P_latest author is\ntoo slow, let's try!"]
|
|
||||||
b45_lets;
|
|
||||||
node[shape="parallelogram", style="", label="P_newprop is better than P_latest.\nSet P_newprop.epoch = P_latest.epoch + 1."]
|
|
||||||
b50_better;
|
|
||||||
}
|
|
||||||
|
|
||||||
subgraph clusterc {
|
|
||||||
node[shape="diamond", style="", label="Is Move(P_current, P_latest) ok?"]
|
|
||||||
c10_move;
|
|
||||||
node[shape="parallelogram", style="", label="Tell Author(P_latest) to rewrite\nwith a bigger epoch number"]
|
|
||||||
c20_tell;
|
|
||||||
}
|
|
||||||
|
|
||||||
subgraph clusterd {
|
|
||||||
node[shape="diamond", style="", label="Use P_latest as the\nnew P_current"]
|
|
||||||
d10_use;
|
|
||||||
}
|
|
||||||
|
|
||||||
start -> a05_retry;
|
|
||||||
|
|
||||||
a05_retry -> a10_create;
|
|
||||||
a10_create -> a20_get;
|
|
||||||
a20_get -> a30_epoch;
|
|
||||||
a30_epoch -> a40_epochequal[label="false"];
|
|
||||||
a30_epoch -> b10_unanimous[label="true"];
|
|
||||||
a40_epochequal -> a50_equal[label="true"];
|
|
||||||
a40_epochequal -> crash[label="falseXX"];
|
|
||||||
a50_equal -> stop1[label="true"];
|
|
||||||
a50_equal -> b20_counter[label="false"];
|
|
||||||
|
|
||||||
a100_write -> a10_create;
|
|
||||||
|
|
||||||
b10_unanimous -> c10_move[label="yes"];
|
|
||||||
b10_unanimous -> b20_counter[label="no"];
|
|
||||||
b20_counter -> b45_lets[label="true"];
|
|
||||||
b20_counter -> b30_rank[label="false"];
|
|
||||||
b30_rank -> b40_condc[label="false"];
|
|
||||||
b30_rank -> c20_tell[label="true"];
|
|
||||||
b40_condc -> b50_better[label="false"];
|
|
||||||
b40_condc -> c20_tell[label="true"];
|
|
||||||
b45_lets -> b50_better;
|
|
||||||
b50_better -> a100_write;
|
|
||||||
|
|
||||||
c10_move -> d10_use[label="yes"];
|
|
||||||
c10_move -> a100_write[label="no"];
|
|
||||||
c20_tell -> b50_better;
|
|
||||||
|
|
||||||
d10_use -> stop2;
|
|
||||||
|
|
||||||
{rank=same; clustera clusterb clusterc clusterd};
|
|
||||||
|
|
||||||
// {rank=same; a10_create b10_unanimous c10_move d10_use stop2};
|
|
||||||
// {rank=same; a20_get b20_counter c20_tell};
|
|
||||||
// {rank=same; a30_epoch b40_condc};
|
|
||||||
// {rank=same; a40_epochequal b40_condc crash};
|
|
||||||
// {rank=same; stop1 a50_equal b50_better};
|
|
||||||
|
|
||||||
// if_valid;
|
|
||||||
//
|
|
||||||
// start -> input;
|
|
||||||
// input -> if_valid;
|
|
||||||
// if_valid -> message[label="no"];
|
|
||||||
// if_valid -> end[label="yes"];
|
|
||||||
// message -> input;
|
|
||||||
|
|
||||||
// {rank=same; message input}
|
|
||||||
}
|
|
6
prototype/chain-manager/rebar.config
Normal file
6
prototype/chain-manager/rebar.config
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
|
||||||
|
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
|
||||||
|
{deps, [
|
||||||
|
{lager, "2.0.1", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}}
|
||||||
|
]}.
|
||||||
|
|
9
prototype/chain-manager/src/foo.app.src
Normal file
9
prototype/chain-manager/src/foo.app.src
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
{application, foo, [
|
||||||
|
{description, "Prototype of Machi chain manager."},
|
||||||
|
{vsn, "0.0.0"},
|
||||||
|
{applications, [kernel, stdlib, lager]},
|
||||||
|
{mod,{foo_unfinished_app,[]}},
|
||||||
|
{registered, []},
|
||||||
|
{env, [
|
||||||
|
]}
|
||||||
|
]}.
|
|
@ -29,6 +29,7 @@
|
||||||
-export([]).
|
-export([]).
|
||||||
|
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
-ifndef(PULSE).
|
||||||
|
|
||||||
-ifdef(EQC).
|
-ifdef(EQC).
|
||||||
-include_lib("eqc/include/eqc.hrl").
|
-include_lib("eqc/include/eqc.hrl").
|
||||||
|
@ -458,4 +459,5 @@ combinations(L) ->
|
||||||
perms([]) -> [[]];
|
perms([]) -> [[]];
|
||||||
perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])].
|
perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])].
|
||||||
|
|
||||||
|
-endif. % ! PULSE
|
||||||
-endif.
|
-endif.
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
|
-define(D(X), io:format(user, "~s ~p\n", [??X, X])).
|
||||||
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
|
-define(Dw(X), io:format(user, "~s ~w\n", [??X, X])).
|
||||||
|
|
||||||
-export([]).
|
-export([unanimous_report/1, unanimous_report/2]).
|
||||||
|
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
|
||||||
|
@ -42,6 +42,8 @@
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
-compile(export_all).
|
-compile(export_all).
|
||||||
|
|
||||||
|
-ifndef(PULSE).
|
||||||
|
|
||||||
smoke0_test() ->
|
smoke0_test() ->
|
||||||
machi_partition_simulator:start_link({1,2,3}, 50, 50),
|
machi_partition_simulator:start_link({1,2,3}, 50, 50),
|
||||||
{ok, FLUa} = machi_flu0:start_link(a),
|
{ok, FLUa} = machi_flu0:start_link(a),
|
||||||
|
@ -52,10 +54,12 @@ smoke0_test() ->
|
||||||
%% If/when calculate_projection_internal_old() disappears, then
|
%% If/when calculate_projection_internal_old() disappears, then
|
||||||
%% get rid of the comprehension below ... start/ping/stop is
|
%% get rid of the comprehension below ... start/ping/stop is
|
||||||
%% good enough for smoke0.
|
%% good enough for smoke0.
|
||||||
|
io:format(user, "\n\nBegin 5 lines of verbose stuff, check manually for differences\n", []),
|
||||||
[begin
|
[begin
|
||||||
Proj = ?MGR:calculate_projection_internal_old(M0),
|
Proj = ?MGR:calculate_projection_internal_old(M0),
|
||||||
io:format(user, "~w\n", [?MGR:make_projection_summary(Proj)])
|
io:format(user, "~w\n", [?MGR:make_projection_summary(Proj)])
|
||||||
end || _ <- lists:seq(1,5)]
|
end || _ <- lists:seq(1,5)],
|
||||||
|
io:format(user, "\n", [])
|
||||||
after
|
after
|
||||||
ok = ?MGR:stop(M0),
|
ok = ?MGR:stop(M0),
|
||||||
ok = machi_flu0:stop(FLUa),
|
ok = machi_flu0:stop(FLUa),
|
||||||
|
@ -105,9 +109,9 @@ nonunanimous_setup_and_fix_test() ->
|
||||||
ok = machi_flu0:proj_write(FLUa, P1Epoch, public, P1a),
|
ok = machi_flu0:proj_write(FLUa, P1Epoch, public, P1a),
|
||||||
ok = machi_flu0:proj_write(FLUb, P1Epoch, public, P1b),
|
ok = machi_flu0:proj_write(FLUb, P1Epoch, public, P1b),
|
||||||
|
|
||||||
?D(x),
|
%% ?D(x),
|
||||||
{not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
|
{not_unanimous,_,_}=_XX = ?MGR:test_read_latest_public_projection(Ma, false),
|
||||||
?Dw(_XX),
|
%% ?Dw(_XX),
|
||||||
{not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
|
{not_unanimous,_,_}=_YY = ?MGR:test_read_latest_public_projection(Ma, true),
|
||||||
%% The read repair here doesn't automatically trigger the creation of
|
%% The read repair here doesn't automatically trigger the creation of
|
||||||
%% a new projection (to try to create a unanimous projection). So
|
%% a new projection (to try to create a unanimous projection). So
|
||||||
|
@ -138,101 +142,18 @@ nonunanimous_setup_and_fix_test() ->
|
||||||
ok = machi_partition_simulator:stop()
|
ok = machi_partition_simulator:stop()
|
||||||
end.
|
end.
|
||||||
|
|
||||||
unanimous_report(Namez) ->
|
%% This test takes a long time and spits out a huge amount of logging
|
||||||
UniquePrivateEs =
|
%% cruft to the console. Comment out the EUnit fixture and run manually.
|
||||||
lists:usort(lists:flatten(
|
|
||||||
[machi_flu0:proj_list_all(FLU, private) ||
|
|
||||||
{_FLUName, FLU} <- Namez])),
|
|
||||||
[unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
|
|
||||||
|
|
||||||
unanimous_report(Epoch, Namez) ->
|
%% convergence_demo_test_() ->
|
||||||
Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
|
%% {timeout, 300, fun() -> convergence_demo1() end}.
|
||||||
{ok, T} -> T;
|
|
||||||
_Else -> not_in_this_epoch
|
|
||||||
end} || {FLUName, FLU} <- Namez],
|
|
||||||
UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
|
|
||||||
Proj#projection.epoch_csum} ||
|
|
||||||
{_FLUname, Proj} <- Projs,
|
|
||||||
is_record(Proj, projection)],
|
|
||||||
UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
|
|
||||||
Res =
|
|
||||||
[begin
|
|
||||||
case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
|
|
||||||
U == UPI]) of
|
|
||||||
[_1CSum] ->
|
|
||||||
%% Yay, there's only 1 checksum. Let's check
|
|
||||||
%% that all FLUs are in agreement.
|
|
||||||
{UPI, Repairing, _CSum} =
|
|
||||||
lists:keyfind(UPI, 1, UPI_R_Sums),
|
|
||||||
%% TODO: make certain that this subtlety doesn't get
|
|
||||||
%% last in later implementations.
|
|
||||||
|
|
||||||
%% So, this is a bit of a tricky thing. If we're at
|
convergence_demo1() ->
|
||||||
%% upi=[c] and repairing=[a,b], then the transition
|
|
||||||
%% (eventually!) to upi=[c,a] does not currently depend
|
|
||||||
%% on b being an active participant in the repair.
|
|
||||||
%%
|
|
||||||
%% Yes, b's state is very important for making certain
|
|
||||||
%% that all repair operations succeed both to a & b.
|
|
||||||
%% However, in this simulation, we only consider that
|
|
||||||
%% the head(Repairing) is sane. Therefore, we use only
|
|
||||||
%% the "HeadOfRepairing" in our considerations here.
|
|
||||||
HeadOfRepairing = case Repairing of
|
|
||||||
[H_Rep|_] ->
|
|
||||||
[H_Rep];
|
|
||||||
_ ->
|
|
||||||
[]
|
|
||||||
end,
|
|
||||||
Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
|
|
||||||
P when is_record(P, projection) ->
|
|
||||||
P#projection.epoch_csum;
|
|
||||||
Else ->
|
|
||||||
Else
|
|
||||||
end} || FLU <- UPI ++ HeadOfRepairing],
|
|
||||||
case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
|
|
||||||
[_] ->
|
|
||||||
{agreed_membership, {UPI, Repairing}};
|
|
||||||
Else2 ->
|
|
||||||
{not_agreed, {UPI, Repairing}, Else2}
|
|
||||||
end;
|
|
||||||
_Else ->
|
|
||||||
{UPI, not_unique, Epoch, _Else}
|
|
||||||
end
|
|
||||||
end || UPI <- UniqueUPIs],
|
|
||||||
AgreedResUPI_Rs = [UPI++Repairing ||
|
|
||||||
{agreed_membership, {UPI, Repairing}} <- Res],
|
|
||||||
Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
|
|
||||||
lists:sort(lists:flatten(AgreedResUPI_Rs)) of
|
|
||||||
true ->
|
|
||||||
ok_disjoint;
|
|
||||||
false ->
|
|
||||||
bummer_NOT_DISJOINT
|
|
||||||
end,
|
|
||||||
{Epoch, {Tag, Res}}.
|
|
||||||
|
|
||||||
all_reports_are_disjoint(Report) ->
|
|
||||||
[] == [X || {_Epoch, Tuple}=X <- Report,
|
|
||||||
element(1, Tuple) /= ok_disjoint].
|
|
||||||
|
|
||||||
extract_chains_relative_to_flu(FLU, Report) ->
|
|
||||||
{FLU, [{Epoch, UPI, Repairing} ||
|
|
||||||
{Epoch, {ok_disjoint, Es}} <- Report,
|
|
||||||
{agreed_membership, {UPI, Repairing}} <- Es,
|
|
||||||
lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
|
|
||||||
|
|
||||||
chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
|
|
||||||
?MGR:make_projection(Epoch, MyName, All_list,
|
|
||||||
All_list -- (UPI_list ++ Repairing_list),
|
|
||||||
UPI_list, Repairing_list, []).
|
|
||||||
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
convergence_demo_test_() ->
|
|
||||||
{timeout, 300, fun() -> convergence_demo_test(x) end}.
|
|
||||||
|
|
||||||
convergence_demo_test(_) ->
|
|
||||||
All_list = [a,b,c,d],
|
All_list = [a,b,c,d],
|
||||||
machi_partition_simulator:start_link({111,222,33}, 0, 100),
|
%% machi_partition_simulator:start_link({111,222,33}, 0, 100),
|
||||||
|
Seed = erlang:now(),
|
||||||
|
machi_partition_simulator:start_link(Seed, 0, 100),
|
||||||
|
io:format(user, "convergence_demo seed = ~p\n", [Seed]),
|
||||||
_ = machi_partition_simulator:get(All_list),
|
_ = machi_partition_simulator:get(All_list),
|
||||||
|
|
||||||
{ok, FLUa} = machi_flu0:start_link(a),
|
{ok, FLUa} = machi_flu0:start_link(a),
|
||||||
|
@ -357,4 +278,92 @@ convergence_demo_test(_) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-endif. % not PULSE
|
-endif. % not PULSE
|
||||||
|
|
||||||
|
unanimous_report(Namez) ->
|
||||||
|
UniquePrivateEs =
|
||||||
|
lists:usort(lists:flatten(
|
||||||
|
[machi_flu0:proj_list_all(FLU, private) ||
|
||||||
|
{_FLUName, FLU} <- Namez])),
|
||||||
|
[unanimous_report(Epoch, Namez) || Epoch <- UniquePrivateEs].
|
||||||
|
|
||||||
|
unanimous_report(Epoch, Namez) ->
|
||||||
|
Projs = [{FLUName, case machi_flu0:proj_read(FLU, Epoch, private) of
|
||||||
|
{ok, T} -> T;
|
||||||
|
_Else -> not_in_this_epoch
|
||||||
|
end} || {FLUName, FLU} <- Namez],
|
||||||
|
UPI_R_Sums = [{Proj#projection.upi, Proj#projection.repairing,
|
||||||
|
Proj#projection.epoch_csum} ||
|
||||||
|
{_FLUname, Proj} <- Projs,
|
||||||
|
is_record(Proj, projection)],
|
||||||
|
UniqueUPIs = lists:usort([UPI || {UPI, _Repairing, _CSum} <- UPI_R_Sums]),
|
||||||
|
Res =
|
||||||
|
[begin
|
||||||
|
case lists:usort([CSum || {U, _Repairing, CSum} <- UPI_R_Sums,
|
||||||
|
U == UPI]) of
|
||||||
|
[_1CSum] ->
|
||||||
|
%% Yay, there's only 1 checksum. Let's check
|
||||||
|
%% that all FLUs are in agreement.
|
||||||
|
{UPI, Repairing, _CSum} =
|
||||||
|
lists:keyfind(UPI, 1, UPI_R_Sums),
|
||||||
|
%% TODO: make certain that this subtlety doesn't get
|
||||||
|
%% last in later implementations.
|
||||||
|
|
||||||
|
%% So, this is a bit of a tricky thing. If we're at
|
||||||
|
%% upi=[c] and repairing=[a,b], then the transition
|
||||||
|
%% (eventually!) to upi=[c,a] does not currently depend
|
||||||
|
%% on b being an active participant in the repair.
|
||||||
|
%%
|
||||||
|
%% Yes, b's state is very important for making certain
|
||||||
|
%% that all repair operations succeed both to a & b.
|
||||||
|
%% However, in this simulation, we only consider that
|
||||||
|
%% the head(Repairing) is sane. Therefore, we use only
|
||||||
|
%% the "HeadOfRepairing" in our considerations here.
|
||||||
|
HeadOfRepairing = case Repairing of
|
||||||
|
[H_Rep|_] ->
|
||||||
|
[H_Rep];
|
||||||
|
_ ->
|
||||||
|
[]
|
||||||
|
end,
|
||||||
|
Tmp = [{FLU, case proplists:get_value(FLU, Projs) of
|
||||||
|
P when is_record(P, projection) ->
|
||||||
|
P#projection.epoch_csum;
|
||||||
|
Else ->
|
||||||
|
Else
|
||||||
|
end} || FLU <- UPI ++ HeadOfRepairing],
|
||||||
|
case lists:usort([CSum || {_FLU, CSum} <- Tmp]) of
|
||||||
|
[_] ->
|
||||||
|
{agreed_membership, {UPI, Repairing}};
|
||||||
|
Else2 ->
|
||||||
|
{not_agreed, {UPI, Repairing}, Else2}
|
||||||
|
end;
|
||||||
|
_Else ->
|
||||||
|
{UPI, not_unique, Epoch, _Else}
|
||||||
|
end
|
||||||
|
end || UPI <- UniqueUPIs],
|
||||||
|
AgreedResUPI_Rs = [UPI++Repairing ||
|
||||||
|
{agreed_membership, {UPI, Repairing}} <- Res],
|
||||||
|
Tag = case lists:usort(lists:flatten(AgreedResUPI_Rs)) ==
|
||||||
|
lists:sort(lists:flatten(AgreedResUPI_Rs)) of
|
||||||
|
true ->
|
||||||
|
ok_disjoint;
|
||||||
|
false ->
|
||||||
|
bummer_NOT_DISJOINT
|
||||||
|
end,
|
||||||
|
{Epoch, {Tag, Res}}.
|
||||||
|
|
||||||
|
all_reports_are_disjoint(Report) ->
|
||||||
|
[] == [X || {_Epoch, Tuple}=X <- Report,
|
||||||
|
element(1, Tuple) /= ok_disjoint].
|
||||||
|
|
||||||
|
extract_chains_relative_to_flu(FLU, Report) ->
|
||||||
|
{FLU, [{Epoch, UPI, Repairing} ||
|
||||||
|
{Epoch, {ok_disjoint, Es}} <- Report,
|
||||||
|
{agreed_membership, {UPI, Repairing}} <- Es,
|
||||||
|
lists:member(FLU, UPI) orelse lists:member(FLU, Repairing)]}.
|
||||||
|
|
||||||
|
chain_to_projection(MyName, Epoch, UPI_list, Repairing_list, All_list) ->
|
||||||
|
?MGR:make_projection(Epoch, MyName, All_list,
|
||||||
|
All_list -- (UPI_list ++ Repairing_list),
|
||||||
|
UPI_list, Repairing_list, []).
|
||||||
|
|
||||||
-endif. % TEST
|
-endif. % TEST
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
-ifndef(PULSE).
|
||||||
|
|
||||||
repair_status_test() ->
|
repair_status_test() ->
|
||||||
{ok, F} = machi_flu0:start_link(one),
|
{ok, F} = machi_flu0:start_link(one),
|
||||||
|
@ -41,7 +42,6 @@ repair_status_test() ->
|
||||||
ok = machi_flu0:stop(F)
|
ok = machi_flu0:stop(F)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
concuerror1_test() ->
|
concuerror1_test() ->
|
||||||
ok.
|
ok.
|
||||||
|
@ -375,5 +375,5 @@ event_get_all() ->
|
||||||
Tab = ?MODULE,
|
Tab = ?MODULE,
|
||||||
ets:tab2list(Tab).
|
ets:tab2list(Tab).
|
||||||
|
|
||||||
-endif.
|
-endif. % ! PULSE
|
||||||
-endif.
|
-endif.
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
-export([]).
|
-export([]).
|
||||||
|
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
-ifndef(PULSE).
|
||||||
|
|
||||||
-ifdef(EQC).
|
-ifdef(EQC).
|
||||||
-include_lib("eqc/include/eqc.hrl").
|
-include_lib("eqc/include/eqc.hrl").
|
||||||
|
@ -146,5 +147,6 @@ make_canonical_form2([{File, Start, End, Members}|T]) ->
|
||||||
Member <- Members] ++
|
Member <- Members] ++
|
||||||
make_canonical_form2(T).
|
make_canonical_form2(T).
|
||||||
|
|
||||||
|
-endif. % ! PULSE
|
||||||
-endif. % TEST
|
-endif. % TEST
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue