Updated tutorial + search returns sigma now too.

This commit is contained in:
Michael Whittaker 2021-02-05 17:18:41 -08:00
parent ea4523cc17
commit 30c54f8886
6 changed files with 440 additions and 166 deletions

492
README.md
View file

@ -24,12 +24,12 @@ Next, we specify the nodes in our quorum system. Our nodes can be strings,
integers, IP addresses, anything! integers, IP addresses, anything!
```python ```python
a = Node('a') >>> a = Node('a')
b = Node('b') >>> b = Node('b')
c = Node('c') >>> c = Node('c')
d = Node('d') >>> d = Node('d')
e = Node('e') >>> e = Node('e')
f = Node('f') >>> f = Node('f')
``` ```
Now, we construct a two by three grid of nodes. Every row is read quorum, and Now, we construct a two by three grid of nodes. Every row is read quorum, and
@ -38,36 +38,45 @@ quorum system, we only have to specify the set of read quorums. The library
figures out the optimal set of write quorums automatically. figures out the optimal set of write quorums automatically.
```python ```python
grid = QuorumSystem(reads=a*b*c + d*e*f) >>> grid = QuorumSystem(reads=a*b*c + d*e*f)
``` ```
This next code snippet prints out the read quorums `{'a', 'b', 'c'}` and `{'d', This next code snippet prints out the read quorums.
'e', 'f'}`.
```python ```python
for r in grid.read_quorums(): >>> for r in grid.read_quorums():
print(r) ... print(r)
{'a', 'b', 'c'}
{'d', 'e', 'f'}
``` ```
And this next code snippet prints out the write quorums `{'a', 'd'}`, `{'a', And this next code snippet prints out the write quorums.
'e'}`, `{'b', 'f'}`, `{'b', 'd'}`, ...
```python ```python
for w in grid.write_quorums(): >>> for w in grid.write_quorums():
print(w) ... print(w)
{'a', 'd'}
{'a', 'e'}
{'a', 'f'}
{'b', 'd'}
{'b', 'e'}
{'b', 'f'}
{'c', 'd'}
{'c', 'e'}
{'c', 'f'}
``` ```
Alternatively, we can construct a quorum system be specifying the write Alternatively, we can construct a quorum system be specifying the write
quorums. quorums.
```python ```python
QuorumSystem(writes=(a + b + c) * (d + e + f)) >>> QuorumSystem(writes=(a + b + c) * (d + e + f))
``` ```
Or, we can specify both the read and write quorums. Or, we can specify both the read and write quorums.
```python ```python
QuorumSystem(reads=a*b*c + d*e*f, writes=(a + b + c) * (d + e + f)) >>> QuorumSystem(reads=a*b*c + d*e*f, writes=(a + b + c) * (d + e + f))
``` ```
But, remember that every read quorum must intersect every write quorum. If we But, remember that every read quorum must intersect every write quorum. If we
@ -75,21 +84,29 @@ try to construct a quorum system with non-overlapping quorums, an exception
will be thrown. will be thrown.
```python ```python
QuorumSystem(reads=a+b+c, writes=d+e+f) >>> QuorumSystem(reads=a+b+c, writes=d+e+f)
# ValueError: Not all read quorums intersect all write quorums Traceback (most recent call last):
...
ValueError: Not all read quorums intersect all write quorums
``` ```
We can check whether a given set is a read or write quorum. Note that any We can check whether a given set is a read or write quorum. Note that any
superset of a quorum is also considered a quorum. superset of a quorum is also considered a quorum.
```python ```python
grid.is_read_quorum({'a', 'b', 'c'}) # True >>> grid.is_read_quorum({'a', 'b', 'c'})
grid.is_read_quorum({'a', 'b', 'c', 'd'}) # True True
grid.is_read_quorum({'a', 'b', 'd'}) # False >>> grid.is_read_quorum({'a', 'b', 'c', 'd'})
True
grid.is_write_quorum({'a', 'd'}) # True >>> grid.is_read_quorum({'a', 'b', 'd'})
grid.is_write_quorum({'a', 'd', 'd'}) # True False
grid.is_write_quorum({'a', 'b'}) # False >>>
>>> grid.is_write_quorum({'a', 'd'})
True
>>> grid.is_write_quorum({'a', 'd', 'd'})
True
>>> grid.is_write_quorum({'a', 'b'})
False
``` ```
## Resilience ## Resilience
@ -107,9 +124,12 @@ quorum, so our write resilience is 1. The resilience is the minimum of 1 and 2,
which is 1. which is 1.
```python ```python
grid.read_resilience() # 1 >>> grid.read_resilience()
grid.write_resilience() # 2 1
grid.resilience() # 1 >>> grid.write_resilience()
2
>>> grid.resilience()
1
``` ```
## Strategies ## Strategies
@ -125,19 +145,19 @@ in weights, and the library will automatically normalize the weights into a
valid probability distribution. valid probability distribution.
```python ```python
# The read quorum strategy. >>> # The read quorum strategy.
sigma_r = { >>> sigma_r = {
frozenset({'a', 'b', 'c'}): 2., ... frozenset({'a', 'b', 'c'}): 2.,
frozenset({'d', 'e', 'f'}): 1., ... frozenset({'d', 'e', 'f'}): 1.,
} ... }
>>>
# The write quorum strategy. >>> # The write quorum strategy.
sigma_w = { >>> sigma_w = {
frozenset({'a', 'd'}): 1., ... frozenset({'a', 'd'}): 1.,
frozenset({'b', 'e'}): 1., ... frozenset({'b', 'e'}): 1.,
frozenset({'c', 'f'}): 1., ... frozenset({'c', 'f'}): 1.,
} ... }
strategy = grid.make_strategy(sigma_r, sigma_w) >>> strategy = grid.make_strategy(sigma_r, sigma_w)
``` ```
Once we have a strategy, we can use it to sample read and write quorums. Here, Once we have a strategy, we can use it to sample read and write quorums. Here,
@ -146,14 +166,20 @@ row, and we expect `get_write_quorum` to return every column uniformly at
random. random.
```python ```python
print(strategy.get_read_quorum()) >>> strategy.get_read_quorum()
print(strategy.get_read_quorum()) {'a', 'b', 'c'}
print(strategy.get_read_quorum()) >>> strategy.get_read_quorum()
print(strategy.get_read_quorum()) {'a', 'b', 'c'}
print(strategy.get_write_quorum()) >>> strategy.get_read_quorum()
print(strategy.get_write_quorum()) {'d', 'e', 'f'}
print(strategy.get_write_quorum()) >>> strategy.get_write_quorum()
print(strategy.get_write_quorum()) {'b', 'e'}
>>> strategy.get_write_quorum()
{'c', 'f'}
>>> strategy.get_write_quorum()
{'b', 'e'}
>>> strategy.get_write_quorum()
{'a', 'd'}
``` ```
## Load and Capacity ## Load and Capacity
@ -187,7 +213,8 @@ The largest node load is 2/3, so our strategy has a load of 2/3. Rather than
calculating load by hand, we can simply call the `load` function. calculating load by hand, we can simply call the `load` function.
```python ```python
print(strategy.load(read_fraction=1)) # 2/3 >>> strategy.load(read_fraction=1)
0.6666666666666666
``` ```
Now let's calculate the load of our strategy assuming a 100% write workload. Now let's calculate the load of our strategy assuming a 100% write workload.
@ -211,7 +238,8 @@ than calculating load by hand, we can simply call the `load` function. Note
that we can pass in a `read_fraction` or `write_fraction` but not both. that we can pass in a `read_fraction` or `write_fraction` but not both.
```python ```python
print(strategy.load(write_fraction=1)) # 1/3 >>> strategy.load(write_fraction=1)
0.3333333333333333
``` ```
Now let's calculate the load of our strategy on a 25% read and 75% write Now let's calculate the load of our strategy on a 25% read and 75% write
@ -247,18 +275,25 @@ point, you can see that calculating load by hand is extremely tedious. We could
have skipped all that work and called `load` instead! have skipped all that work and called `load` instead!
```python ```python
print(strategy.load(read_fraction=0.25)) # 5/12 >>> strategy.load(read_fraction=0.25)
0.41666666666666663
``` ```
We can also compute the load on every node. We can also compute the load on every node.
```python ```python
print(strategy.node_load(a, read_fraction=0.25)) # 5/12 >>> print(strategy.node_load(a, read_fraction=0.25))
print(strategy.node_load(b, read_fraction=0.25)) # 5/12 0.41666666666666663
print(strategy.node_load(c, read_fraction=0.25)) # 5/12 >>> print(strategy.node_load(b, read_fraction=0.25))
print(strategy.node_load(d, read_fraction=0.25)) # 1/3 0.41666666666666663
print(strategy.node_load(e, read_fraction=0.25)) # 1/3 >>> print(strategy.node_load(c, read_fraction=0.25))
print(strategy.node_load(f, read_fraction=0.25)) # 1/3 0.41666666666666663
>>> print(strategy.node_load(d, read_fraction=0.25))
0.3333333333333333
>>> print(strategy.node_load(e, read_fraction=0.25))
0.3333333333333333
>>> print(strategy.node_load(f, read_fraction=0.25))
0.3333333333333333
``` ```
Our strategy has a load of 5/12 on a 25% read workload, but what about the Our strategy has a load of 5/12 on a 25% read workload, but what about the
@ -267,14 +302,11 @@ strategy is not optimal. We can call the `strategy` function to compute the
optimal strategy automatically. optimal strategy automatically.
```python ```python
strategy = grid.strategy(read_fraction=0.25) >>> strategy = grid.strategy(read_fraction=0.25)
print(strategy) >>> strategy
# Strategy(reads={('a', 'b', 'c'): 0.5, Strategy(reads={('a', 'b', 'c'): 0.5, ('d', 'e', 'f'): 0.5}, writes={('a', 'f'): 0.33333333, ('b', 'e'): 0.33333333, ('c', 'd'): 0.33333333})
# ('d', 'e', 'f'): 0.5}, >>> strategy.load(read_fraction=0.25))
# writes={('a', 'f'): 0.33333333, 0.3749999975
# ('b', 'e'): 0.33333333,
# ('c', 'd'): 0.33333333})
print(strategy.load(read_fraction=0.25)) # 3/8
``` ```
Here, we see that the optimal strategy picks all rows and all columns Here, we see that the optimal strategy picks all rows and all columns
@ -287,16 +319,20 @@ this strategy is optimal for a read fraction of 25%, but it may not be optimal
for other read fractions. for other read fractions.
```python ```python
print(strategy.load(read_fraction=0)) # 1/3 >>> strategy.load(read_fraction=0)
print(strategy.load(read_fraction=0.5)) # 5/12 0.33333333
print(strategy.load(read_fraction=1)) # 1/2 >>> strategy.load(read_fraction=0.5)
0.416666665
>>> strategy.load(read_fraction=1)
0.5
``` ```
We can also use a quorum system's `load` function. The code snippet below is a We can also use a quorum system's `load` function. The code snippet below is a
shorthand for `grid.strategy(read_fraction=0.25).load(read_fraction=0.25)`. shorthand for `grid.strategy(read_fraction=0.25).load(read_fraction=0.25)`.
```python ```python
grid.load(read_fraction=0.25) # 0.375 >>> grid.load(read_fraction=0.25)
0.3749999975
``` ```
The capacity of strategy or quorum is simply the inverse of the load. Our The capacity of strategy or quorum is simply the inverse of the load. Our
@ -304,7 +340,8 @@ quorum system has a load of 3/8 on a 25% read workload, so it has a capacity of
8/3. 8/3.
```python ```python
print(grid.capacity(read_fraction=0.25)) # 8/3 >>> grid.capacity(read_fraction=0.25)
2.6666666844444444
``` ```
The _capacity_ of a quorum system is proportional to the maximum throughput The _capacity_ of a quorum system is proportional to the maximum throughput
@ -321,9 +358,10 @@ will return the strategy that minimizes the expected load according to this
distribution. distribution.
```python ```python
distribution = {0.1: 1, 0.75: 1} >>> distribution = {0.1: 1, 0.75: 1}
strategy = grid.strategy(read_fraction=distribution) >>> strategy = grid.strategy(read_fraction=distribution)
strategy.load(read_fraction=distribution) # 0.404 >>> strategy.load(read_fraction=distribution)
0.40416666474999996
``` ```
## Heterogeneous Node ## Heterogeneous Node
@ -334,12 +372,12 @@ this, we instantiate every node with its capacity. Here, nodes `a`, `c`, and
only process 500 requests per second. only process 500 requests per second.
```python ```python
a = Node('a', capacity=1000) >>> a = Node('a', capacity=1000)
b = Node('b', capacity=500) >>> b = Node('b', capacity=500)
c = Node('c', capacity=1000) >>> c = Node('c', capacity=1000)
d = Node('d', capacity=500) >>> d = Node('d', capacity=500)
e = Node('e', capacity=1000) >>> e = Node('e', capacity=1000)
f = Node('f', capacity=500) >>> f = Node('f', capacity=500)
``` ```
Now, the definition of capacity becomes much simpler. The capacity of a quorum Now, the definition of capacity becomes much simpler. The capacity of a quorum
@ -348,10 +386,12 @@ interpreted as the inverse of the capacity. Here, our quorum system is capable
of processing 1333 commands per second for a workload of 75% reads. of processing 1333 commands per second for a workload of 75% reads.
```python ```python
grid = QuorumSystem(reads=a*b*c + d*e*f) >>> grid = QuorumSystem(reads=a*b*c + d*e*f)
strategy = grid.strategy(read_fraction=0.75) >>> strategy = grid.strategy(read_fraction=0.75)
strategy.load(read_fraction=0.75) # 0.00075 >>> strategy.load(read_fraction=0.75)
strategy.capacity(read_fraction=0.75) # 1333 0.00075
>>> strategy.capacity(read_fraction=0.75)
1333.3333333333333
``` ```
Nodes might also process reads and writes at different speeds. We can specify Nodes might also process reads and writes at different speeds. We can specify
@ -359,25 +399,28 @@ the peak read and write throughput of every node separately. Here, we assume
reads are ten times as fast as writes. reads are ten times as fast as writes.
```python ```python
a = Node('a', write_capacity=1000, read_capacity=10000) >>> a = Node('a', write_capacity=1000, read_capacity=10000)
b = Node('b', write_capacity=500, read_capacity=5000) >>> b = Node('b', write_capacity=500, read_capacity=5000)
c = Node('c', write_capacity=1000, read_capacity=10000) >>> c = Node('c', write_capacity=1000, read_capacity=10000)
d = Node('d', write_capacity=500, read_capacity=5000) >>> d = Node('d', write_capacity=500, read_capacity=5000)
e = Node('e', write_capacity=1000, read_capacity=10000) >>> e = Node('e', write_capacity=1000, read_capacity=10000)
f = Node('f', write_capacity=500, read_capacity=5000) >>> f = Node('f', write_capacity=500, read_capacity=5000)
``` ```
With 100% reads, our quorum system can process 10,000 commands per second. With 100% reads, our quorum system can process 10,000 commands per second.
This throughput decreases as we increase the fraction of writes. This throughput decreases as we increase the fraction of writes.
```python ```python
grid = QuorumSystem(reads=a*b*c + d*e*f) >>> grid = QuorumSystem(reads=a*b*c + d*e*f)
grid.capacity(read_fraction=1) # 10,000 >>> grid.capacity(read_fraction=1)
grid.capacity(read_fraction=0.5) # 3913 10000.0
grid.capacity(read_fraction=0) # 2000 >>> grid.capacity(read_fraction=0.5)
3913.043450018904
>>> grid.capacity(read_fraction=0)
2000.0
``` ```
# `f`-resilient Strategies ## `f`-resilient Strategies
Another real world complication is the fact that machines sometimes fail and Another real world complication is the fact that machines sometimes fail and
are sometimes slow. If we contact a quorum of nodes, some of them may fail, and are sometimes slow. If we contact a quorum of nodes, some of them may fail, and
we'll get stuck waiting to hear back from them. Or, some of them may be we'll get stuck waiting to hear back from them. Or, some of them may be
@ -391,68 +434,231 @@ By default, `strategy` returns `0`-resilient quorums. We can pass in the `f`
argument to get more resilient strategies. argument to get more resilient strategies.
```python ```python
strategy = grid.strategy(read_fraction=0.5, f=1) >>> strategy = grid.strategy(read_fraction=0.5, f=1)
``` ```
These sets are quorums even if 1 machine fails. These sets are quorums even if 1 machine fails.
```python ```python
strategy.get_read_quorum() >>> strategy.get_read_quorum()
strategy.get_write_quorum() {'b', 'f', 'e', 'd', 'a', 'c'}
>>> strategy.get_write_quorum()
{'b', 'd', 'a', 'e'}
```
Note that as we increase resilience, quorums get larger, and we decrease
capacity. On a 100% write workload, our grid quorum system has a 0-resilient
capacity of 2000 commands per second, but a 1-resilient capacity of 1000
commands per second.
```python
>>> grid.capacity(write_fraction=1, f=0)
2000.0
>>> grid.capacity(write_fraction=1, f=1)
1000.0
```
Also note that not all quorum systems are equally as resilient. In the next
code snippet, we construct a "write 2, read 3" quorum system using the `choose`
function. For this quorum system, every set of 2 nodes is a write quorum, and
every set of 3 nodes is a read quorum. This quorum system has a 0-resilient
capacity of 2000 (the same as the grid), but a 1-resilient capacity of 1333
(higher than the grid).
```python
>>> write2 = QuorumSystem(writes=choose(2, [a, b, c, d, e]))
>>> write2.capacity(write_fraction=1, f=0)
2000.0
>>> write2.capacity(write_fraction=1, f=1)
1333.3333333333333
``` ```
## Latency ## Latency
TODO(mwhittaker): Write. In the real world, not all nodes are equally as far away. Some are close and
some are far. To address this, we associate every node with a latency, i.e. the
time the required to contact the node. We model this in quoracle by assigning
each node a latency, represented as a `datetime.timedelta`. Here, nodes `a`,
`b`, `c`, `d`, `e`, and `f` in our grid have latencies of 1, 2, 3, 4, 5, and 6
seconds.
```python
>>> import datetime
>>>
>>> def seconds(x: int) -> datetime.timedelta:
>>> return datetime.timedelta(seconds=x)
>>>
>>> a = Node('a', write_capacity=1000, read_capacity=10000, latency=seconds(1))
>>> b = Node('b', write_capacity=500, read_capacity=5000, latency=seconds(2))
>>> c = Node('c', write_capacity=1000, read_capacity=10000, latency=seconds(3))
>>> d = Node('d', write_capacity=500, read_capacity=5000, latency=seconds(4))
>>> e = Node('e', write_capacity=1000, read_capacity=10000, latency=seconds(5))
>>> f = Node('f', write_capacity=500, read_capacity=5000, latency=seconds(6))
>>> grid = QuorumSystem(reads=a*b*c + d*e*f)
```
The _latency of a quorum_ `q` is the time required to form a quorum of
responses after contacting every node in `q`. For example, the read quorum `{a,
b, c}` has a latency of three seconds. It takes 1 second to hear back from `a`,
another second to hear back from `b`, and then a final second to hear back from
`c`. The write quorum `{a, b, d, f}` has a latency of 4 seconds. It takes 1
second to hear back from `a`, another second to hear back from `b`, and then
another 2 seconds to hear back from `d`. The set `{a, b, d}` is a write quorum,
so the latency of this quorum is 4 seconds. Note that we didn't have to wait to
hear back from `f` in order to form a quorum.
The _latency of a strategy_ is the expected latency of the quorums that it
chooses. The _latency of a quorum system_ is the latency of the latency-optimal
strategy. We can use the `strategy` function to find a latency-optimal strategy
by passing in the value `"latency"` to the `optimize` flag.
```python
>>> sigma = grid.strategy(read_fraction=0.5, optimize='latency')
>>> sigma
Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('c', 'd'): 1.0})
```
We can find the latency of this strategy by calling the `latency` function.
```python
>>> sigma.latency(read_fraction=1)
0:00:03
>>> sigma.latency(read_fraction=0)
0:00:04
>>> sigma.latency(read_fraction=0.5)
0:00:03.500000
```
As with capacity, we can call the `latency` function on our quorum system
directly. In the follow code snippet `grid.latency(read_fraction=0.5,
optimize='latency')` is a shorthand for `grid.strategy(read_fraction=0.5,
optimize='latency').latency(read_fraction=0.5)`.
```
>>> grid.latency(read_fraction=0.5, optimize='latency')
0:00:03.500000
```
Note that finding the latency-optimal strategy is trivial. The latency-optimal
strategy always selects the read and write quorum with the smallest latencies.
However, things get complicated when we start optimizing for capacity and
latency at the same time. When we call the `strategy` function with
`optimize='latency'`, we can pass in a constraint on the maximum allowable load
using the `load_limit` argument. For example, in the code snippet below, we
find the latency-optimal strategy with a capacity of at least 1,500.
```python
>>> sigma = grid.strategy(read_fraction=0.5,
... optimize='latency',
... load_limit=1/1500)
>>> sigma
Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('a', 'd'): 0.66666667, ('c', 'e'): 0.33333333})
>>> sigma.capacity(read_fraction=0.5)
1499.9999925
>>> sigma.latency(read_fraction=0.5)
0:00:03.666667
```
This strategy always picks the read quorum `{a, b, c}`, and picks the write
quorum `{a, d}` twice as often as write quorum `{c, e}`. It achieves our
desired capacity of 1,500 commands per second (ignoring rounding errors) and
has a latency of 3.66 seconds. We can also find a load-optimal strategy with a
latency constraint.
```python
>>> sigma = grid.strategy(read_fraction=0.5,
... optimize='load',
... latency_limit=seconds(4))
>>> sigma
Strategy(reads={('a', 'b', 'c'): 0.98870056, ('d', 'e', 'f'): 0.011299435}, writes={('a', 'd'): 0.19548023, ('a', 'f'): 0.22429379, ('b', 'd'): 0.062711864, ('b', 'e'): 0.097740113, ('c', 'e'): 0.41977401})
>>> sigma.capacity(read_fraction=0.5)
3856.2090893331633
>>> sigma.latency(read_fraction=0.5)
0:00:04.000001
```
This strategy is rather complicated and would be hard to find by hand. It has a
capacity of 3856 commands per second and achieves our latency constraint of 4
seconds.
Be careful when specifying constraints. If the constraints cannot be met, a
`NoStrategyFound` exception is raised.
```python
>>> grid.strategy(read_fraction=0.5,
... optimize='load',
... latency_limit=seconds(1))
Traceback (most recent call last):
...
quoracle.quorum_system.NoStrategyFoundError: no strategy satisfies the given constraints
```
## Network Load ## Network Load
TODO(mwhittaker): Write. Another useful metric is network load. When a protocol performs a read, it has
to send messages to every node in a read quorum, and when a protocol performs a
write, it has to send messages to every node in a write quorum. The bigger the
quorums, the more messages are sent over the network. The _network load of a
quorum_ is simply the size of the quorum, the _network load of a strategy_ is
the expected network load of the quorums it chooses, and the _network load of a
quorum system_ is the network load of the network load-optimal strategy.
We can find network load optimal-strategies using the `strategy` function by
passing in `"network"` to the `optimize` flag. We can also specify constraints
on load and latency. In general, using the `strategy` function, we can pick one
of load, latency, or network load to optimize and specify constraints on the
other two metrics.
```python
>>> sigma = grid.strategy(read_fraction=0.5, optimize='network')
>>> sigma
Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('c', 'f'): 1.0})
>>> sigma.network_load(read_fraction=0.5)
2.5
>>> grid.network_load(read_fraction=0.5, optimize='network')
2.5
>>> sigma = grid.strategy(read_fraction=0.5,
... optimize='network',
... load_limit=1/2000,
... latency_limit=seconds(4))
```
## Search ## Search
TODO(mwhittaker): Write. Finding good quorum systems by hand is hard. quoracle includes a heuristic
based search procedure that tries to find quorum systems that are optimal with
respect a target metric and set of constraints. For example, lets try to find a
quorum system
## Case Study - that has resilience 1,
TODO(mwhittaker): Update. - that is 1-resilient load optimal for a 75% read workload,
- that has a latency of most 4 seconds, and
- that has a network load of at most 4.
Putting everything together, we can use this library to pick quorum systems Because the number of quorum systems is enormous, the search procedure can take
that are well suited to our workload. For example, say we're implementing a a very, very long time. We pass in a timeout to the search procedure to limit
distributed file system and want to pick a 5 node quorum system with a how long it takes. If the timeout expires, `search` returns the most optimal
resilience of 1 that has a good load on workloads that are 90% reads 90% of the quorum system that it found so far.
time and 10% reads 10% of the time. We can try out three quorum systems: a
simple majority quorum system, a crumbling walls quorum system, and a paths
quorum system.
```python ```python
simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) ## Search
crumbling_walls = QuorumSystem(reads=a*b + c*d*e) >>> qs, sigma = search(nodes=[a, b, c, d, e, f],
paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) ... resilience=1,
... f=1,
... read_fraction=0.75,
... optimize='load',
... latency_limit=seconds(4),
... network_limit=4,
... timeout=seconds(60))
>>> qs
QuorumSystem(reads=choose3(a, c, e, (b + d + f)), writes=choose2(a, c, e, (b * d * f)))
>>> sigma
Strategy(reads={('a', 'c', 'e', 'f'): 0.33333333, ('a', 'b', 'c', 'e'): 0.33333333, ('a', 'c', 'd', 'e'): 0.33333333}, writes={('a', 'b', 'c', 'd', 'f'): 0.15714286, ('b', 'c', 'd', 'e', 'f'): 0.15714286, ('a', 'c', 'e'): 0.52857143, ('a', 'b', 'd', 'e', 'f'): 0.15714286})
>>> sigma.capacity(read_fraction=0.75)
3499.9999536250007
>>> sigma.latency(read_fraction=0.75)
0:00:03.907143
>>> sigma.network_load(read_fraction=0.75)
3.9857142674999997
``` ```
We make sure we have the desired resilience. Here, the search procedure returns the quorum system `choose(3, [a, c, e,
b+d+f])` with a capacity of 3500 commands per second and with latency and
```python network load close to the limits specified.
assert(simple_majority.resilience() >= 1)
assert(crumbling_walls.resilience() >= 1)
assert(paths.resilience() >= 1)
```
We check the loads and see that the crumbling walls quorum system has the
highest load, so we use the crumbling walls quorum system to implement our file
system.
```python
distribution = {0.9: 0.9, 0.1: 0.1}
simple_majority.capacity(read_fraction=distribution) # 5089
crumbling_walls.capacity(read_fraction=distribution) # 5837
paths.capacity(read_fraction=distribution) # 5725
```
Maybe some time later, we experiencing high latency because of stragglers and
want to switch to a 1-resilient strategy. We again compute the loads, but now
see that the simple majority quorum system has the highest load, so we switch
from the crumbling walls quorum system to the simple majority quorum system.
```python
simple_majority.capacity(read_fraction=distribution, f=1) # 3816
crumbling_walls.capacity(read_fraction=distribution, f=1) # 1908
paths.capacity(read_fraction=distribution, f=1) # 1908
```

View file

@ -44,7 +44,7 @@ def main() -> None:
print('0-resilient Searched') print('0-resilient Searched')
start = datetime.datetime.now() start = datetime.datetime.now()
opt = search(nodes=[a, b, c, d, e], opt, _ = search(nodes=[a, b, c, d, e],
resilience=1, resilience=1,
read_fraction=fr) read_fraction=fr)
stop = datetime.datetime.now() stop = datetime.datetime.now()
@ -87,7 +87,7 @@ def main() -> None:
print('1-resilient Searched') print('1-resilient Searched')
start = datetime.datetime.now() start = datetime.datetime.now()
opt = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, f=1) opt, _ = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, f=1)
stop = datetime.datetime.now() stop = datetime.datetime.now()
print(stop - start) print(stop - start)
sigma = opt.strategy(read_fraction=fr, f=1) sigma = opt.strategy(read_fraction=fr, f=1)
@ -109,7 +109,7 @@ def main() -> None:
print('Latency Optimal Searched') print('Latency Optimal Searched')
start = datetime.datetime.now() start = datetime.datetime.now()
opt = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, opt, _ = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr,
optimize='latency', load_limit=1/2000) optimize='latency', load_limit=1/2000)
stop = datetime.datetime.now() stop = datetime.datetime.now()
print(stop - start) print(stop - start)

View file

@ -70,7 +70,7 @@ def main() -> None:
print() print()
# Search. # Search.
qs = search( qs, _ = search(
nodes = [a, b, c, d], nodes = [a, b, c, d],
read_fraction = 1, read_fraction = 1,
optimize = 'latency', optimize = 'latency',

View file

@ -17,7 +17,7 @@ def main():
simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) simple_majority = QuorumSystem(reads=majority([a, b, c, d, e]))
crumbling_walls = QuorumSystem(reads=a*b + c*d*e) crumbling_walls = QuorumSystem(reads=a*b + c*d*e)
paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b)
opt = search(nodes, read_fraction=fr, timeout=datetime.timedelta(seconds=9)) opt, _ = search(nodes, read_fraction=fr, timeout=datetime.timedelta(seconds=9))
fig, ax = plt.subplots(3, 4, figsize = (6.3 * 2, 4.8 * 2), sharey='row') fig, ax = plt.subplots(3, 4, figsize = (6.3 * 2, 4.8 * 2), sharey='row')
for i, qs in enumerate([simple_majority, crumbling_walls, paths, opt]): for i, qs in enumerate([simple_majority, crumbling_walls, paths, opt]):

View file

@ -1,3 +1,4 @@
## Quorum Systems
from quoracle import * from quoracle import *
a = Node('a') a = Node('a')
@ -30,10 +31,12 @@ print(grid.is_write_quorum({'a', 'd'})) # True
print(grid.is_write_quorum({'a', 'd', 'd'})) # True print(grid.is_write_quorum({'a', 'd', 'd'})) # True
print(grid.is_write_quorum({'a', 'b'})) # False print(grid.is_write_quorum({'a', 'b'})) # False
## Resilience
print(grid.read_resilience()) # 1 print(grid.read_resilience()) # 1
print(grid.write_resilience()) # 2 print(grid.write_resilience()) # 2
print(grid.resilience()) # 1 print(grid.resilience()) # 1
## Strategies
# The read quorum strategy. # The read quorum strategy.
sigma_r = { sigma_r = {
frozenset({'a', 'b', 'c'}): 2., frozenset({'a', 'b', 'c'}): 2.,
@ -57,6 +60,7 @@ print(strategy.get_write_quorum())
print(strategy.get_write_quorum()) print(strategy.get_write_quorum())
print(strategy.get_write_quorum()) print(strategy.get_write_quorum())
## Load and Capacity
print(strategy.load(read_fraction=1)) # 2/3 print(strategy.load(read_fraction=1)) # 2/3
print(strategy.load(write_fraction=1)) # 1/3 print(strategy.load(write_fraction=1)) # 1/3
@ -82,10 +86,12 @@ print(grid.load(read_fraction=0.25)) # 3/8
print(grid.capacity(read_fraction=0.25)) # 8/3 print(grid.capacity(read_fraction=0.25)) # 8/3
## Workload Distributions
distribution = {0.1: 0.5, 0.75: 0.5} distribution = {0.1: 0.5, 0.75: 0.5}
strategy = grid.strategy(read_fraction=distribution) strategy = grid.strategy(read_fraction=distribution)
print(strategy.load(read_fraction=distribution)) # 0.404 print(strategy.load(read_fraction=distribution)) # 0.404
## Heterogeneous Node
a = Node('a', capacity=1000) a = Node('a', capacity=1000)
b = Node('b', capacity=500) b = Node('b', capacity=500)
c = Node('c', capacity=1000) c = Node('c', capacity=1000)
@ -110,24 +116,82 @@ print(grid.capacity(read_fraction=1)) # 10,000
print(grid.capacity(read_fraction=0.5)) # 3913 print(grid.capacity(read_fraction=0.5)) # 3913
print(grid.capacity(read_fraction=0)) # 2000 print(grid.capacity(read_fraction=0)) # 2000
## f-resilient Strategies
strategy = grid.strategy(read_fraction=0.5, f=1) strategy = grid.strategy(read_fraction=0.5, f=1)
print(strategy.get_read_quorum()) print(strategy.get_read_quorum())
print(strategy.get_write_quorum()) print(strategy.get_write_quorum())
simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) print(grid.capacity(write_fraction=1, f=0))
crumbling_walls = QuorumSystem(reads=a*b + c*d*e) print(grid.capacity(write_fraction=1, f=1))
paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b)
assert(simple_majority.resilience() >= 1) write2 = QuorumSystem(writes=choose(2, [a, b, c, d, e]))
assert(crumbling_walls.resilience() >= 1) print(write2.capacity(write_fraction=1, f=0))
assert(paths.resilience() >= 1) print(write2.capacity(write_fraction=1, f=1))
distribution = {0.9: 0.9, 0.1: 0.1} ## Latency
print(simple_majority.capacity(read_fraction=distribution)) # 5089 import datetime
print(crumbling_walls.capacity(read_fraction=distribution)) # 5824
print(paths.capacity(read_fraction=distribution)) # 5725
print(simple_majority.capacity(read_fraction=distribution, f=1)) # 3816 def seconds(x: int) -> datetime.timedelta:
print(crumbling_walls.capacity(read_fraction=distribution, f=1)) # 1908 return datetime.timedelta(seconds=x)
print(paths.capacity(read_fraction=distribution, f=1)) # 1908
a = Node('a', write_capacity=1000, read_capacity=10000, latency=seconds(1))
b = Node('b', write_capacity=500, read_capacity=5000, latency=seconds(2))
c = Node('c', write_capacity=1000, read_capacity=10000, latency=seconds(3))
d = Node('d', write_capacity=500, read_capacity=5000, latency=seconds(4))
e = Node('e', write_capacity=1000, read_capacity=10000, latency=seconds(5))
f = Node('f', write_capacity=500, read_capacity=5000, latency=seconds(6))
grid = QuorumSystem(reads=a*b*c + d*e*f)
sigma = grid.strategy(read_fraction=0.5, optimize='latency')
print(sigma)
print(sigma.latency(read_fraction=1))
print(sigma.latency(read_fraction=0))
print(sigma.latency(read_fraction=0.5))
print(grid.latency(read_fraction=0.5, optimize='latency'))
sigma = grid.strategy(read_fraction=0.5,
optimize='latency',
load_limit=1/1500)
print(sigma)
print(sigma.capacity(read_fraction=0.5))
print(sigma.latency(read_fraction=0.5))
sigma = grid.strategy(read_fraction=0.5,
optimize='load',
latency_limit=seconds(4))
print(sigma)
print(sigma.capacity(read_fraction=0.5))
print(sigma.latency(read_fraction=0.5))
# grid.strategy(read_fraction=0.5,
# optimize='load',
# latency_limit=seconds(1))
# quoracle.quorum_system.NoStrategyFoundError: no strategy satisfies the given constraints
## Network Load
sigma = grid.strategy(read_fraction=0.5, optimize='network')
print(sigma)
print(sigma.network_load(read_fraction=0.5))
print(grid.network_load(read_fraction=0.5, optimize='network'))
sigma = grid.strategy(read_fraction=0.5,
optimize='network',
load_limit=1/2000,
latency_limit=seconds(4))
## Search
qs, sigma = search(nodes=[a, b, c, d, e, f],
resilience=1,
f=1,
read_fraction=0.75,
optimize='load',
latency_limit=seconds(4),
network_limit=4,
timeout=seconds(60))
print(qs)
print(sigma)
print(sigma.capacity(read_fraction=0.75))
print(sigma.latency(read_fraction=0.75))
print(sigma.network_load(read_fraction=0.75))

View file

@ -1,7 +1,7 @@
from .distribution import Distribution from .distribution import Distribution
from .expr import choose, Expr, Node from .expr import choose, Expr, Node
from .quorum_system import (LATENCY, LOAD, NETWORK, NoStrategyFoundError, from .quorum_system import (LATENCY, LOAD, NETWORK, NoStrategyFoundError,
QuorumSystem, Strategy) QuorumSystem, Strategy, Tuple)
from typing import Iterator, List, Optional, TypeVar from typing import Iterator, List, Optional, TypeVar
import datetime import datetime
import itertools import itertools
@ -83,7 +83,7 @@ def search(nodes: List[Node[T]],
latency_limit: Optional[datetime.timedelta] = None, latency_limit: Optional[datetime.timedelta] = None,
f: int = 0, f: int = 0,
timeout: datetime.timedelta = datetime.timedelta(seconds=0)) \ timeout: datetime.timedelta = datetime.timedelta(seconds=0)) \
-> QuorumSystem[T]: -> Tuple[QuorumSystem[T], Strategy[T]]:
start_time = datetime.datetime.now() start_time = datetime.datetime.now()
def metric(sigma: Strategy[T]) -> float: def metric(sigma: Strategy[T]) -> float:
@ -95,10 +95,12 @@ def search(nodes: List[Node[T]],
return sigma.latency(read_fraction, write_fraction).total_seconds() return sigma.latency(read_fraction, write_fraction).total_seconds()
opt_qs: Optional[QuorumSystem[T]] = None opt_qs: Optional[QuorumSystem[T]] = None
opt_sigma: Optional[Strategy[T]] = None
opt_metric: Optional[float] = None opt_metric: Optional[float] = None
def do_search(exprs: Iterator[Expr[T]]) -> None: def do_search(exprs: Iterator[Expr[T]]) -> None:
nonlocal opt_qs nonlocal opt_qs
nonlocal opt_sigma
nonlocal opt_metric nonlocal opt_metric
for reads in exprs: for reads in exprs:
@ -117,6 +119,7 @@ def search(nodes: List[Node[T]],
sigma_metric = metric(sigma) sigma_metric = metric(sigma)
if opt_metric is None or sigma_metric < opt_metric: if opt_metric is None or sigma_metric < opt_metric:
opt_qs = qs opt_qs = qs
opt_sigma = sigma
opt_metric = sigma_metric opt_metric = sigma_metric
except NoStrategyFoundError: except NoStrategyFoundError:
pass pass
@ -131,4 +134,5 @@ def search(nodes: List[Node[T]],
if opt_qs is None: if opt_qs is None:
raise ValueError('no quorum system found') raise ValueError('no quorum system found')
else: else:
return opt_qs assert opt_sigma is not None
return (opt_qs, opt_sigma)