From 30c54f8886886e9521fc45fdba5faa5f91a14674 Mon Sep 17 00:00:00 2001 From: Michael Whittaker Date: Fri, 5 Feb 2021 17:18:41 -0800 Subject: [PATCH] Updated tutorial + search returns sigma now too. --- README.md | 492 +++++++++++++++++++++++++----------- examples/case_study.py | 10 +- examples/paper.py | 2 +- examples/plot_node_loads.py | 2 +- examples/tutorial.py | 90 ++++++- quoracle/search.py | 10 +- 6 files changed, 440 insertions(+), 166 deletions(-) diff --git a/README.md b/README.md index 9c62d31..ce9ea7c 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,12 @@ Next, we specify the nodes in our quorum system. Our nodes can be strings, integers, IP addresses, anything! ```python -a = Node('a') -b = Node('b') -c = Node('c') -d = Node('d') -e = Node('e') -f = Node('f') +>>> a = Node('a') +>>> b = Node('b') +>>> c = Node('c') +>>> d = Node('d') +>>> e = Node('e') +>>> f = Node('f') ``` Now, we construct a two by three grid of nodes. Every row is read quorum, and @@ -38,36 +38,45 @@ quorum system, we only have to specify the set of read quorums. The library figures out the optimal set of write quorums automatically. ```python -grid = QuorumSystem(reads=a*b*c + d*e*f) +>>> grid = QuorumSystem(reads=a*b*c + d*e*f) ``` -This next code snippet prints out the read quorums `{'a', 'b', 'c'}` and `{'d', -'e', 'f'}`. +This next code snippet prints out the read quorums. ```python -for r in grid.read_quorums(): - print(r) +>>> for r in grid.read_quorums(): +... print(r) +{'a', 'b', 'c'} +{'d', 'e', 'f'} ``` -And this next code snippet prints out the write quorums `{'a', 'd'}`, `{'a', -'e'}`, `{'b', 'f'}`, `{'b', 'd'}`, ... +And this next code snippet prints out the write quorums. ```python -for w in grid.write_quorums(): - print(w) +>>> for w in grid.write_quorums(): +... print(w) +{'a', 'd'} +{'a', 'e'} +{'a', 'f'} +{'b', 'd'} +{'b', 'e'} +{'b', 'f'} +{'c', 'd'} +{'c', 'e'} +{'c', 'f'} ``` Alternatively, we can construct a quorum system be specifying the write quorums. ```python -QuorumSystem(writes=(a + b + c) * (d + e + f)) +>>> QuorumSystem(writes=(a + b + c) * (d + e + f)) ``` Or, we can specify both the read and write quorums. ```python -QuorumSystem(reads=a*b*c + d*e*f, writes=(a + b + c) * (d + e + f)) +>>> QuorumSystem(reads=a*b*c + d*e*f, writes=(a + b + c) * (d + e + f)) ``` But, remember that every read quorum must intersect every write quorum. If we @@ -75,21 +84,29 @@ try to construct a quorum system with non-overlapping quorums, an exception will be thrown. ```python -QuorumSystem(reads=a+b+c, writes=d+e+f) -# ValueError: Not all read quorums intersect all write quorums +>>> QuorumSystem(reads=a+b+c, writes=d+e+f) +Traceback (most recent call last): +... +ValueError: Not all read quorums intersect all write quorums ``` We can check whether a given set is a read or write quorum. Note that any superset of a quorum is also considered a quorum. ```python -grid.is_read_quorum({'a', 'b', 'c'}) # True -grid.is_read_quorum({'a', 'b', 'c', 'd'}) # True -grid.is_read_quorum({'a', 'b', 'd'}) # False - -grid.is_write_quorum({'a', 'd'}) # True -grid.is_write_quorum({'a', 'd', 'd'}) # True -grid.is_write_quorum({'a', 'b'}) # False +>>> grid.is_read_quorum({'a', 'b', 'c'}) +True +>>> grid.is_read_quorum({'a', 'b', 'c', 'd'}) +True +>>> grid.is_read_quorum({'a', 'b', 'd'}) +False +>>> +>>> grid.is_write_quorum({'a', 'd'}) +True +>>> grid.is_write_quorum({'a', 'd', 'd'}) +True +>>> grid.is_write_quorum({'a', 'b'}) +False ``` ## Resilience @@ -107,9 +124,12 @@ quorum, so our write resilience is 1. The resilience is the minimum of 1 and 2, which is 1. ```python -grid.read_resilience() # 1 -grid.write_resilience() # 2 -grid.resilience() # 1 +>>> grid.read_resilience() +1 +>>> grid.write_resilience() +2 +>>> grid.resilience() +1 ``` ## Strategies @@ -125,19 +145,19 @@ in weights, and the library will automatically normalize the weights into a valid probability distribution. ```python -# The read quorum strategy. -sigma_r = { - frozenset({'a', 'b', 'c'}): 2., - frozenset({'d', 'e', 'f'}): 1., -} - -# The write quorum strategy. -sigma_w = { - frozenset({'a', 'd'}): 1., - frozenset({'b', 'e'}): 1., - frozenset({'c', 'f'}): 1., -} -strategy = grid.make_strategy(sigma_r, sigma_w) +>>> # The read quorum strategy. +>>> sigma_r = { +... frozenset({'a', 'b', 'c'}): 2., +... frozenset({'d', 'e', 'f'}): 1., +... } +>>> +>>> # The write quorum strategy. +>>> sigma_w = { +... frozenset({'a', 'd'}): 1., +... frozenset({'b', 'e'}): 1., +... frozenset({'c', 'f'}): 1., +... } +>>> strategy = grid.make_strategy(sigma_r, sigma_w) ``` Once we have a strategy, we can use it to sample read and write quorums. Here, @@ -146,14 +166,20 @@ row, and we expect `get_write_quorum` to return every column uniformly at random. ```python -print(strategy.get_read_quorum()) -print(strategy.get_read_quorum()) -print(strategy.get_read_quorum()) -print(strategy.get_read_quorum()) -print(strategy.get_write_quorum()) -print(strategy.get_write_quorum()) -print(strategy.get_write_quorum()) -print(strategy.get_write_quorum()) +>>> strategy.get_read_quorum() +{'a', 'b', 'c'} +>>> strategy.get_read_quorum() +{'a', 'b', 'c'} +>>> strategy.get_read_quorum() +{'d', 'e', 'f'} +>>> strategy.get_write_quorum() +{'b', 'e'} +>>> strategy.get_write_quorum() +{'c', 'f'} +>>> strategy.get_write_quorum() +{'b', 'e'} +>>> strategy.get_write_quorum() +{'a', 'd'} ``` ## Load and Capacity @@ -187,7 +213,8 @@ The largest node load is 2/3, so our strategy has a load of 2/3. Rather than calculating load by hand, we can simply call the `load` function. ```python -print(strategy.load(read_fraction=1)) # 2/3 +>>> strategy.load(read_fraction=1) +0.6666666666666666 ``` Now let's calculate the load of our strategy assuming a 100% write workload. @@ -211,7 +238,8 @@ than calculating load by hand, we can simply call the `load` function. Note that we can pass in a `read_fraction` or `write_fraction` but not both. ```python -print(strategy.load(write_fraction=1)) # 1/3 +>>> strategy.load(write_fraction=1) +0.3333333333333333 ``` Now let's calculate the load of our strategy on a 25% read and 75% write @@ -247,18 +275,25 @@ point, you can see that calculating load by hand is extremely tedious. We could have skipped all that work and called `load` instead! ```python -print(strategy.load(read_fraction=0.25)) # 5/12 +>>> strategy.load(read_fraction=0.25) +0.41666666666666663 ``` We can also compute the load on every node. ```python -print(strategy.node_load(a, read_fraction=0.25)) # 5/12 -print(strategy.node_load(b, read_fraction=0.25)) # 5/12 -print(strategy.node_load(c, read_fraction=0.25)) # 5/12 -print(strategy.node_load(d, read_fraction=0.25)) # 1/3 -print(strategy.node_load(e, read_fraction=0.25)) # 1/3 -print(strategy.node_load(f, read_fraction=0.25)) # 1/3 +>>> print(strategy.node_load(a, read_fraction=0.25)) +0.41666666666666663 +>>> print(strategy.node_load(b, read_fraction=0.25)) +0.41666666666666663 +>>> print(strategy.node_load(c, read_fraction=0.25)) +0.41666666666666663 +>>> print(strategy.node_load(d, read_fraction=0.25)) +0.3333333333333333 +>>> print(strategy.node_load(e, read_fraction=0.25)) +0.3333333333333333 +>>> print(strategy.node_load(f, read_fraction=0.25)) +0.3333333333333333 ``` Our strategy has a load of 5/12 on a 25% read workload, but what about the @@ -267,14 +302,11 @@ strategy is not optimal. We can call the `strategy` function to compute the optimal strategy automatically. ```python -strategy = grid.strategy(read_fraction=0.25) -print(strategy) -# Strategy(reads={('a', 'b', 'c'): 0.5, -# ('d', 'e', 'f'): 0.5}, -# writes={('a', 'f'): 0.33333333, -# ('b', 'e'): 0.33333333, -# ('c', 'd'): 0.33333333}) -print(strategy.load(read_fraction=0.25)) # 3/8 +>>> strategy = grid.strategy(read_fraction=0.25) +>>> strategy +Strategy(reads={('a', 'b', 'c'): 0.5, ('d', 'e', 'f'): 0.5}, writes={('a', 'f'): 0.33333333, ('b', 'e'): 0.33333333, ('c', 'd'): 0.33333333}) +>>> strategy.load(read_fraction=0.25)) +0.3749999975 ``` Here, we see that the optimal strategy picks all rows and all columns @@ -287,16 +319,20 @@ this strategy is optimal for a read fraction of 25%, but it may not be optimal for other read fractions. ```python -print(strategy.load(read_fraction=0)) # 1/3 -print(strategy.load(read_fraction=0.5)) # 5/12 -print(strategy.load(read_fraction=1)) # 1/2 +>>> strategy.load(read_fraction=0) +0.33333333 +>>> strategy.load(read_fraction=0.5) +0.416666665 +>>> strategy.load(read_fraction=1) +0.5 ``` We can also use a quorum system's `load` function. The code snippet below is a shorthand for `grid.strategy(read_fraction=0.25).load(read_fraction=0.25)`. ```python -grid.load(read_fraction=0.25) # 0.375 +>>> grid.load(read_fraction=0.25) +0.3749999975 ``` The capacity of strategy or quorum is simply the inverse of the load. Our @@ -304,7 +340,8 @@ quorum system has a load of 3/8 on a 25% read workload, so it has a capacity of 8/3. ```python -print(grid.capacity(read_fraction=0.25)) # 8/3 +>>> grid.capacity(read_fraction=0.25) +2.6666666844444444 ``` The _capacity_ of a quorum system is proportional to the maximum throughput @@ -321,9 +358,10 @@ will return the strategy that minimizes the expected load according to this distribution. ```python -distribution = {0.1: 1, 0.75: 1} -strategy = grid.strategy(read_fraction=distribution) -strategy.load(read_fraction=distribution) # 0.404 +>>> distribution = {0.1: 1, 0.75: 1} +>>> strategy = grid.strategy(read_fraction=distribution) +>>> strategy.load(read_fraction=distribution) +0.40416666474999996 ``` ## Heterogeneous Node @@ -334,12 +372,12 @@ this, we instantiate every node with its capacity. Here, nodes `a`, `c`, and only process 500 requests per second. ```python -a = Node('a', capacity=1000) -b = Node('b', capacity=500) -c = Node('c', capacity=1000) -d = Node('d', capacity=500) -e = Node('e', capacity=1000) -f = Node('f', capacity=500) +>>> a = Node('a', capacity=1000) +>>> b = Node('b', capacity=500) +>>> c = Node('c', capacity=1000) +>>> d = Node('d', capacity=500) +>>> e = Node('e', capacity=1000) +>>> f = Node('f', capacity=500) ``` Now, the definition of capacity becomes much simpler. The capacity of a quorum @@ -348,10 +386,12 @@ interpreted as the inverse of the capacity. Here, our quorum system is capable of processing 1333 commands per second for a workload of 75% reads. ```python -grid = QuorumSystem(reads=a*b*c + d*e*f) -strategy = grid.strategy(read_fraction=0.75) -strategy.load(read_fraction=0.75) # 0.00075 -strategy.capacity(read_fraction=0.75) # 1333 +>>> grid = QuorumSystem(reads=a*b*c + d*e*f) +>>> strategy = grid.strategy(read_fraction=0.75) +>>> strategy.load(read_fraction=0.75) +0.00075 +>>> strategy.capacity(read_fraction=0.75) +1333.3333333333333 ``` Nodes might also process reads and writes at different speeds. We can specify @@ -359,25 +399,28 @@ the peak read and write throughput of every node separately. Here, we assume reads are ten times as fast as writes. ```python -a = Node('a', write_capacity=1000, read_capacity=10000) -b = Node('b', write_capacity=500, read_capacity=5000) -c = Node('c', write_capacity=1000, read_capacity=10000) -d = Node('d', write_capacity=500, read_capacity=5000) -e = Node('e', write_capacity=1000, read_capacity=10000) -f = Node('f', write_capacity=500, read_capacity=5000) +>>> a = Node('a', write_capacity=1000, read_capacity=10000) +>>> b = Node('b', write_capacity=500, read_capacity=5000) +>>> c = Node('c', write_capacity=1000, read_capacity=10000) +>>> d = Node('d', write_capacity=500, read_capacity=5000) +>>> e = Node('e', write_capacity=1000, read_capacity=10000) +>>> f = Node('f', write_capacity=500, read_capacity=5000) ``` With 100% reads, our quorum system can process 10,000 commands per second. This throughput decreases as we increase the fraction of writes. ```python -grid = QuorumSystem(reads=a*b*c + d*e*f) -grid.capacity(read_fraction=1) # 10,000 -grid.capacity(read_fraction=0.5) # 3913 -grid.capacity(read_fraction=0) # 2000 +>>> grid = QuorumSystem(reads=a*b*c + d*e*f) +>>> grid.capacity(read_fraction=1) +10000.0 +>>> grid.capacity(read_fraction=0.5) +3913.043450018904 +>>> grid.capacity(read_fraction=0) +2000.0 ``` -# `f`-resilient Strategies +## `f`-resilient Strategies Another real world complication is the fact that machines sometimes fail and are sometimes slow. If we contact a quorum of nodes, some of them may fail, and we'll get stuck waiting to hear back from them. Or, some of them may be @@ -391,68 +434,231 @@ By default, `strategy` returns `0`-resilient quorums. We can pass in the `f` argument to get more resilient strategies. ```python -strategy = grid.strategy(read_fraction=0.5, f=1) +>>> strategy = grid.strategy(read_fraction=0.5, f=1) ``` These sets are quorums even if 1 machine fails. ```python -strategy.get_read_quorum() -strategy.get_write_quorum() +>>> strategy.get_read_quorum() +{'b', 'f', 'e', 'd', 'a', 'c'} +>>> strategy.get_write_quorum() +{'b', 'd', 'a', 'e'} +``` + +Note that as we increase resilience, quorums get larger, and we decrease +capacity. On a 100% write workload, our grid quorum system has a 0-resilient +capacity of 2000 commands per second, but a 1-resilient capacity of 1000 +commands per second. + +```python +>>> grid.capacity(write_fraction=1, f=0) +2000.0 +>>> grid.capacity(write_fraction=1, f=1) +1000.0 +``` + +Also note that not all quorum systems are equally as resilient. In the next +code snippet, we construct a "write 2, read 3" quorum system using the `choose` +function. For this quorum system, every set of 2 nodes is a write quorum, and +every set of 3 nodes is a read quorum. This quorum system has a 0-resilient +capacity of 2000 (the same as the grid), but a 1-resilient capacity of 1333 +(higher than the grid). + +```python +>>> write2 = QuorumSystem(writes=choose(2, [a, b, c, d, e])) +>>> write2.capacity(write_fraction=1, f=0) +2000.0 +>>> write2.capacity(write_fraction=1, f=1) +1333.3333333333333 ``` ## Latency -TODO(mwhittaker): Write. +In the real world, not all nodes are equally as far away. Some are close and +some are far. To address this, we associate every node with a latency, i.e. the +time the required to contact the node. We model this in quoracle by assigning +each node a latency, represented as a `datetime.timedelta`. Here, nodes `a`, +`b`, `c`, `d`, `e`, and `f` in our grid have latencies of 1, 2, 3, 4, 5, and 6 +seconds. + +```python +>>> import datetime +>>> +>>> def seconds(x: int) -> datetime.timedelta: +>>> return datetime.timedelta(seconds=x) +>>> +>>> a = Node('a', write_capacity=1000, read_capacity=10000, latency=seconds(1)) +>>> b = Node('b', write_capacity=500, read_capacity=5000, latency=seconds(2)) +>>> c = Node('c', write_capacity=1000, read_capacity=10000, latency=seconds(3)) +>>> d = Node('d', write_capacity=500, read_capacity=5000, latency=seconds(4)) +>>> e = Node('e', write_capacity=1000, read_capacity=10000, latency=seconds(5)) +>>> f = Node('f', write_capacity=500, read_capacity=5000, latency=seconds(6)) +>>> grid = QuorumSystem(reads=a*b*c + d*e*f) +``` + +The _latency of a quorum_ `q` is the time required to form a quorum of +responses after contacting every node in `q`. For example, the read quorum `{a, +b, c}` has a latency of three seconds. It takes 1 second to hear back from `a`, +another second to hear back from `b`, and then a final second to hear back from +`c`. The write quorum `{a, b, d, f}` has a latency of 4 seconds. It takes 1 +second to hear back from `a`, another second to hear back from `b`, and then +another 2 seconds to hear back from `d`. The set `{a, b, d}` is a write quorum, +so the latency of this quorum is 4 seconds. Note that we didn't have to wait to +hear back from `f` in order to form a quorum. + +The _latency of a strategy_ is the expected latency of the quorums that it +chooses. The _latency of a quorum system_ is the latency of the latency-optimal +strategy. We can use the `strategy` function to find a latency-optimal strategy +by passing in the value `"latency"` to the `optimize` flag. + +```python +>>> sigma = grid.strategy(read_fraction=0.5, optimize='latency') +>>> sigma +Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('c', 'd'): 1.0}) +``` + +We can find the latency of this strategy by calling the `latency` function. + +```python +>>> sigma.latency(read_fraction=1) +0:00:03 +>>> sigma.latency(read_fraction=0) +0:00:04 +>>> sigma.latency(read_fraction=0.5) +0:00:03.500000 +``` + +As with capacity, we can call the `latency` function on our quorum system +directly. In the follow code snippet `grid.latency(read_fraction=0.5, +optimize='latency')` is a shorthand for `grid.strategy(read_fraction=0.5, +optimize='latency').latency(read_fraction=0.5)`. + +``` +>>> grid.latency(read_fraction=0.5, optimize='latency') +0:00:03.500000 +``` + +Note that finding the latency-optimal strategy is trivial. The latency-optimal +strategy always selects the read and write quorum with the smallest latencies. +However, things get complicated when we start optimizing for capacity and +latency at the same time. When we call the `strategy` function with +`optimize='latency'`, we can pass in a constraint on the maximum allowable load +using the `load_limit` argument. For example, in the code snippet below, we +find the latency-optimal strategy with a capacity of at least 1,500. + +```python +>>> sigma = grid.strategy(read_fraction=0.5, +... optimize='latency', +... load_limit=1/1500) +>>> sigma +Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('a', 'd'): 0.66666667, ('c', 'e'): 0.33333333}) +>>> sigma.capacity(read_fraction=0.5) +1499.9999925 +>>> sigma.latency(read_fraction=0.5) +0:00:03.666667 +``` + +This strategy always picks the read quorum `{a, b, c}`, and picks the write +quorum `{a, d}` twice as often as write quorum `{c, e}`. It achieves our +desired capacity of 1,500 commands per second (ignoring rounding errors) and +has a latency of 3.66 seconds. We can also find a load-optimal strategy with a +latency constraint. + +```python +>>> sigma = grid.strategy(read_fraction=0.5, +... optimize='load', +... latency_limit=seconds(4)) +>>> sigma +Strategy(reads={('a', 'b', 'c'): 0.98870056, ('d', 'e', 'f'): 0.011299435}, writes={('a', 'd'): 0.19548023, ('a', 'f'): 0.22429379, ('b', 'd'): 0.062711864, ('b', 'e'): 0.097740113, ('c', 'e'): 0.41977401}) +>>> sigma.capacity(read_fraction=0.5) +3856.2090893331633 +>>> sigma.latency(read_fraction=0.5) +0:00:04.000001 +``` + +This strategy is rather complicated and would be hard to find by hand. It has a +capacity of 3856 commands per second and achieves our latency constraint of 4 +seconds. + +Be careful when specifying constraints. If the constraints cannot be met, a +`NoStrategyFound` exception is raised. + +```python +>>> grid.strategy(read_fraction=0.5, +... optimize='load', +... latency_limit=seconds(1)) +Traceback (most recent call last): +... +quoracle.quorum_system.NoStrategyFoundError: no strategy satisfies the given constraints +``` ## Network Load -TODO(mwhittaker): Write. +Another useful metric is network load. When a protocol performs a read, it has +to send messages to every node in a read quorum, and when a protocol performs a +write, it has to send messages to every node in a write quorum. The bigger the +quorums, the more messages are sent over the network. The _network load of a +quorum_ is simply the size of the quorum, the _network load of a strategy_ is +the expected network load of the quorums it chooses, and the _network load of a +quorum system_ is the network load of the network load-optimal strategy. + +We can find network load optimal-strategies using the `strategy` function by +passing in `"network"` to the `optimize` flag. We can also specify constraints +on load and latency. In general, using the `strategy` function, we can pick one +of load, latency, or network load to optimize and specify constraints on the +other two metrics. + +```python +>>> sigma = grid.strategy(read_fraction=0.5, optimize='network') +>>> sigma +Strategy(reads={('a', 'b', 'c'): 1.0}, writes={('c', 'f'): 1.0}) +>>> sigma.network_load(read_fraction=0.5) +2.5 +>>> grid.network_load(read_fraction=0.5, optimize='network') +2.5 +>>> sigma = grid.strategy(read_fraction=0.5, +... optimize='network', +... load_limit=1/2000, +... latency_limit=seconds(4)) +``` ## Search -TODO(mwhittaker): Write. +Finding good quorum systems by hand is hard. quoracle includes a heuristic +based search procedure that tries to find quorum systems that are optimal with +respect a target metric and set of constraints. For example, lets try to find a +quorum system -## Case Study -TODO(mwhittaker): Update. +- that has resilience 1, +- that is 1-resilient load optimal for a 75% read workload, +- that has a latency of most 4 seconds, and +- that has a network load of at most 4. -Putting everything together, we can use this library to pick quorum systems -that are well suited to our workload. For example, say we're implementing a -distributed file system and want to pick a 5 node quorum system with a -resilience of 1 that has a good load on workloads that are 90% reads 90% of the -time and 10% reads 10% of the time. We can try out three quorum systems: a -simple majority quorum system, a crumbling walls quorum system, and a paths -quorum system. +Because the number of quorum systems is enormous, the search procedure can take +a very, very long time. We pass in a timeout to the search procedure to limit +how long it takes. If the timeout expires, `search` returns the most optimal +quorum system that it found so far. ```python -simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) -crumbling_walls = QuorumSystem(reads=a*b + c*d*e) -paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) +## Search +>>> qs, sigma = search(nodes=[a, b, c, d, e, f], +... resilience=1, +... f=1, +... read_fraction=0.75, +... optimize='load', +... latency_limit=seconds(4), +... network_limit=4, +... timeout=seconds(60)) +>>> qs +QuorumSystem(reads=choose3(a, c, e, (b + d + f)), writes=choose2(a, c, e, (b * d * f))) +>>> sigma +Strategy(reads={('a', 'c', 'e', 'f'): 0.33333333, ('a', 'b', 'c', 'e'): 0.33333333, ('a', 'c', 'd', 'e'): 0.33333333}, writes={('a', 'b', 'c', 'd', 'f'): 0.15714286, ('b', 'c', 'd', 'e', 'f'): 0.15714286, ('a', 'c', 'e'): 0.52857143, ('a', 'b', 'd', 'e', 'f'): 0.15714286}) +>>> sigma.capacity(read_fraction=0.75) +3499.9999536250007 +>>> sigma.latency(read_fraction=0.75) +0:00:03.907143 +>>> sigma.network_load(read_fraction=0.75) +3.9857142674999997 ``` -We make sure we have the desired resilience. - -```python -assert(simple_majority.resilience() >= 1) -assert(crumbling_walls.resilience() >= 1) -assert(paths.resilience() >= 1) -``` - -We check the loads and see that the crumbling walls quorum system has the -highest load, so we use the crumbling walls quorum system to implement our file -system. - -```python -distribution = {0.9: 0.9, 0.1: 0.1} -simple_majority.capacity(read_fraction=distribution) # 5089 -crumbling_walls.capacity(read_fraction=distribution) # 5837 -paths.capacity(read_fraction=distribution) # 5725 -``` - -Maybe some time later, we experiencing high latency because of stragglers and -want to switch to a 1-resilient strategy. We again compute the loads, but now -see that the simple majority quorum system has the highest load, so we switch -from the crumbling walls quorum system to the simple majority quorum system. - -```python -simple_majority.capacity(read_fraction=distribution, f=1) # 3816 -crumbling_walls.capacity(read_fraction=distribution, f=1) # 1908 -paths.capacity(read_fraction=distribution, f=1) # 1908 -``` +Here, the search procedure returns the quorum system `choose(3, [a, c, e, +b+d+f])` with a capacity of 3500 commands per second and with latency and +network load close to the limits specified. diff --git a/examples/case_study.py b/examples/case_study.py index 2f74b74..a18ea76 100644 --- a/examples/case_study.py +++ b/examples/case_study.py @@ -44,9 +44,9 @@ def main() -> None: print('0-resilient Searched') start = datetime.datetime.now() - opt = search(nodes=[a, b, c, d, e], - resilience=1, - read_fraction=fr) + opt, _ = search(nodes=[a, b, c, d, e], + resilience=1, + read_fraction=fr) stop = datetime.datetime.now() print((stop - start)) sigma = opt.strategy(read_fraction=fr) @@ -87,7 +87,7 @@ def main() -> None: print('1-resilient Searched') start = datetime.datetime.now() - opt = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, f=1) + opt, _ = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, f=1) stop = datetime.datetime.now() print(stop - start) sigma = opt.strategy(read_fraction=fr, f=1) @@ -109,7 +109,7 @@ def main() -> None: print('Latency Optimal Searched') start = datetime.datetime.now() - opt = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, + opt, _ = search(nodes=[a, b, c, d, e], resilience=1, read_fraction=fr, optimize='latency', load_limit=1/2000) stop = datetime.datetime.now() print(stop - start) diff --git a/examples/paper.py b/examples/paper.py index dc4c46c..46524fb 100644 --- a/examples/paper.py +++ b/examples/paper.py @@ -70,7 +70,7 @@ def main() -> None: print() # Search. - qs = search( + qs, _ = search( nodes = [a, b, c, d], read_fraction = 1, optimize = 'latency', diff --git a/examples/plot_node_loads.py b/examples/plot_node_loads.py index 001ace6..e4ded1b 100644 --- a/examples/plot_node_loads.py +++ b/examples/plot_node_loads.py @@ -17,7 +17,7 @@ def main(): simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) crumbling_walls = QuorumSystem(reads=a*b + c*d*e) paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) - opt = search(nodes, read_fraction=fr, timeout=datetime.timedelta(seconds=9)) + opt, _ = search(nodes, read_fraction=fr, timeout=datetime.timedelta(seconds=9)) fig, ax = plt.subplots(3, 4, figsize = (6.3 * 2, 4.8 * 2), sharey='row') for i, qs in enumerate([simple_majority, crumbling_walls, paths, opt]): diff --git a/examples/tutorial.py b/examples/tutorial.py index 293e5c6..7d136c8 100644 --- a/examples/tutorial.py +++ b/examples/tutorial.py @@ -1,3 +1,4 @@ +## Quorum Systems from quoracle import * a = Node('a') @@ -30,10 +31,12 @@ print(grid.is_write_quorum({'a', 'd'})) # True print(grid.is_write_quorum({'a', 'd', 'd'})) # True print(grid.is_write_quorum({'a', 'b'})) # False +## Resilience print(grid.read_resilience()) # 1 print(grid.write_resilience()) # 2 print(grid.resilience()) # 1 +## Strategies # The read quorum strategy. sigma_r = { frozenset({'a', 'b', 'c'}): 2., @@ -57,6 +60,7 @@ print(strategy.get_write_quorum()) print(strategy.get_write_quorum()) print(strategy.get_write_quorum()) +## Load and Capacity print(strategy.load(read_fraction=1)) # 2/3 print(strategy.load(write_fraction=1)) # 1/3 @@ -82,10 +86,12 @@ print(grid.load(read_fraction=0.25)) # 3/8 print(grid.capacity(read_fraction=0.25)) # 8/3 +## Workload Distributions distribution = {0.1: 0.5, 0.75: 0.5} strategy = grid.strategy(read_fraction=distribution) print(strategy.load(read_fraction=distribution)) # 0.404 +## Heterogeneous Node a = Node('a', capacity=1000) b = Node('b', capacity=500) c = Node('c', capacity=1000) @@ -110,24 +116,82 @@ print(grid.capacity(read_fraction=1)) # 10,000 print(grid.capacity(read_fraction=0.5)) # 3913 print(grid.capacity(read_fraction=0)) # 2000 +## f-resilient Strategies strategy = grid.strategy(read_fraction=0.5, f=1) print(strategy.get_read_quorum()) print(strategy.get_write_quorum()) -simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) -crumbling_walls = QuorumSystem(reads=a*b + c*d*e) -paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) +print(grid.capacity(write_fraction=1, f=0)) +print(grid.capacity(write_fraction=1, f=1)) -assert(simple_majority.resilience() >= 1) -assert(crumbling_walls.resilience() >= 1) -assert(paths.resilience() >= 1) +write2 = QuorumSystem(writes=choose(2, [a, b, c, d, e])) +print(write2.capacity(write_fraction=1, f=0)) +print(write2.capacity(write_fraction=1, f=1)) -distribution = {0.9: 0.9, 0.1: 0.1} -print(simple_majority.capacity(read_fraction=distribution)) # 5089 -print(crumbling_walls.capacity(read_fraction=distribution)) # 5824 -print(paths.capacity(read_fraction=distribution)) # 5725 +## Latency +import datetime -print(simple_majority.capacity(read_fraction=distribution, f=1)) # 3816 -print(crumbling_walls.capacity(read_fraction=distribution, f=1)) # 1908 -print(paths.capacity(read_fraction=distribution, f=1)) # 1908 +def seconds(x: int) -> datetime.timedelta: + return datetime.timedelta(seconds=x) + +a = Node('a', write_capacity=1000, read_capacity=10000, latency=seconds(1)) +b = Node('b', write_capacity=500, read_capacity=5000, latency=seconds(2)) +c = Node('c', write_capacity=1000, read_capacity=10000, latency=seconds(3)) +d = Node('d', write_capacity=500, read_capacity=5000, latency=seconds(4)) +e = Node('e', write_capacity=1000, read_capacity=10000, latency=seconds(5)) +f = Node('f', write_capacity=500, read_capacity=5000, latency=seconds(6)) +grid = QuorumSystem(reads=a*b*c + d*e*f) + +sigma = grid.strategy(read_fraction=0.5, optimize='latency') +print(sigma) + +print(sigma.latency(read_fraction=1)) +print(sigma.latency(read_fraction=0)) +print(sigma.latency(read_fraction=0.5)) + +print(grid.latency(read_fraction=0.5, optimize='latency')) + +sigma = grid.strategy(read_fraction=0.5, + optimize='latency', + load_limit=1/1500) +print(sigma) +print(sigma.capacity(read_fraction=0.5)) +print(sigma.latency(read_fraction=0.5)) + +sigma = grid.strategy(read_fraction=0.5, + optimize='load', + latency_limit=seconds(4)) +print(sigma) +print(sigma.capacity(read_fraction=0.5)) +print(sigma.latency(read_fraction=0.5)) + +# grid.strategy(read_fraction=0.5, +# optimize='load', +# latency_limit=seconds(1)) +# quoracle.quorum_system.NoStrategyFoundError: no strategy satisfies the given constraints + +## Network Load +sigma = grid.strategy(read_fraction=0.5, optimize='network') +print(sigma) +print(sigma.network_load(read_fraction=0.5)) +print(grid.network_load(read_fraction=0.5, optimize='network')) +sigma = grid.strategy(read_fraction=0.5, + optimize='network', + load_limit=1/2000, + latency_limit=seconds(4)) + +## Search +qs, sigma = search(nodes=[a, b, c, d, e, f], + resilience=1, + f=1, + read_fraction=0.75, + optimize='load', + latency_limit=seconds(4), + network_limit=4, + timeout=seconds(60)) +print(qs) +print(sigma) +print(sigma.capacity(read_fraction=0.75)) +print(sigma.latency(read_fraction=0.75)) +print(sigma.network_load(read_fraction=0.75)) diff --git a/quoracle/search.py b/quoracle/search.py index 704a26e..16be3f0 100644 --- a/quoracle/search.py +++ b/quoracle/search.py @@ -1,7 +1,7 @@ from .distribution import Distribution from .expr import choose, Expr, Node from .quorum_system import (LATENCY, LOAD, NETWORK, NoStrategyFoundError, - QuorumSystem, Strategy) + QuorumSystem, Strategy, Tuple) from typing import Iterator, List, Optional, TypeVar import datetime import itertools @@ -83,7 +83,7 @@ def search(nodes: List[Node[T]], latency_limit: Optional[datetime.timedelta] = None, f: int = 0, timeout: datetime.timedelta = datetime.timedelta(seconds=0)) \ - -> QuorumSystem[T]: + -> Tuple[QuorumSystem[T], Strategy[T]]: start_time = datetime.datetime.now() def metric(sigma: Strategy[T]) -> float: @@ -95,10 +95,12 @@ def search(nodes: List[Node[T]], return sigma.latency(read_fraction, write_fraction).total_seconds() opt_qs: Optional[QuorumSystem[T]] = None + opt_sigma: Optional[Strategy[T]] = None opt_metric: Optional[float] = None def do_search(exprs: Iterator[Expr[T]]) -> None: nonlocal opt_qs + nonlocal opt_sigma nonlocal opt_metric for reads in exprs: @@ -117,6 +119,7 @@ def search(nodes: List[Node[T]], sigma_metric = metric(sigma) if opt_metric is None or sigma_metric < opt_metric: opt_qs = qs + opt_sigma = sigma opt_metric = sigma_metric except NoStrategyFoundError: pass @@ -131,4 +134,5 @@ def search(nodes: List[Node[T]], if opt_qs is None: raise ValueError('no quorum system found') else: - return opt_qs + assert opt_sigma is not None + return (opt_qs, opt_sigma)