From 2a1b56e987ea34cb60d2fe4d0b44940ebc3b4a2c Mon Sep 17 00:00:00 2001 From: Michael Whittaker Date: Wed, 27 Jan 2021 14:56:00 -0800 Subject: [PATCH] Fixed buggy load computation. I was incorrectly computing load on a distribution of read fractions. I have to compute the load for each fr separately and then weight them. --- README.md | 2 +- quorums/quorum_system.py | 109 +++++++++++++++++++++++++++++++++------ quorums/strategy.py | 54 ++++++++++++------- tutorial.py | 102 ++++++++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+), 38 deletions(-) create mode 100644 tutorial.py diff --git a/README.md b/README.md index b8cd391..473bfd9 100644 --- a/README.md +++ b/README.md @@ -267,7 +267,7 @@ system. ```python distribution = {0.9: 0.9, 0.1: 0.1} simple_majority.capacity(read_fraction=distribution) # 5089 -crumbling_walls.capacity(read_fraction=distribution) # 6824 +crumbling_walls.capacity(read_fraction=distribution) # 5837 paths.capacity(read_fraction=distribution) # 5725 ``` diff --git a/quorums/quorum_system.py b/quorums/quorum_system.py index d99ba1a..34b0544 100644 --- a/quorums/quorum_system.py +++ b/quorums/quorum_system.py @@ -129,17 +129,82 @@ class QuorumSystem(Generic[T]): write_quorums: List[Set[T]], read_fraction: Dict[float, float]) \ -> 'Strategy[T]': - # TODO(mwhittaker): Explain f_r calculation. - fr = sum(f * weight for (f, weight) in read_fraction.items()) + """ + Consider the following 2x2 grid quorum system. + a b + + c d + + with + + read_quorums = [{a, b}, {c, d}] + write_quorums = [{a, c}, {a, d}, {b, c}, {b, d}] + + We can form a linear program to compute the optimal load of this quorum + system for some fixed read fraction fr as follows. First, we create a + variable ri for every read quorum i and a variable wi for every write + quorum i. ri represents the probability of selecting the ith read + quorum, and wi represents the probability of selecting the ith write + quorum. 
We introduce an additional variable L that represents the load + and solve the following linear program. + + min L subject to + r0 + r1 = 1 + w0 + w1 + w2 + w3 = 1 + fr (r0) + (1 - fr) (w0 + w1) <= L # a's load + fr (r0) + (1 - fr) (w2 + w3) <= L # b's load + fr (r1) + (1 - fr) (w0 + w2) <= L # c's load + fr (r1) + (1 - fr) (w1 + w3) <= L # d's load + + If we assume every element x has read capacity rcap_x and write + capacity wcap_x, then we adjust the linear program like this. + + min L subject to + r0 + r1 = 1 + w0 + w1 + w2 + w3 = 1 + fr/rcap_a (r0) + (1 - fr)/wcap_a (w0 + w1) <= L # a's load + fr/rcap_b (r0) + (1 - fr)/wcap_b (w2 + w3) <= L # b's load + fr/rcap_c (r1) + (1 - fr)/wcap_c (w0 + w2) <= L # c's load + fr/rcap_d (r1) + (1 - fr)/wcap_d (w1 + w3) <= L # d's load + + Assume we have fr = 0.9 with 80% probability and fr = 0.5 with 20%. Then + we adjust the linear program as follows to find the strategy that + minimizes the average load. + + min 0.8 * L_0.9 + 0.2 * L_0.5 subject to + r0 + r1 = 1 + w0 + w1 + w2 + w3 = 1 + 0.9/rcap_a (r0) + 0.1/wcap_a (w0 + w1) <= L_0.9 # a's load + 0.9/rcap_b (r0) + 0.1/wcap_b (w2 + w3) <= L_0.9 # b's load + 0.9/rcap_c (r1) + 0.1/wcap_c (w0 + w2) <= L_0.9 # c's load + 0.9/rcap_d (r1) + 0.1/wcap_d (w1 + w3) <= L_0.9 # d's load + 0.5/rcap_a (r0) + 0.5/wcap_a (w0 + w1) <= L_0.5 # a's load + 0.5/rcap_b (r0) + 0.5/wcap_b (w2 + w3) <= L_0.5 # b's load + 0.5/rcap_c (r1) + 0.5/wcap_c (w0 + w2) <= L_0.5 # c's load + 0.5/rcap_d (r1) + 0.5/wcap_d (w1 + w3) <= L_0.5 # d's load + """ nodes = self.reads.nodes() | self.writes.nodes() read_capacity = {node.x: node.read_capacity for node in nodes} write_capacity = {node.x: node.write_capacity for node in nodes} + # Create a variable for every read quorum and every write quorum. While + # we do this, map each element x to the read and write quorums that + # it's in. 
For example, imagine we have the following read and write + # quorums: + # + # read_quorums = [{a}, {a, b}, {a, c}] + # write_quorums = [{a, b}, {a, b, c}] + # + # Then, we'd have + # + # read_quorum_vars = [r0, r1, r2] + # write_quorum_vars = [w0, w1] + # x_to_read_quorum_vars = {a: [r0, r1, r2], b: [r1], c: [r2]} + # x_to_write_quorum_vars = {a: [w0, w1], b: [w0, w1], c: [w1]} read_quorum_vars: List[pulp.LpVariable] = [] x_to_read_quorum_vars: Dict[T, List[pulp.LpVariable]] = \ collections.defaultdict(list) - for (i, read_quorum) in enumerate(read_quorums): v = pulp.LpVariable(f'r{i}', 0, 1) read_quorum_vars.append(v) @@ -155,26 +220,36 @@ class QuorumSystem(Generic[T]): for x in write_quorum: x_to_write_quorum_vars[x].append(v) + # Create a variable for every load. + load_vars = {fr: pulp.LpVariable(f'l_{fr}', 0, 1) + for fr in read_fraction.keys()} + # Form the linear program to find the load. problem = pulp.LpProblem("load", pulp.LpMinimize) - # If we're trying to balance the strategy, then we want to minimize the - # pairwise absolute differences between the read probabilities and the - # write probabilities. - l = pulp.LpVariable('l', 0, 1) - problem += l + # First, we add our objective. + problem += sum(weight * load_vars[fr] + for (fr, weight) in read_fraction.items()) + + # Next, we make sure that the probabilities we select form valid + # probability distributions. problem += (sum(read_quorum_vars) == 1, 'valid read strategy') problem += (sum(write_quorum_vars) == 1, 'valid write strategy') - for node in nodes: - x = node.x - x_load: pulp.LpAffineExpression = 0 - if x in x_to_read_quorum_vars: - x_load += fr * sum(x_to_read_quorum_vars[x]) / read_capacity[x] - if x in x_to_write_quorum_vars: - x_load += ((1 - fr) * sum(x_to_write_quorum_vars[x]) / - write_capacity[x]) - problem += (x_load <= l, x) + # Finally, we add constraints for every value of fr. 
+ for fr, weight in read_fraction.items(): + for node in nodes: + x = node.x + x_load: pulp.LpAffineExpression = 0 + if x in x_to_read_quorum_vars: + x_load += (fr * sum(x_to_read_quorum_vars[x]) / + read_capacity[x]) + if x in x_to_write_quorum_vars: + x_load += ((1 - fr) * sum(x_to_write_quorum_vars[x]) / + write_capacity[x]) + problem += (x_load <= load_vars[fr], f'{x}{fr}') + + # Solve the linear program. problem.solve(pulp.apis.PULP_CBC_CMD(msg=False)) return ExplicitStrategy(nodes, read_quorums, diff --git a/quorums/strategy.py b/quorums/strategy.py index df46834..ff2fa72 100644 --- a/quorums/strategy.py +++ b/quorums/strategy.py @@ -44,6 +44,18 @@ class ExplicitStrategy(Strategy[T]): self.writes = writes self.write_weights = write_weights + self.unweighted_read_load: Dict[T, float] = \ + collections.defaultdict(float) + for (read_quorum, weight) in zip(self.reads, self.read_weights): + for x in read_quorum: + self.unweighted_read_load[x] += weight + + self.unweighted_write_load: Dict[T, float] = \ + collections.defaultdict(float) + for (write_quorum, weight) in zip(self.writes, self.write_weights): + for x in write_quorum: + self.unweighted_write_load[x] += weight + def __str__(self) -> str: non_zero_reads = {tuple(r): p for (r, p) in zip(self.reads, self.read_weights) @@ -66,29 +78,31 @@ class ExplicitStrategy(Strategy[T]): write_fraction: Optional[Distribution] = None) \ -> float: d = distribution.canonicalize_rw(read_fraction, write_fraction) - fr = sum(f * weight for (f, weight) in d.items()) + return sum(weight * self._load(fr) + for (fr, weight) in d.items()) - read_load: Dict[T, float] = collections.defaultdict(float) - for (read_quorum, weight) in zip(self.reads, self.read_weights): - for x in read_quorum: - read_load[x] += weight + def node_load(self, + x: T, + read_fraction: Optional[Distribution] = None, + write_fraction: Optional[Distribution] = None) \ + -> float: + d = distribution.canonicalize_rw(read_fraction, write_fraction) + return 
sum(weight * self._node_load(x, fr) + for (fr, weight) in d.items()) - write_load: Dict[T, float] = collections.defaultdict(float) - for (write_quorum, weight) in zip(self.writes, self.write_weights): - for x in write_quorum: - write_load[x] += weight + def _node_load(self, x: T, fr: float) -> float: + """ + _node_load returns the load on x given a fixed read fraction fr. + """ + fw = 1 - fr + return (fr * self.unweighted_read_load[x] / self.read_capacity[x] + + fw * self.unweighted_write_load[x] / self.write_capacity[x]) - loads: List[float] = [] - for node in self.nodes: - x = node.x - load = 0.0 - if x in read_load: - load += fr * read_load[x] / self.read_capacity[x] - if x in write_load: - load += (1 - fr) * write_load[x] / self.write_capacity[x] - loads.append(load) - - return max(loads) + def _load(self, fr: float) -> float: + """ + _load returns the load given a fixed read fraction fr. + """ + return max(self._node_load(node.x, fr) for node in self.nodes) # TODO(mwhittaker): Add read/write load and capacity and read/write cap. 
diff --git a/tutorial.py b/tutorial.py new file mode 100644 index 0000000..ed1a400 --- /dev/null +++ b/tutorial.py @@ -0,0 +1,102 @@ +from quorums import * + +a = Node('a') +b = Node('b') +c = Node('c') +d = Node('d') +e = Node('e') +f = Node('f') + +grid = QuorumSystem(reads=a*b*c + d*e*f) + +for r in grid.read_quorums(): + print(r) + +for w in grid.write_quorums(): + print(w) + +QuorumSystem(writes=(a + b + c) * (d + e + f)) + +QuorumSystem(reads=a*b*c + d*e*f, writes=(a + b + c) * (d + e + f)) + +print(grid.is_read_quorum({'a', 'b', 'c'})) # True +print(grid.is_read_quorum({'a', 'b', 'c', 'd'})) # True +print(grid.is_read_quorum({'a', 'b', 'd'})) # False + +print(grid.is_write_quorum({'a', 'd'})) # True +print(grid.is_write_quorum({'a', 'd', 'd'})) # True +print(grid.is_write_quorum({'a', 'b'})) # False + +print(grid.read_resilience()) # 1 +print(grid.write_resilience()) # 2 +print(grid.resilience()) # 1 + +strategy = grid.strategy(read_fraction=0.75) + +print(strategy.get_read_quorum()) +print(strategy.get_read_quorum()) +print(strategy.get_read_quorum()) +print(strategy.get_write_quorum()) +print(strategy.get_write_quorum()) +print(strategy.get_write_quorum()) + +print(strategy.load(read_fraction=0.75)) # 0.458 + +print(strategy.load(read_fraction=0)) # 0.333 +print(strategy.load(read_fraction=0.5)) # 0.416 +print(strategy.load(read_fraction=1)) # 0.5 + +print(grid.load(read_fraction=0.25)) # 0.375 + +distribution = {0.1: 0.5, 0.75: 0.5} +strategy = grid.strategy(read_fraction=distribution) +print(strategy.load(read_fraction=distribution)) # 0.404 + +strategy = grid.strategy(write_fraction=0.75) +print(strategy.load(write_fraction=distribution)) # 0.429 + +a = Node('a', capacity=1000) +b = Node('b', capacity=500) +c = Node('c', capacity=1000) +d = Node('d', capacity=500) +e = Node('e', capacity=1000) +f = Node('f', capacity=500) + +grid = QuorumSystem(reads=a*b*c + d*e*f) +strategy = grid.strategy(read_fraction=0.75) +print(strategy.load(read_fraction=0.75)) # 
0.00075 +print(strategy.capacity(read_fraction=0.75)) # 1333 + +a = Node('a', write_capacity=1000, read_capacity=10000) +b = Node('b', write_capacity=500, read_capacity=5000) +c = Node('c', write_capacity=1000, read_capacity=10000) +d = Node('d', write_capacity=500, read_capacity=5000) +e = Node('e', write_capacity=1000, read_capacity=10000) +f = Node('f', write_capacity=500, read_capacity=5000) + +grid = QuorumSystem(reads=a*b*c + d*e*f) +print(grid.capacity(read_fraction=1)) # 10,000 +print(grid.capacity(read_fraction=0.5)) # 3913 +print(grid.capacity(read_fraction=0)) # 2000 + +strategy = grid.strategy(read_fraction=0.5, f=1) + +print(strategy.get_read_quorum()) +print(strategy.get_write_quorum()) + +simple_majority = QuorumSystem(reads=majority([a, b, c, d, e])) +crumbling_walls = QuorumSystem(reads=a*b + c*d*e) +paths = QuorumSystem(reads=a*b + a*c*e + d*e + d*c*b) + +assert(simple_majority.resilience() >= 1) +assert(crumbling_walls.resilience() >= 1) +assert(paths.resilience() >= 1) + +distribution = {0.9: 0.9, 0.1: 0.1} +print(simple_majority.capacity(read_fraction=distribution)) # 5089 +print(crumbling_walls.capacity(read_fraction=distribution)) # 5837 +print(paths.capacity(read_fraction=distribution)) # 5725 + +print(simple_majority.capacity(read_fraction=distribution, f=1)) # 3816 +print(crumbling_walls.capacity(read_fraction=distribution, f=1)) # 1908 +print(paths.capacity(read_fraction=distribution, f=1)) # 1908