diff --git a/.gitignore b/.gitignore index 0f6b627..3af54ff 100644 --- a/.gitignore +++ b/.gitignore @@ -25,5 +25,6 @@ rel/machi *.patch current_counterexample.eqc foo* +RUNLOG* typescript* *.swp diff --git a/FAQ.md b/FAQ.md index f2e37c1..5885bc5 100644 --- a/FAQ.md +++ b/FAQ.md @@ -13,8 +13,8 @@ + [1.1 What is Machi?](#n1.1) + [1.2 What is a Machi "cluster of clusters"?](#n1.2) + [1.2.1 This "cluster of clusters" idea needs a better name, don't you agree?](#n1.2.1) - + [1.3 What is Machi like when operating in "eventually consistent" mode?](#n1.3) - + [1.4 What is Machi like when operating in "strongly consistent" mode?](#n1.4) + + [1.3 What is Machi like when operating in "eventually consistent"/"AP mode"?](#n1.3) + + [1.4 What is Machi like when operating in "strongly consistent"/"CP mode"?](#n1.4) + [1.5 What does Machi's API look like?](#n1.5) + [1.6 What licensing terms are used by Machi?](#n1.6) + [1.7 Where can I find the Machi source code and documentation? Can I contribute?](#n1.7) @@ -120,7 +120,7 @@ For proof that naming things is hard, see [http://martinfowler.com/bliki/TwoHardThings.html](http://martinfowler.com/bliki/TwoHardThings.html) -### 1.3. What is Machi like when operating in "eventually consistent" mode? +### 1.3. What is Machi like when operating in "eventually consistent"/"AP mode"? Machi's operating mode dictates how a Machi cluster will react to network partitions. A network partition may be caused by: @@ -130,15 +130,17 @@ network partitions. A network partition may be caused by: * An extreme server software "hang" or "pause", e.g. caused by OS scheduling problems such as a failing/stuttering disk device. -The consistency semantics of file operations while in eventual -consistency mode during and after network partitions are: +"AP mode" refers to the "A" and "P" properties of the "CAP +conjecture", meaning that the cluster will be "Available" and +"Partition tolerant". + +The consistency semantics of file operations while in "AP mode" are +eventually consistent during and after network partitions: * File write operations are permitted by any client on the "same side" of the network partition. * File read operations are successful for any file contents where the client & server are on the "same side" of the network partition. - * File read operations will probably fail for any file contents where the - client & server are on "different sides" of the network partition. * After the network partition(s) is resolved, files are merged together from "all sides" of the partition(s). * Unique files are copied in their entirety. @@ -149,10 +151,16 @@ consistency mode during and after network partitions are: to rules which guarantee safe mergeability.). -### 1.4. What is Machi like when operating in "strongly consistent" mode? +### 1.4. What is Machi like when operating in "strongly consistent"/"CP mode"? -The consistency semantics of file operations while in strongly -consistency mode during and after network partitions are: +Machi's operating mode dictates how a Machi cluster will react to +network partitions. +"CP mode" refers to the "C" and "P" properties of the "CAP +conjecture", meaning that the cluster will be "Consistent" and +"Partition tolerant". 
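+
+As a minimal sketch of the quorum majority rule used below
+(illustrative Erlang only, not Machi's actual API), "CP mode" write
+availability for a chain of length `Len` can be expressed as:
+
+```
+%% Illustrative sketch only -- not Machi's actual API. In "CP mode",
+%% a write to a chain of length Len may proceed only when a quorum
+%% majority of the chain's members is reachable.
+quorum_write_allowed(Len, ReachableMembers) ->
+    length(ReachableMembers) >= (Len div 2) + 1.
+
+%% A length=3 chain keeps accepting writes with 2 reachable members
+%% but must refuse writes with only 1:
+%%   quorum_write_allowed(3, [f1, f2]) -> true
+%%   quorum_write_allowed(3, [f1])     -> false
+```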
+ +The consistency semantics of file operations while in "CP mode" are +strongly consistent during and after network partitions: * File write operations are permitted by any client on the "same side" of the network partition if and only if a quorum majority of Machi servers @@ -197,12 +205,7 @@ Internally, there is a more complex protocol used by individual cluster members to manage file contents and to repair damaged/missing files. See Figure 3 in [Machi high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-machi.pdf) -for more description. - -The definitions of both the "high level" external protocol and "low -level" internal protocol are in a -[Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview) -definition at [./src/machi.proto](./src/machi.proto). +for more details. ### 1.6. What licensing terms are used by Machi? @@ -229,8 +232,8 @@ guidelines at ### 1.8. What is Machi's expected release schedule, packaging, and operating system/OS distribution support? -Basho expects that Machi's first major product release will take place -during the 2nd quarter of 2016. +Basho expects that Machi's first release will take place near the end +of calendar year 2015. Basho's official support for operating systems (e.g. Linux, FreeBSD), operating system packaging (e.g. CentOS rpm/yum package management, @@ -534,10 +537,6 @@ change several times during any single test case) and a random series of cluster operations, an event trace of all cluster activity is used to verify that no safety-critical rules have been violated. -All test code is available in the [./test](./test) subdirectory. -Modules that use QuickCheck will use a file suffix of `_eqc`, for -example, [./test/machi_ap_repair_eqc.erl](./test/machi_ap_repair_eqc.erl). - ### 3.5. Does Machi require shared disk storage? e.g. iSCSI, NBD (Network Block Device), Fibre Channel disks @@ -568,10 +567,7 @@ deploy multiple Machi servers per machine: one Machi server per disk. ### 3.7. What language(s) is Machi written in? -So far, Machi is written in 100% Erlang. Machi uses at least one -library, [ELevelDB](https://github.com/basho/eleveldb), that is -implemented both in C++ and in Erlang, using Erlang NIFs (Native -Interface Functions) to allow Erlang code to call C++ functions. +So far, Machi is written in 100% Erlang. In the event that we encounter a performance problem that cannot be solved within the Erlang/OTP runtime environment, all of Machi's @@ -592,16 +588,19 @@ bit-twiddling magicSPEED ... without also having to find a replacement for disterl. (Or without having to re-invent disterl's features in another language.) -All wire protocols used by Machi are defined & implemented using -[Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview). -The definition file can be found at [./src/machi.proto](./src/machi.proto). + +In the first drafts of the Machi code, the inter-node communication +uses a hand-crafted, artisanal, mostly ASCII protocol as part of a +"demo day" quick & dirty prototype. Work is underway (summer of 2015) +to replace that protocol gradually with a well-structured, +well-documented protocol based on Protocol Buffers data serialization. ### 3.9. Can I use HTTP to write/read stuff into/from Machi? -Short answer: No, not yet. - -Longer answer: No, but it was possible as a hack, many months ago, see +Yes, sort of. 
For as long as the legacy of +Machi's first internal protocol & code still +survives, it's possible to use a [primitive/hack'y HTTP interface that is described in this source code commit log](https://github.com/basho/machi/commit/6cebf397232cba8e63c5c9a0a8c02ba391b20fef). Please note that commit `6cebf397232cba8e63c5c9a0a8c02ba391b20fef` is required to try using this feature: the code has since bit-rotted and diff --git a/Makefile b/Makefile index 8cf5072..7ff19ed 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,9 @@ deps: clean: $(REBAR) -r clean +edoc: edoc-clean + $(REBAR) skip_deps=true doc + edoc-clean: rm -f edoc/*.png edoc/*.html edoc/*.css edoc/edoc-info diff --git a/README.md b/README.md index 28f77d2..d802080 100644 --- a/README.md +++ b/README.md @@ -1,117 +1,62 @@ -# Machi: a robust & reliable, distributed, highly available, large file store +# Machi [Travis-CI](http://travis-ci.org/basho/machi) :: ![Travis-CI](https://secure.travis-ci.org/basho/machi.png) -Outline +Our goal is a robust & reliable, distributed, highly available(*), +large file store based upon write-once registers, append-only files, +Chain Replication, and client-server style architecture. All members +of the cluster store all of the files. Distributed load +balancing/sharding of files is __outside__ of the scope of this +system. However, it is a high priority that this system be able to +integrate easily into systems that do provide distributed load +balancing, e.g., Riak Core. Although strong consistency is a major +feature of Chain Replication, first use cases will focus mainly on +eventual consistency features --- strong consistency design will be +discussed in a separate design document (read more below). -1. [Why another file store?](#sec1) -2. [Where to learn more about Machi](#sec2) -3. [Development status summary](#sec3) -4. [Contributing to Machi's development](#sec4) +The ability for Machi to maintain strong consistency will make it +attractive as a toolkit for building things like CORFU and Tango as +well as better-known open source software such as Kafka's file +replication. (See the bibliography of the [Machi high level design +doc](./doc/high-level-machi.pdf) for further references.) - -## 1. Why another file store? + (*) When operating in strong consistency mode (supporting + sequential or linearizable semantics), the availability of the + system is restricted to quorum majority availability. When in + eventual consistency mode, service can be provided by any + available server. -Our goal is a robust & reliable, distributed, highly available, large -file store. Such stores already exist, both in the open source world -and in the commercial world. Why reinvent the wheel? We believe -there are three reasons, ordered by decreasing rarity. +## Status: mid-October 2015: work is underway -1. We want end-to-end checksums for all file data, from the initial - file writer to every file reader, anywhere, all the time. -2. We need flexibility to trade consistency for availability: - e.g. weak consistency in exchange for being available in cases - of partial system failure. -3. We want to manage file replicas in a way that's provably correct - and also easy to test. +* The chain manager is ready for both eventual consistency use ("available + mode") and strong consistency use ("consistent mode"). Both modes use a new + consensus technique, Humming Consensus. 
+  * Scott will be
+    [speaking about Humming Consensus](http://ricon.io/agenda/#managing-chain-replication-metadata-with-humming-consensus)
+    at the [Ricon 2015 conference](http://ricon.io) in San Francisco,
+    CA, USA on Thursday, November 5th, 2015.
+  * If you would like to run the network partition simulator
+    mentioned in that Ricon presentation, please see the
+    [partition simulator convergence test doc.](./doc/machi_chain_manager1_converge_demo.md)
+  * Implementation of the file repair process for strong consistency
+    is still in progress.

-Of all the file stores in the open source & commercial worlds, only
-criteria #3 is a viable option. Or so we hope. Or we just don't
-care, and if data gets lost or corrupted, then ... so be it.
-
-If we have app use cases where availability is more important than
-consistency, then systems that meet criteria #2 are also rare.
-Most file stores provide only strong consistency and therefore
-have unavoidable, unavailable behavior when parts of the system
-fail.
-What if we want a file store that is always available to write new
-file data and attempts best-effort file reads?
-
-If we really do care about data loss and/or data corruption, then we
-really want both #3 and #1. Unfortunately, systems that meet
-criteria #1 are _very rare_.
-Why? This is 2015. We have decades of research that shows
-that computer hardware can (and
-indeed does) corrupt data at nearly every level of the modern
-client/server application stack. Systems with end-to-end data
-corruption detection should be ubiquitous today. Alas, they are not.
-Machi is an effort to change the deplorable state of the world, one
-Erlang function at a time.
-
-
-## 2. Where to learn more about Machi
-
-The two major design documents for Machi are now mostly stable.
-Please see the [doc](./doc) directory's [README](./doc) for details.
-
-We also have a
-[Frequently Asked Questions (FAQ) list](./FAQ.md).
-
-Scott recently (November 2015) gave a presentation at the
-[RICON 2015 conference](http://ricon.io) about one of the techniques
-used by Machi; "Managing Chain Replication Metadata with
-Humming Consensus" is available online now.
-* [slides (PDF format)](http://ricon.io/speakers/slides/Scott_Fritchie_Ricon_2015.pdf)
-* [video](https://www.youtube.com/watch?v=yR5kHL1bu1Q)
-
-
-## 3. Development status summary
-
-Mid-December 2015: work is underway.
-
-* In progress:
-  * Code refactoring: metadata management using
-    [ELevelDB](https://github.com/basho/eleveldb)
-  * File repair using file-centric, Merkle-style hash tree.
-  * Server-side socket handling is now performed by
-    [ranch](https://github.com/ninenines/ranch)
-  * QuickCheck tests for file repair correctness
-    * 2015-12-15: The EUnit test `machi_ap_repair_eqc` is
-      currently failing occasionally because it (correctly) detects
-      double-write errors. Double-write errors will be eliminated
-      when the ELevelDB integration work is complete.
-  * The `make stage` and `make release` commands can be used to
-    create a primitive "package". Use `./rel/machi/bin/machi console`
-    to start the Machi app in interactive mode. Substitute the word
-    `start` instead of console to start Machi in background/daemon
-    mode. The `./rel/machi/bin/machi` command without any arguments
-    will give a short usage summary.
-  * Chain Replication management using the Humming Consensus
-    algorithm to manage chain state is stable.
-    * ...
with the caveat that it runs very well in a very harsh
-      and unforgiving network partition simulator but has not run
-      much yet in the real world.
-  * All Machi client/server protocols are based on
-    [Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview).
-    * The current specification for Machi's protocols can be found at
-      [https://github.com/basho/machi/blob/master/src/machi.proto](https://github.com/basho/machi/blob/master/src/machi.proto).
-    * The Machi PB protocol is not yet stable. Expect change!
-    * The Erlang language client implementation of the high-level
-      protocol flavor is brittle (e.g., little error handling yet).
-
-If you would like to run the network partition simulator
-mentioned in the Ricon 2015 presentation about Humming Consensus,
-please see the
-[partition simulator convergence test doc.](./doc/machi_chain_manager1_converge_demo.md)

+* All Machi client/server protocols are based on
+  [Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview).
+  * The current specification for Machi's protocols can be found at
+    [https://github.com/basho/machi/blob/master/src/machi.proto](https://github.com/basho/machi/blob/master/src/machi.proto).
+  * The Machi PB protocol is not yet stable. Expect change!
+  * The Erlang language client implementation of the high-level
+    protocol flavor is brittle (e.g., little error handling yet).

If you'd like to work on a protocol such as Thrift, UBF,
msgpack over UDP, or some other protocol, let us know by
[opening an issue to discuss it](./issues/new).

-
-## 4. Contributing to Machi's development
+The two major design documents for Machi are now mostly stable.
+Please see the [doc](./doc) directory's [README](./doc) for details.

-### 4.1 License
+## Contributing to Machi: source code, documentation, etc.

Basho Technologies, Inc. has committed to licensing all work for Machi
under the
@@ -127,7 +72,26 @@ We invite all contributors to review the [CONTRIBUTING.md](./CONTRIBUTING.md)
document for guidelines for working with the Basho development team.

-### 4.2 Development environment requirements
+## A brief survey of the directories in this repository
+
+* A list of Frequently Asked Questions, a.k.a.
+  [the Machi FAQ](./FAQ.md).
+
+* The [doc](./doc/) directory: home for major documents about Machi:
+  high level design documents as well as exploration of features still
+  under design & review within Basho.
+
+* The `ebin` directory: used for compiled application code.
+
+* The `include`, `src`, and `test` directories: contain the header
+  files, source files, and test code for Machi, respectively.
+
+* The [prototype](./prototype/) directory: contains proof of concept
+  code, scaffolding libraries, and other exploratory code. Curious
+  readers should see the [prototype/README.md](./prototype/README.md)
+  file for more explanation of the small sub-projects found here.
+
+## Development environment requirements

All development to date has been done with Erlang/OTP version 17 on OS
X. The only known limitations for using R16 are minor type
@@ -139,8 +103,26 @@ tool chain for C and C++ applications. Specifically, we assume `make`
is available. The utility used to compile the Machi source
code, `rebar`, is pre-compiled and included in the repo.

-Machi has a dependency on the
-[ELevelDB](https://github.com/basho/eleveldb) library. ELevelDB only
-supports UNIX/Linux OSes and 64-bit versions of Erlang/OTP only; we
-apologize to Windows-based and 32-bit-based Erlang developers for this
-restriction.
+There are no known OS limits at this time: any platform that supports
+Erlang/OTP should be sufficient for Machi. This may change over time
+(e.g., adding NIFs which can make full portability to Windows OTP
+environments difficult), but it hasn't happened yet.
+
+## Contributions
+
+Basho encourages contributions to Machi from the community. Here’s how
+to get started.
+
+* Fork the appropriate sub-projects that are affected by your change.
+* Create a topic branch for your change and check out that branch.
+      git checkout -b some-topic-branch
+* Make your changes and run the test suite if one is provided. (see below)
+* Commit your changes and push them to your fork.
+* Open pull-requests for the appropriate projects.
+* Contributors will review your pull request, suggest changes, and merge it when it’s ready and/or offer feedback.
+* To report a bug or issue, please open a new issue against this repository.
+
+-The Machi team at Basho,
+[Scott Lystig Fritchie](mailto:scott@basho.com), technical lead, and
+[Matt Brender](mailto:mbrender@basho.com), your developer advocate.
+
diff --git a/doc/README.md b/doc/README.md
index 3ad424c..181278b 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -6,6 +6,20 @@ Erlang documentation, please use this link:

## Documents in this directory

+### chain-self-management-sketch.org
+
+[chain-self-management-sketch.org](chain-self-management-sketch.org)
+is a mostly-deprecated draft of
+an introduction to the
+self-management algorithm proposed for Machi. Most material has been
+moved to the [high-level-chain-mgr.pdf](high-level-chain-mgr.pdf) document.
+
+### cluster-of-clusters (directory)
+
+This directory contains the sketch of the "cluster of clusters" design
+strawman for partitioning/distributing/sharding files across a large
+number of independent Machi clusters.
+
### high-level-machi.pdf

[high-level-machi.pdf](high-level-machi.pdf)
@@ -36,9 +50,9 @@ introduction to the Humming Consensus algorithm. Its abstract:

> of file updates to all replica servers in a Machi cluster. Chain
> Replication is a variation of primary/backup replication where the
> order of updates between the primary server and each of the backup
-> servers is strictly ordered into a single "chain". Management of
-> Chain Replication's metadata, e.g., "What is the current order of
-> servers in the chain?", remains an open research problem. The
+> servers is strictly ordered into a single ``chain''. Management of
+> Chain Replication's metadata, e.g., ``What is the current order of
+> servers in the chain?'', remains an open research problem. The
> current state of the art for Chain Replication metadata management
> relies on an external oracle (e.g., ZooKeeper) or the Elastic
> Replication algorithm.
@@ -46,7 +60,7 @@ introduction to the Humming Consensus algorithm. Its abstract:

> This document describes the Machi chain manager, the component
> responsible for managing Chain Replication metadata state. The chain
> manager uses a new technique, based on a variation of CORFU, called
-> "humming consensus".
+> ``humming consensus''.
> Humming consensus does not require active participation by all or even
> a majority of participants to make decisions. Machi's chain manager
> bases its logic on humming consensus to make decisions about how to
@@ -57,18 +71,3 @@ introduction to the Humming Consensus algorithm. Its abstract:

> decision during that epoch.
When a differing decision is discovered, > new time epochs are proposed in which a new consensus is reached and > disseminated to all available participants. - -### chain-self-management-sketch.org - -[chain-self-management-sketch.org](chain-self-management-sketch.org) -is a mostly-deprecated draft of -an introduction to the -self-management algorithm proposed for Machi. Most material has been -moved to the [high-level-chain-mgr.pdf](high-level-chain-mgr.pdf) document. - -### cluster-of-clusters (directory) - -This directory contains the sketch of the "cluster of clusters" design -strawman for partitioning/distributing/sharding files across a large -number of independent Machi clusters. - diff --git a/doc/cluster-of-clusters/name-game-sketch.org b/doc/cluster-of-clusters/name-game-sketch.org index 44b5df0..a7adb59 100644 --- a/doc/cluster-of-clusters/name-game-sketch.org +++ b/doc/cluster-of-clusters/name-game-sketch.org @@ -175,8 +175,8 @@ of chains: | Chain length | CoC namespace | Mode | Comment | |--------------+---------------+------+----------------------------------| | 3 | normal | AP | Normal storage redundancy & cost | -| 2 | reduced | AP | Reduced cost storage | -| 1 | risky | AP | Really, really cheap storage | +| 2 | cheap | AP | Reduced cost storage | +| 1 | risky | AP | Really cheap storage | | 9 | paranoid | AP | Safety-critical storage | | 3 | sequential | CP | Strong consistency | |--------------+---------------+------+----------------------------------| @@ -189,7 +189,7 @@ intention. Further, the CoC administrators may wish to use the namespace to provide separate storage for different applications. Jane's application may use the namespace "jane-normal" and Bob's app uses -"bob-reduced". The CoC administrators may definite separate groups of +"bob-cheap". The CoC administrators may definite separate groups of chains on separate servers to serve these two applications. * 6. Floating point is not required ... it is merely convenient for explanation @@ -218,8 +218,8 @@ Machi assigns file names based on: ~ClientSuppliedPrefix ++ "^" ++ SomeOpaqueFileNameSuffix~ What if the CoC client could peek inside of the opaque file name -suffix in order to look at the CoC location information that we might -code in the filename suffix? +suffix in order to remove (or add) the CoC location information that +we need? ** The notation we use @@ -263,58 +263,6 @@ code in the filename suffix? if ~r=0.5~, then ~L = 0.33 + 0.5*(0.58-0.33) = 0.455~, which is exactly in the middle of the ~(0.33,0.58]~ interval. -** A bit more about the CoC locator's meaning and use - -- If two files were written using exactly the same CoC locator and the - same CoC namespace, then the client is indicating that it wishes - that the two files be stored in the same chain. -- If two files have a different CoC locator, then the client has - absolutely no expectation of where the two files will be stored - relative to each other. - -Given the items above, then some consequences are: - -- If the client doesn't care about CoC placement, then picking a - random number is fine. Always choosing a different locator ~L~ for - each append will scatter data across the CoC as widely as possible. -- If the client believes that some physical locality is good, then the - client should reuse the same locator ~L~ for a batch of appends to - the same prefix ~p~ and namespace ~N~. We have no recommendations - for the batch size, yet; perhaps 10-1,000 might be a good start for - experiments? 
- -When the client choose CoC namespace ~N~ and CoC locator ~L~ (using -random number or target cluster technique), the client uses ~N~'s CoC -map to find the CoC target cluster, ~T~. The client has also chosen -the file prefix ~p~. The append op sent to cluster ~T~ would look -like: - -~append_chunk(N="reduced",L=0.25,p="myprefix",<<900-data-bytes>>,<>,...)~ - -A successful result would yield a chunk position: - -~{offset=883293,size=900,file="myprefix^reduced^0.25^OpaqueSuffix"}~ - -** A bit more about the CoC namespaces's meaning and use - -- The CoC framework will provide means of creating and managing - chains of different types, e.g., chain length, consistency mode. -- The CoC framework will manage the mapping of CoC namespace names to - the chains in the system. -- The CoC framework will provide a query service to map a CoC - namespace name to a Coc map, - e.g. ~coc_latest_map("reduced") -> Map{generation=7,...}~. - -For use by Riak CS, for example, we'd likely start with the following -namespaces ... working our way down the list as we add new features -and/or re-implement existing CS features. - -- "standard" = Chain length = 3, eventually consistency mode -- "reduced" = Chain length = 2, eventually consistency mode. -- "stanchion7" = Chain length = 7, strong consistency mode. Perhaps - use this namespace for the metadata required to re-implement the - operations that are performed by today's Stanchion application. - * 8. File migration (a.k.a. rebalancing/reparitioning/resharding/redistribution) ** What is "migration"? @@ -363,26 +311,26 @@ As an example: And the new Random Slicing map for some CoC namespace ~N~ might look like this: -| Generation number / Namespace | 7 / reduced | -|-------------------------------+-------------| -| SubMap | 1 | -|-------------------------------+-------------| -| Hash range | Cluster ID | -|-------------------------------+-------------| -| 0.00 - 0.33 | Cluster1 | -| 0.33 - 0.66 | Cluster2 | -| 0.66 - 1.00 | Cluster3 | -|-------------------------------+-------------| -| SubMap | 2 | -|-------------------------------+-------------| -| Hash range | Cluster ID | -|-------------------------------+-------------| -| 0.00 - 0.25 | Cluster1 | -| 0.25 - 0.33 | Cluster4 | -| 0.33 - 0.58 | Cluster2 | -| 0.58 - 0.66 | Cluster4 | -| 0.66 - 0.91 | Cluster3 | -| 0.91 - 1.00 | Cluster4 | +| Generation number / Namespace | 7 / cheap | +|-------------------------------+------------| +| SubMap | 1 | +|-------------------------------+------------| +| Hash range | Cluster ID | +|-------------------------------+------------| +| 0.00 - 0.33 | Cluster1 | +| 0.33 - 0.66 | Cluster2 | +| 0.66 - 1.00 | Cluster3 | +|-------------------------------+------------| +| SubMap | 2 | +|-------------------------------+------------| +| Hash range | Cluster ID | +|-------------------------------+------------| +| 0.00 - 0.25 | Cluster1 | +| 0.25 - 0.33 | Cluster4 | +| 0.33 - 0.58 | Cluster2 | +| 0.58 - 0.66 | Cluster4 | +| 0.66 - 0.91 | Cluster3 | +| 0.91 - 1.00 | Cluster4 | When a new Random Slicing map contains a single submap, then its use is identical to the original Random Slicing algorithm. If the map @@ -419,18 +367,18 @@ Cluster4. 
When the CoC manager is satisfied that all such files have been copied to Cluster4, then the CoC manager can create and distribute a new map, such as: -| Generation number / Namespace | 8 / reduced | -|-------------------------------+-------------| -| SubMap | 1 | -|-------------------------------+-------------| -| Hash range | Cluster ID | -|-------------------------------+-------------| -| 0.00 - 0.25 | Cluster1 | -| 0.25 - 0.33 | Cluster4 | -| 0.33 - 0.58 | Cluster2 | -| 0.58 - 0.66 | Cluster4 | -| 0.66 - 0.91 | Cluster3 | -| 0.91 - 1.00 | Cluster4 | +| Generation number / Namespace | 8 / cheap | +|-------------------------------+------------| +| SubMap | 1 | +|-------------------------------+------------| +| Hash range | Cluster ID | +|-------------------------------+------------| +| 0.00 - 0.25 | Cluster1 | +| 0.25 - 0.33 | Cluster4 | +| 0.33 - 0.58 | Cluster2 | +| 0.58 - 0.66 | Cluster4 | +| 0.66 - 0.91 | Cluster3 | +| 0.91 - 1.00 | Cluster4 | The HibariDB system performs data migrations in almost exactly this manner. However, one important diff --git a/doc/flu-and-chain-lifecycle.org b/doc/flu-and-chain-lifecycle.org new file mode 100644 index 0000000..4672080 --- /dev/null +++ b/doc/flu-and-chain-lifecycle.org @@ -0,0 +1,620 @@ +FLU and Chain Life Cycle Management -*- mode: org; -*- +#+STARTUP: lognotedone hidestars indent showall inlineimages +#+COMMENT: To generate the outline section: egrep '^\*[*]* ' doc/flu-and-chain-lifecycle.org | egrep -v '^\* Outline' | sed -e 's/^\*\*\* / + /' -e 's/^\*\* / + /' -e 's/^\* /+ /' + +* FLU and Chain Life Cycle Management + +In an ideal world, we (the Machi development team) would have a full +vision of how Machi would be managed, down to the last detail of +beautiful CLI character and network protocol bit. Our vision isn't +complete yet, so we are working one small step at a time. + +* Outline + ++ FLU and Chain Life Cycle Management ++ Terminology review + + Terminology: Machi run-time components/services/thingies + + Terminology: Machi data structures + + Terminology: Cluster-of-cluster (CoC) data structures ++ Overview of administrative life cycles + + Cluster-of-clusters (CoC) administrative life cycle + + Chain administrative life cycle + + FLU server administrative life cycle ++ Quick admin: declarative management of Machi FLU and chain life cycles + + Quick admin uses the "rc.d" config scheme for life cycle management + + Quick admin's declarative "language": an Erlang-flavored AST + + Term 'host': define a new host for FLU services + + Term 'flu': define a new FLU + + Term 'chain': define or reconfigure a chain + + Executing quick admin AST files via the 'machi-admin' utility + + Checking the syntax of an AST file + + Executing an AST file + + Using quick admin to manage multiple machines ++ The "rc.d" style configuration file scheme + + Riak had a similar configuration file editing problem (and its solution) + + Machi's "rc.d" file scheme. + + FLU life cycle management using "rc.d" style files + + The key configuration components of a FLU + + Chain life cycle management using "rc.d" style files + + The key configuration components of a chain + +* Terminology review + +** Terminology: Machi run-time components/services/thingies + ++ FLU: a basic Machi server, responsible for managing a collection of + files. + ++ Chain: a small collection of FLUs that maintain replicas of the same + collection of files. 
A chain is usually small, 1-3 servers, where
+  more than 3 would be used only in cases when availability of
+  certain data is critical despite failures of several machines.
+  + The length of a chain is directly proportional to its
+    replication factor, e.g., a chain length=3 will maintain
+    (nominally) 3 replicas of each file.
+  + To maintain file availability when ~F~ failures have occurred, a
+    chain must be at least ~F+1~ members long. (In comparison, the
+    quorum replication technique requires ~2F+1~ members in the
+    general case.)
+
++ Cluster: this word can be used interchangeably with "chain".
+
++ Cluster-of-clusters: A collection of Machi clusters where files are
+  horizontally partitioned/sharded/distributed across the member
+  clusters.
+
+** Terminology: Machi data structures
+
++ Projection: used to define a single chain: the chain's consistency
+  mode (strong or eventual consistency), all members (from an
+  administrative point of view), all active members (from a runtime,
+  automatically-managed point of view), repairing/file-syncing members
+  (also runtime, auto-managed), and so on
+
++ Epoch: A version number of a projection. The epoch number is used
+  by both clients & servers to manage transitions from one projection
+  to another, e.g., when the chain is temporarily shortened by the
+  failure of a member FLU server.
+
+** Terminology: Cluster-of-cluster (CoC) data structures
+
++ Namespace: A collection of human-friendly names that are mapped to
+  groups of Machi chains that provide the same type of storage
+  service: consistency mode, replication policy, etc.
+  + A single namespace name, e.g. ~normal-ec~, is paired with a single
+    CoC chart (see below).
+  + Example: ~normal-ec~ might be a collection of Machi chains in
+    eventually-consistent mode that are of length=3.
+  + Example: ~risky-ec~ might be a collection of Machi chains in
+    eventually-consistent mode that are of length=1.
+  + Example: ~mgmt-critical~ might be a collection of Machi chains in
+    strongly-consistent mode that are of length=7.
+
++ CoC chart: Encodes the rules which partition/shard/distribute a
+  particular namespace across a group of chains that collectively
+  store the namespace's files.
+  + "chart: noun, a geographical map or plan, especially one used for
+    navigation by sea or air."
+
++ Chain weight: A value assigned to each chain within a CoC chart
+  structure that defines the relative storage capacity of a chain
+  within the namespace. For example, a chain weight=150 has 50% more
+  capacity than a chain weight=100.
+
++ CoC chart epoch: The version number assigned to a CoC chart.
+
+* Overview of administrative life cycles
+
+** Cluster-of-clusters (CoC) administrative life cycle
+
++ CoC is first created
++ CoC adds namespaces (e.g. consistency policy + chain length policy)
++ CoC adds/removes chains to/from a namespace to increase/decrease the
+  namespace's storage capacity.
++ CoC adjusts chain weights within a namespace, e.g., to shift files
+  within the namespace to chains with greater storage capacity
+  resources and/or runtime I/O resources.
+
+A CoC "file migration" is the process of moving files from one
+namespace member chain to another for purposes of shifting &
+re-balancing storage capacity and/or runtime I/O capacity.
+
+** Chain administrative life cycle
+
++ A chain is created with an initial FLU membership list.
++ Chain may be administratively modified zero or more times to
+  add/remove member FLU servers.
++ A chain may be decommissioned.
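+
+As a sketch of this life cycle (using the quick admin syntax that is
+defined later in this document; the chain and FLU names below are
+hypothetical, and expressing decommissioning as a length=0 definition
+follows the chain "rc.d" discussion near the end of this document), a
+single chain might be described by the following sequence of terms
+over its lifetime:
+
+#+BEGIN_SRC
+%% Chain is created with an initial FLU membership list.
+{chain, c9, [f1,f2,f3], []}.
+%% Chain is administratively modified: f3 removed, f4 added.
+{chain, c9, [f1,f2,f4], []}.
+%% Chain is decommissioned, i.e., defined with length=0.
+{chain, c9, [], []}.
+#+END_SRC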
+
+See also: http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html
+
+** FLU server administrative life cycle
+
++ A FLU is created after the administrator chooses the FLU's runtime
+  location: which machine/virtual machine, IP address and TCP port
+  allocation, etc.
++ An unassigned FLU may be added to a chain by chain administrative
+  policy.
++ A FLU that is assigned to a chain may be removed from that chain by
+  chain administrative policy.
+  + In the current implementation, the FLU's Erlang processes will be
+    halted. Then the FLU's data and metadata files will be moved to
+    another area of the disk for safekeeping. Later, a "garbage
+    collection" process can be used for reclaiming disk space used by
+    halted FLU servers.
+
+See also: http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html
+
+* Quick admin: declarative management of Machi FLU and chain life cycles
+
+The "quick admin" scheme is a temporary (?) tool for managing Machi
+FLU server and chain life cycles in a declarative manner. The API is
+described in this section.
+
+** Quick admin uses the "rc.d" config scheme for life cycle management
+
+As described at the top of
+http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html, the "rc.d"
+config files do not manage "policy". "Policy" is doing the right
+thing with a Machi cluster-of-clusters from a systems administrator's
+point of view. The "rc.d" config files can only implement decisions
+made according to policy.
+
+The "quick admin" tool is a first attempt at automating policy
+decisions in a safe way (we hope) that is also easy to implement (we
+hope) with a variety of systems management tools, e.g. Chef, Puppet,
+Ansible, Saltstack, or plain-old-human-at-a-keyboard.
+
+** Quick admin's declarative "language": an Erlang-flavored AST
+
+The "language" that an administrator uses to express desired policy
+changes is not (yet) a true language. As a quick implementation hack,
+the current language is an Erlang-flavored abstract syntax tree
+(AST). The tree isn't very deep, either, frequently just one
+element tall. (Not much of a tree, is it?)
+
+There are three terms in the language currently:
+
++ ~host~, define a new host that can execute FLU servers
++ ~flu~, define a new FLU
++ ~chain~, define a new chain or re-configure an existing chain with
+  the same name
+
+*** Term 'host': define a new host for FLU services
+
+In this context, a host is a machine, virtual machine, or container
+that can execute the Machi application and can therefore provide FLU
+services, i.e. file service, Humming Consensus management.
+
+Two formats may be used to define a new host:
+
+#+BEGIN_SRC
+{host, Name, Props}.
+{host, Name, AdminI, ClientI, Props}.
+#+END_SRC
+
+The shorter tuple is shorthand notation for the latter. If the
+shorthand form is used, then it will be converted automatically to the
+long form as:
+
+#+BEGIN_SRC
+{host, Name, AdminI=Name, ClientI=Name, Props}.
+#+END_SRC
+
+Type information, description, and restrictions:
+
++ ~Name::string()~ The ~Name~ attribute must be unique. Note that it
+  is possible to define two different hosts, one using a DNS hostname
+  and one using an IP address. The user must avoid this
+  double-definition because it is not enforced by quick admin.
+  + The ~Name~ field is used for cross-reference purposes with other
+    terms, e.g., ~flu~ and ~chain~.
+  + There is no syntax yet for removing a host definition.
+
++ ~AdminI::string()~ A DNS hostname or IP address for cluster
+  administration purposes, e.g. SSH access.
+  + This field is unused at the present time.
+
++ ~ClientI::string()~ A DNS hostname or IP address for Machi's client
+  protocol access, e.g., Protocol Buffers network API service.
+  + This field is unused at the present time.
+
++ ~props::proplist()~ is an Erlang-style property list for specifying
+  additional configuration options, debugging information, sysadmin
+  comments, etc.
+
++ A full-featured admin tool should also include managing several
+  other aspects of configuration related to a "host". For example,
+  for any single IP address, quick admin assumes that there will be
+  exactly one Erlang VM that is running the Machi application. Of
+  course, it is possible to have dozens of Erlang VMs on the same
+  (let's assume for clarity) hardware machine and all running Machi
+  ... but there are additional aspects of such a machine that quick
+  admin does not account for:
+  + multiple IP addresses per machine
+  + multiple Machi package installation paths
+  + multiple Machi config files (e.g. cuttlefish config, ~etc.conf~,
+    ~vm.args~)
+  + multiple data directories/file system mount points
+    + It is also a management problem for quick admin when a single
+      Machi package on a machine should take advantage of bulk data
+      storage using multiple file system mount points.
+  + multiple Erlang VM host names, required for distributed Erlang,
+    which is used for communication with ~machi~ and ~machi-admin~
+    command line utilities.
+  + and others....
+
+*** Term 'flu': define a new FLU
+
+A new FLU is defined relative to a previously-defined ~host~ entity;
+an exception will be thrown if the ~host~ cannot be cross-referenced.
+
+#+BEGIN_SRC
+{flu, Name, HostName, Port, Props}
+#+END_SRC
+
+Type information, description, and restrictions:
+
++ ~Name::atom()~ The name of the FLU, as a human-friendly name and
+  also for internal management use; please note the ~atom()~ type.
+  This name must be unique.
+  + The ~Name~ field is used for cross-reference purposes with the
+    ~chain~ term.
+  + There is no syntax yet for removing a FLU definition.
+
++ ~Hostname::string()~ The cross-reference name of the ~host~ that
+  this FLU should run on.
+
++ ~Port::non_neg_integer()~ The TCP port used by this FLU server's
+  Protocol Buffers network API listener service.
+
++ ~props::proplist()~ is an Erlang-style property list for specifying
+  additional configuration options, debugging information, sysadmin
+  comments, etc.
+
+*** Term 'chain': define or reconfigure a chain
+
+A chain is defined relative to zero or more previously-defined ~flu~
+entities; an exception will be thrown if any ~flu~ cannot be
+cross-referenced.
+
+Two formats may be used to define/reconfigure a chain:
+
+#+BEGIN_SRC
+{chain, Name, FullList, Props}.
+{chain, Name, CMode, FullList, Witnesses, Props}.
+#+END_SRC
+
+The shorter tuple is shorthand notation for the latter. If the
+shorthand form is used, then it will be converted automatically to the
+long form as:
+
+#+BEGIN_SRC
+{chain, Name, ap_mode, FullList, [], Props}.
+#+END_SRC
+
+Type information, description, and restrictions:
+
++ ~Name::atom()~ The name of the chain, as a human-friendly name and
+  also for internal management use; please note the ~atom()~ type.
+  This name must be unique.
+  + There is no syntax yet for removing a chain definition.
+
++ ~CMode::'ap_mode'|'cp_mode'~ Defines the consistency mode of the
+  chain, either eventual consistency or strong consistency,
+  respectively.
+  + A chain cannot change consistency mode, e.g., from
+    strong~->~eventual consistency.
+
++ ~FullList::list(atom())~ Specifies the list of full-service FLU
+  servers, i.e. servers that provide file data & metadata services as
+  well as Humming Consensus. Each atom in the list must
+  cross-reference with a previously defined ~flu~; an exception will
+  be thrown if any ~flu~ cannot be cross-referenced.
+
++ ~Witnesses::list(atom())~ Specifies the list of witness-only
+  servers, i.e. servers that only participate in Humming Consensus.
+  Each atom in the list must cross-reference with a previously defined
+  ~flu~; an exception will be thrown if any ~flu~ cannot be
+  cross-referenced.
+  + This list must be empty for eventual consistency chains.
+
++ ~props::proplist()~ is an Erlang-style property list for specifying
+  additional configuration options, debugging information, sysadmin
+  comments, etc.
+
++ If this term specifies a new ~chain~ name, then all of the member
+  FLU servers (full & witness types) will be bootstrapped to a
+  starting configuration.
+
++ If this term specifies a previously-defined ~chain~ name, then all
+  of the member FLU servers (full & witness types, respectively) will
+  be adjusted to add or remove members, as appropriate.
+  + Any FLU servers added to either list must not be assigned to any
+    other chain, or they must be a member of this specific chain.
+  + Any FLU servers removed from either list will be halted.
+    (See the "FLU server administrative life cycle" section above.)
+
+** Executing quick admin AST files via the 'machi-admin' utility
+
+Examples of quick admin AST files can be found in the
+~priv/quick-admin-examples~ directory. Below is an example that will
+define a new host ( ~"localhost"~ ), three new FLU servers ( ~f1~ & ~f2~
+and ~f3~ ), and an eventually consistent chain ( ~c1~ ) that uses the new
+FLU servers:
+
+#+BEGIN_SRC
+{host, "localhost", []}.
+{flu,f1,"localhost",20401,[]}.
+{flu,f2,"localhost",20402,[]}.
+{flu,f3,"localhost",20403,[]}.
+{chain,c1,[f1,f2,f3],[]}.
+#+END_SRC
+
+*** Checking the syntax of an AST file
+
+Given an AST config file, ~/path/to/ast/file~, its basic syntax and
+correctness can be checked without executing it.
+
+#+BEGIN_SRC
+./rel/machi/bin/machi-admin quick-admin-check /path/to/ast/file
+#+END_SRC
+
++ The utility will exit with status zero and output ~ok~ if the syntax
+  and proposed configuration appear to be correct.
++ If there is an error, the utility will exit with status one, and an
+  error message will be printed.
+
+*** Executing an AST file
+
+Given an AST config file, ~/path/to/ast/file~, it can be executed
+using the command:
+
+#+BEGIN_SRC
+./rel/machi/bin/machi-admin quick-admin-apply /path/to/ast/file RelativeHost
+#+END_SRC
+
+... where the last argument, ~RelativeHost~, should be the exact
+spelling of one of the previously defined AST ~host~ entities,
+*and also* is the same host that the ~machi-admin~ utility is being
+executed on.
+
+Restrictions and warnings:
+
++ This is alpha quality software.
+
++ There is no "undo".
+  + Of course there is, but you need to resort to doing things like
+    using ~machi attach~ to attach to the server's CLI to then execute
+    magic Erlang incantations to stop FLUs, unconfigure chains, etc.
+    + Oh, and delete some files with magic paths, also.
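+
+The shorthand-to-long-form conversions described above for the ~host~
+and ~chain~ terms can be summarized by a small sketch (a hypothetical
+helper shown for illustration only, not actual ~machi_lifecycle_mgr~
+code):
+
+#+BEGIN_SRC
+%% Sketch: expand the documented shorthand AST terms into their long
+%% forms; all other terms pass through unchanged.
+expand({host, Name, Props}) ->
+    %% AdminI and ClientI default to the host's Name.
+    {host, Name, Name, Name, Props};
+expand({chain, Name, FullList, Props}) ->
+    %% Consistency mode defaults to ap_mode, witness list to [].
+    {chain, Name, ap_mode, FullList, [], Props};
+expand(Term) ->
+    Term.
+#+END_SRC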
+
+** Using quick admin to manage multiple machines
+
+A quick sketch follows:
+
+1. Create the AST file to specify all of the changes that you wish to
+   make to all hosts, FLUs, and/or chains, e.g., ~/tmp/ast.txt~.
+2. Check the basic syntax with the ~quick-admin-check~ argument to
+   ~machi-admin~.
+3. If the syntax is good, then copy ~/tmp/ast.txt~ to all hosts in the
+   cluster, using the same path, ~/tmp/ast.txt~.
+4. For each machine in the cluster, run:
+#+BEGIN_SRC
+./rel/machi/bin/machi-admin quick-admin-apply /tmp/ast.txt RelativeHost
+#+END_SRC
+
+... where RelativeHost is the AST ~host~ name of the machine that you
+are executing the ~machi-admin~ command on. The command should
+succeed, with exit status 0 and the string ~ok~ as its output.
+
+Finally, for each machine in the cluster, a listing of all files in
+the directory ~rel/machi/etc/quick-admin-archive~ should show exactly
+the same files, one for each time that ~quick-admin-apply~ has been
+run successfully on that machine.
+
+* The "rc.d" style configuration file scheme
+
+This configuration scheme is inspired by BSD UNIX's ~init(8)~ process
+manager's configuration style, called "rc.d" after the name of the
+directory where these files are stored, ~/etc/rc.d~. The ~init~
+process is responsible for (among other things) starting UNIX
+processes at machine boot time and stopping them when the machine is
+shut down.
+
+The original scheme used by ~init~ to start processes at boot time was
+a single Bourne shell script called ~/etc/rc~. When a new software
+package was installed that required a daemon to be started at boot
+time, text was added to the ~/etc/rc~ file. Uninstalling packages was
+much trickier, because it meant removing lines from a file that
+*is a computer program (run by the Bourne shell, a Turing-complete
+programming language)*. Error-free editing of the ~/etc/rc~ script
+was not possible in all cases.
+
+Later, ~init~'s configuration was split into a few master Bourne shell
+scripts and a subdirectory, ~/etc/rc.d~. The subdirectory contained
+shell scripts that were responsible for boot time starting of a single
+daemon or service, e.g. NFS or an HTTP server. When a new software
+package was added, a new file was added to the ~rc.d~ subdirectory.
+When a package was removed, the corresponding file in ~rc.d~ was
+removed. With this simple scheme, addition & removal of boot time
+scripts was vastly simplified.
+
+** Riak had a similar configuration file editing problem (and its solution)
+
+Another software product from Basho Technologies, Riak, had a similar
+configuration file editing problem. One file in particular,
+~app.config~, had a syntax that made it difficult both for human
+systems administrators and also computer programs to edit the file in
+a syntactically correct manner.
+
+Later releases of Riak switched to an alternative configuration file
+format, one inspired by the BSD UNIX ~sysctl(8)~ utility and
+~sysctl.conf(5)~ file syntax. The ~sysctl.conf~ format is much easier
+for computer programs to manage when adding items. Removing items is
+not 100% simple, however: the correct lines must be identified and then
+removed (e.g. with Perl or a text editor or combination of ~grep -v~
+and ~mv~), but removing any comment lines that "belong" to the removed
+config item(s) is not easy for a 1-line shell script to do 100%
+correctly.
+
+Machi will use the ~sysctl.conf~ style configuration for some
+application configuration variables.
However, adding & removing FLUs +and chains will be managed using the "rc.d" style because of the +"rc.d" scheme's simplicity and tolerance of mistakes by administrators +(human or computer). + +** Machi's "rc.d" file scheme. + +Machi will use a single subdirectory that will contain configuration +files for some life cycle management task, e.g. a single FLU or a +single chain. + +The contents of the file should be a single Erlang term, serialized in +ASCII form as Erlang source code statement, i.e. a single Erlang term +~T~ that is formatted by ~io:format("~w.",[T]).~. This file must be +parseable by the Erlang function ~file:consult()~. + +Later versions of Machi may change the file format to be more familiar +to administrators who are unaccustomed to Erlang language syntax. + +** FLU life cycle management using "rc.d" style files + +*** The key configuration components of a FLU + +1. The machine (or virtual machine) to run it on. +2. The Machi software package's artifacts to execute. +3. The disk device(s) used to store Machi file data & metadata, "rc.d" + style config files, etc. +4. The name, IP address and TCP port assigned to the FLU service. +5. Its chain assignment. + +Notes: + ++ Items 1-3 are currently outside of the scope of this life cycle + document. We assume that human administrators know how to do these + things. ++ Item 4's properties are explicitly managed by a FLU-defining "rc.d" + style config file. ++ Item 5 is managed by the chain life cycle management system. + +Here is an example of a properly formatted FLU config file: + +#+BEGIN_SRC +{p_srvr,f1,machi_flu1_client,"192.168.72.23",20401,[]}. +#+END_SRC + +... which corresponds to the following Erlang record definition: + +#+BEGIN_SRC +-record(p_srvr, { + name :: atom(), + proto_mod = 'machi_flu1_client' :: atom(), % Module name + address :: term(), % Protocol-specific + port :: term(), % Protocol-specific + props = [] :: list() % proplist for other related info + }). +#+END_SRC + ++ ~name~ is ~f1~. This is name of the FLU. This name should be + unique over the lifetime of the administrative domain and thus + managed by external policy. This name must be the same as the name + of the config file that defines the FLU. ++ ~proto_mod~ is used for internal management purposes and should be + considered a mandatory constant. ++ ~address~ is "192.168.72.23". The DNS hostname or IP address used + by other servers to communicate with this FLU. This must be a valid + IP address, previously assigned to this machine/VM using the + appropriate operating system-specific procedure. ++ ~port~ is TCP port 20401. The TCP port number that the FLU listens + to for incoming Protocol Buffers-serialized communication. This TCP + port must not be in use (now or in the future) by another Machi FLU + or any other process running on this machine/VM. ++ ~props~ is an Erlang-style property list for specifying additional + configuration options, debugging information, sysadmin comments, + etc. + +** Chain life cycle management using "rc.d" style files + +Unlike FLUs, chains have a self-management aspect that makes a chain +life cycle different from a single FLU server. Machi's chains are +self-managing, via Humming Consensus; see the +https://github.com/basho/machi/tree/master/doc/ directory for much +more detail about Humming Consensus. After FLUs have received their +initial chain configuration for Humming Consensus, the FLUs will +manage the chain (and each other) by themselves. 
+
+However, Humming Consensus does not handle three chain management
+problems:
+
+1. Specifying the very first chain configuration,
+2. Altering the membership of the chain (i.e. adding/removing FLUs
+   from the chain),
+3. Stopping the chain permanently.
+
+A chain "rc.d" file will only be used to bootstrap a newly-defined FLU
+server. It's like a piece of glue information to introduce the new
+FLU to the Humming Consensus group that is managing the chain's
+dynamic state (e.g. which members are up or down). In all other
+respects, chain config files are ignored by life cycle management code.
+However, to mimic the life cycle of the FLU server's "rc.d" config
+files, a chain's "rc.d" file is not deleted until the chain has been
+decommissioned (i.e. defined with length=0).
+
+*** The key configuration components of a chain
+
+1. The name of the chain.
+2. Consistency mode: eventually consistent or strongly consistent.
+3. The membership list of all FLU servers in the chain.
+   + Remember, all servers in a single chain will manage full replicas
+     of the same collection of Machi files.
+4. If the chain is defined to use strongly-consistent mode, then a
+   list of "witness servers" may also be defined. See the
+   [https://github.com/basho/machi/tree/master/doc/] documentation for
+   more information on witness servers.
+   + The witness list must be empty for all chains in eventual
+     consistency mode.
+
+Here is an example of a properly formatted chain config file:
+
+#+BEGIN_SRC
+{chain_def_v1,c1,ap_mode,
+ [{p_srvr,f1,machi_flu1_client,"localhost",20401,[]},
+  {p_srvr,f2,machi_flu1_client,"localhost",20402,[]},
+  {p_srvr,f3,machi_flu1_client,"localhost",20403,[]}],
+ [],[],[],
+ [f1,f2,f3],
+ [],[]}.
+#+END_SRC
+
+... which corresponds to the following Erlang record definition:
+
+#+BEGIN_SRC
+-record(chain_def_v1, {
+        name :: atom(),          % chain name
+        mode :: 'ap_mode' | 'cp_mode',
+        full = []      :: [p_srvr()],
+        witnesses = [] :: [p_srvr()],
+        old_full = []  :: [atom()],   % guard against some races
+        old_witnesses=[] :: [atom()], % guard against some races
+        local_run = [] :: [atom()],   % must be tailored to each machine!
+        local_stop = [] :: [atom()],  % must be tailored to each machine!
+        props = [] :: list()          % proplist for other related info
+        }).
+#+END_SRC
+
++ ~name~ is ~c1~, the name of the chain. This name should be unique
+  over the lifetime of the administrative domain and thus managed by
+  external policy. This name must be the same as the name of the
+  config file that defines the chain.
++ ~mode~ is ~ap_mode~, an internal code symbol for eventual
+  consistency mode.
++ ~full~ is a list of Erlang ~#p_srvr{}~ records for full-service
+  members of the chain, i.e., providing Machi file data & metadata
+  storage services.
++ ~witnesses~ is a list of Erlang ~#p_srvr{}~ records for witness-only
+  FLU servers, i.e., providing only Humming Consensus service.
++ The next four fields are used for internal management only.
++ ~props~ is an Erlang-style property list for specifying additional
+  configuration options, debugging information, sysadmin comments,
+  etc.
+
diff --git a/doc/overview.edoc b/doc/overview.edoc
new file mode 100644
index 0000000..6182f6b
--- /dev/null
+++ b/doc/overview.edoc
@@ -0,0 +1,170 @@
+
+@title Machi: a small village of replicated files
+
+@doc
+
+== About This EDoc Documentation ==
+
+This EDoc-style documentation will concern itself only with Erlang
+function APIs and function & data types.
Higher-level design and
+commentary will remain outside of the Erlang EDoc system; please see
+the "Pointers to Other Machi Documentation" section below for more
+details.
+
+Readers should beware that this documentation may be out-of-sync with
+the source code. When in doubt, use the `make edoc' command to
+regenerate all HTML pages.
+
+It is the developer's responsibility to re-generate the documentation
+periodically and commit it to the Git repo.
+
+== Machi Code Overview ==
+
+=== Chain Manager ===
+
+The Chain Manager is responsible for managing the state of Machi's
+"Chain Replication" state. This role is roughly analogous to the
+"Riak Core" application inside of Riak, which takes care of
+coordinating replica placement and replica repair.
+
+For each primitive data server in the cluster, a Machi FLU, there is a
+Chain Manager process that manages its FLU's role within the Machi
+cluster's Chain Replication scheme. Each Chain Manager process
+executes locally and independently to manage the distributed state of
+a single Machi Chain Replication chain.
+
+
+
+=== FLU ===
+
+The FLU is the basic storage server for Machi.
+
+
+
+For each FLU, there are three independent tasks that are implemented
+using three different Erlang processes:
+
+
+
+From the perspective of failure detection, it is very convenient that
+all three FLU-related services (file server, sequencer server, and
+projection server) are accessed using the same single TCP port.
+
+=== Projection (data structure) ===
+
+The projection is a data structure that specifies the current state
+of the Machi cluster: all FLUs, which FLUs are considered
+up/running or down/crashed/stopped, which FLUs are active
+participants in the Chain Replication protocol, and which FLUs are
+under "repair" (i.e., having their data resynchronized when
+newly-added to a cluster or when restarting after a crash).
+
+=== Projection Store (server) ===
+
+The projection store is a storage service that is implemented by an
+Erlang/OTP `gen_server' process that is associated with each
+FLU. Conceptually, the projection store is an array of
+write-once registers. For each projection store register, the
+key is a 2-tuple of an epoch number (`non_neg_integer()' type)
+and a projection type (`public' or `private' type); the value is
+a projection data structure (`projection_v1()' type).
+
+=== Client and Proxy Client ===
+
+Machi intentionally avoids using distributed Erlang for Machi's
+communication. This design decision makes Erlang-side code more
+difficult & complex but allows us the freedom of implementing
+parts of Machi in other languages without major
+protocol&API&glue code changes later in the product's
+lifetime.
+
+There are two layers of interface for Machi clients.
+
+
+
+The types for both modules ought to be the same. However, due to
+rapid code churn, some differences might exist. Any major difference
+is (almost by definition) a bug: please open a GitHub issue to request
+a correction.
+
+== TODO notes ==
+
+Any use of the string "TODO" in upper/lower/mixed case, anywhere in
+the code, is a reminder signal of unfinished work.
+
+== Pointers to Other Machi Documentation ==
+
+
diff --git a/doc/src.high-level/high-level-chain-mgr.tex b/doc/src.high-level/high-level-chain-mgr.tex
index b88fa84..f139862 100644
--- a/doc/src.high-level/high-level-chain-mgr.tex
+++ b/doc/src.high-level/high-level-chain-mgr.tex
@@ -1279,7 +1279,7 @@ as the foundation for Machi's data loss prevention techniques.
\begin{figure} \centering $ -[\overbrace{\underbrace{H_1}_\textbf{Head}, M_{11}, \ldots, T_1, +[\overbrace{\underbrace{H_1}_\textbf{Head}, M_{11}, T_1, H_2, M_{21}, \ldots \underbrace{T_2}_\textbf{Tail}}^\textbf{Chain (U.P.~Invariant preserving)} diff --git a/include/machi.hrl b/include/machi.hrl index f825556..671fe01 100644 --- a/include/machi.hrl +++ b/include/machi.hrl @@ -40,6 +40,3 @@ %% TODO: it's used in flu_sup and elsewhere, change this to suitable name -define(TEST_ETS_TABLE, test_ets_table). - --define(DEFAULT_COC_NAMESPACE, ""). --define(DEFAULT_COC_LOCATOR, 0). diff --git a/include/machi_projection.hrl b/include/machi_projection.hrl index 0702586..ce161e4 100644 --- a/include/machi_projection.hrl +++ b/include/machi_projection.hrl @@ -1,6 +1,6 @@ %% ------------------------------------------------------------------- %% -%% Copyright (c) 2007-2014 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved. %% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file @@ -22,10 +22,11 @@ -define(MACHI_PROJECTION_HRL, true). -type pv1_consistency_mode() :: 'ap_mode' | 'cp_mode'. +-type pv1_chain_name():: atom(). -type pv1_csum() :: binary(). -type pv1_epoch() :: {pv1_epoch_n(), pv1_csum()}. -type pv1_epoch_n() :: non_neg_integer(). --type pv1_server() :: atom() | binary(). +-type pv1_server() :: atom(). -type pv1_timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. -record(p_srvr, { @@ -55,6 +56,7 @@ epoch_number :: pv1_epoch_n() | ?SPAM_PROJ_EPOCH, epoch_csum :: pv1_csum(), author_server :: pv1_server(), + chain_name = ch_not_def_yet :: pv1_chain_name(), all_members :: [pv1_server()], witnesses = [] :: [pv1_server()], creation_time :: pv1_timestamp(), @@ -75,4 +77,16 @@ %% create a consistent projection ranking score. -define(MAX_CHAIN_LENGTH, 64). +-record(chain_def_v1, { + name :: atom(), % chain name + mode :: pv1_consistency_mode(), + full = [] :: [p_srvr()], + witnesses = [] :: [p_srvr()], + old_full = [] :: [pv1_server()], % guard against some races + old_witnesses=[] :: [pv1_server()], % guard against some races + local_run = [] :: [pv1_server()], % must be tailored to each machine! + local_stop = [] :: [pv1_server()], % must be tailored to each machine! + props = [] :: list() % proplist for other related info + }). + -endif. % !MACHI_PROJECTION_HRL diff --git a/priv/quick-admin-examples/000 b/priv/quick-admin-examples/000 new file mode 100644 index 0000000..45eee80 --- /dev/null +++ b/priv/quick-admin-examples/000 @@ -0,0 +1 @@ +{host, "localhost", []}. diff --git a/priv/quick-admin-examples/001 b/priv/quick-admin-examples/001 new file mode 100644 index 0000000..e5df9dd --- /dev/null +++ b/priv/quick-admin-examples/001 @@ -0,0 +1,4 @@ +{flu,f1,"localhost",20401,[]}. +{flu,f2,"localhost",20402,[]}. +{flu,f3,"localhost",20403,[]}. +{chain,c1,[f1,f2,f3],[]}. diff --git a/priv/quick-admin-examples/002 b/priv/quick-admin-examples/002 new file mode 100644 index 0000000..5656558 --- /dev/null +++ b/priv/quick-admin-examples/002 @@ -0,0 +1,4 @@ +{flu,f4,"localhost",20404,[]}. +{flu,f5,"localhost",20405,[]}. +{flu,f6,"localhost",20406,[]}. +{chain,c2,[f4,f5,f6],[]}. diff --git a/rebar.config b/rebar.config index be9b0d3..d6debc0 100644 --- a/rebar.config +++ b/rebar.config @@ -5,13 +5,9 @@ {edoc_opts, [{dir, "./edoc"}]}. 
{deps, [ - {cuttlefish, ".*", {git, "git://github.com/basho/cuttlefish.git", {branch, "develop"}}}, - {sext, ".*", {git, "git://github.com/basho/sext.git", {branch, "master"}}}, - {eleveldb, ".*", {git, "git://github.com/basho/eleveldb.git", {branch, "develop"}}}, {lager, ".*", {git, "git://github.com/basho/lager.git", {tag, "2.2.0"}}}, {protobuffs, "0.8.*", {git, "git://github.com/basho/erlang_protobuffs.git", {tag, "0.8.1p4"}}}, {riak_dt, ".*", {git, "git://github.com/basho/riak_dt.git", {branch, "develop"}}}, - {ranch, ".*", {git, "git://github.com/ninenines/ranch.git", {branch, "master"}}}, {node_package, ".*", {git, "git://github.com/basho/node_package.git", {branch, "develop"}}}, {eper, ".*", {git, "git://github.com/basho/eper.git", {tag, "0.92-basho1"}}}, {cluster_info, ".*", {git, "git://github.com/basho/cluster_info", {branch, "develop"}}} diff --git a/rel/files/app.config b/rel/files/app.config index c65c526..eb330f3 100644 --- a/rel/files/app.config +++ b/rel/files/app.config @@ -1,25 +1,31 @@ [ {machi, [ %% Data directory for all FLUs. - {flu_data_dir, "{{platform_data_dir}}"}, + {flu_data_dir, "{{platform_data_dir}}/flu"}, + + %% FLU config directory + {flu_config_dir, "{{platform_etc_dir}}/flu-config"}, + + %% Chain config directory + {chain_config_dir, "{{platform_etc_dir}}/chain-config"}, %% FLUs to start at app start. - {initial_flus, [ - %% Remember, this is a list, so separate all tuples - %% with a comma. - %% - %% {Name::atom(), Port::pos_integer(), proplist()} - %% - %% For example: {my_name_is_a, 12500, []} - - ]}, + %% This task has moved to machi_flu_sup and machi_lifecycle_mgr. %% Number of metadata manager processes to run per FLU. %% Default = 10 %% {metadata_manager_count, 2}, + %% Platform vars (mirror of reltool packaging) + {platform_data_dir, "{{platform_data_dir}}"}, + {platform_etc_dir, "{{platform_etc_dir}}"}, + %% Do not delete, do not put Machi config items after this line. {final_comma_stopper, do_not_delete} ] + }, + {lager, [ + {error_logger_hwm, 5000} % lager's default of 50/sec is too low + ] } ]. diff --git a/rel/files/machi-admin b/rel/files/machi-admin index 627705f..fd07634 100755 --- a/rel/files/machi-admin +++ b/rel/files/machi-admin @@ -22,23 +22,41 @@ cd $RUNNER_BASE_DIR SCRIPT=`basename $0` usage() { - echo "Usage: $SCRIPT { test | " + echo "Usage: $SCRIPT { quick-admin-check | quick-admin-apply | " echo " top }" } case "$1" in - test) + quick-admin-check) # Make sure the local node IS running node_up_check shift - # Parse out the node name to pass to the client - NODE_NAME=${NAME_ARG#* } + NODE_NAME=${NAME_ARG#* } # target machi server node name + IN_FILE="$1" - $ERTS_PATH/erl -noshell $NAME_PARAM machi_test$NAME_HOST $COOKIE_ARG \ - -pa $RUNNER_LIB_DIR/basho-patches \ - -eval "case catch(machi:client_test(\"$NODE_NAME\")) of \ + $ERTS_PATH/erl -noshell -noinput $NAME_PARAM machi_test$NAME_HOST $COOKIE_ARG \ + -remsh $NODE_NAME \ + -eval "Me = self(), spawn('"$NODE_NAME"', fun() -> X = (catch(machi_lifecycle_mgr:quick_admin_sanity_check(\"$IN_FILE\"))), Me ! {res, X} end), XX = receive {res, Res} -> Res after 10*1000 -> timeout end, io:format(user, \"Result: ~p\n\", [XX]), case XX of \ + ok -> init:stop(); \ + _ -> init:stop(1) \ + end." 
+ + ;; + quick-admin-apply) + # Make sure the local node IS running + node_up_check + + shift + + NODE_NAME=${NAME_ARG#* } # target machi server node name + IN_FILE="$1" + RELATIVE_HOST="$2" + + $ERTS_PATH/erl -noshell -noinput $NAME_PARAM machi_test$NAME_HOST $COOKIE_ARG \ + -remsh $NODE_NAME \ + -eval "Me = self(), spawn('"$NODE_NAME"', fun() -> X = (catch(machi_lifecycle_mgr:quick_admin_apply(\"$IN_FILE\", \"$RELATIVE_HOST\"))), Me ! {res, X} end), XX = receive {res, Res} -> Res after 10*1000 -> timeout end, io:format(user, \"Result: ~p\n\", [XX]), case XX of \ ok -> init:stop(); \ _ -> init:stop(1) \ end." diff --git a/rel/reltool.config b/rel/reltool.config index 1385ca1..33df951 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -47,6 +47,7 @@ {overlay, [ {mkdir, "data"}, + {mkdir, "data/^PRESERVE"}, {mkdir, "log"}, %% Copy base files for starting and interacting w/ node @@ -93,6 +94,19 @@ {template, "files/vm.args", "etc/vm.args"}, {template, "files/app.config", "etc/app.config"}, + {mkdir, "etc/chain-config"}, + {mkdir, "etc/flu-config"}, + {mkdir, "etc/pending"}, + {mkdir, "etc/rejected"}, + + %% Experiment: quick-admin + {mkdir, "etc/quick-admin-archive"}, + {mkdir, "priv"}, + {mkdir, "priv/quick-admin-examples"}, + {copy, "../priv/quick-admin-examples/000", "priv/quick-admin-examples"}, + {copy, "../priv/quick-admin-examples/001", "priv/quick-admin-examples"}, + {copy, "../priv/quick-admin-examples/002", "priv/quick-admin-examples"}, + {mkdir, "lib/basho-patches"} %% {copy, "../apps/machi/ebin/etop_txt.beam", "lib/basho-patches"} ]}. diff --git a/src/machi.proto b/src/machi.proto index 6228865..2fa8657 100644 --- a/src/machi.proto +++ b/src/machi.proto @@ -170,12 +170,11 @@ message Mpb_AuthResp { // High level API: append_chunk() request & response message Mpb_AppendChunkReq { - required string coc_namespace = 1; - required uint32 coc_locator = 2; - required string prefix = 3; - required bytes chunk = 4; - required Mpb_ChunkCSum csum = 5; - optional uint32 chunk_extra = 6; + optional bytes placement_key = 1; + required string prefix = 2; + required bytes chunk = 3; + required Mpb_ChunkCSum csum = 4; + optional uint32 chunk_extra = 5; } message Mpb_AppendChunkResp { @@ -331,18 +330,17 @@ message Mpb_ProjectionV1 { required uint32 epoch_number = 1; required bytes epoch_csum = 2; required string author_server = 3; - repeated string all_members = 4; - repeated string witnesses = 5; - required Mpb_Now creation_time = 6; - required Mpb_Mode mode = 7; - repeated string upi = 8; - repeated string repairing = 9; - repeated string down = 10; - optional bytes opaque_flap = 11; - optional bytes opaque_inner = 12; - required bytes opaque_dbg = 13; - required bytes opaque_dbg2 = 14; - repeated Mpb_MembersDictEntry members_dict = 15; + required string chain_name = 4; + repeated string all_members = 5; + repeated string witnesses = 6; + required Mpb_Now creation_time = 7; + required Mpb_Mode mode = 8; + repeated string upi = 9; + repeated string repairing = 10; + repeated string down = 11; + required bytes opaque_dbg = 12; + required bytes opaque_dbg2 = 13; + repeated Mpb_MembersDictEntry members_dict = 14; } ////////////////////////////////////////// @@ -379,13 +377,11 @@ message Mpb_ProjectionV1 { message Mpb_LL_AppendChunkReq { required Mpb_EpochID epoch_id = 1; - /* To avoid CoC use, use coc_namespace="" and coc_locator=0 */ - required string coc_namespace = 2; - required uint32 coc_locator = 3; - required string prefix = 4; - required bytes chunk = 5; - required Mpb_ChunkCSum csum 
= 6; - optional uint32 chunk_extra = 7; + optional bytes placement_key = 2; + required string prefix = 3; + required bytes chunk = 4; + required Mpb_ChunkCSum csum = 5; + optional uint32 chunk_extra = 6; } message Mpb_LL_AppendChunkResp { diff --git a/src/machi_admin_util.erl b/src/machi_admin_util.erl index 46f6c3d..fb6dedb 100644 --- a/src/machi_admin_util.erl +++ b/src/machi_admin_util.erl @@ -100,7 +100,7 @@ verify_file_checksums_common(Sock1, EpochID, File, ReadChunk) -> try case ?FLU_C:checksum_list(Sock1, EpochID, File) of {ok, InfoBin} -> - Info = machi_csum_table:split_checksum_list_blob_decode(InfoBin), + {Info, _} = machi_csum_table:split_checksum_list_blob_decode(InfoBin), Res = lists:foldl(verify_chunk_checksum(File, ReadChunk), [], Info), {ok, Res}; @@ -115,9 +115,7 @@ verify_file_checksums_common(Sock1, EpochID, File, ReadChunk) -> end. verify_chunk_checksum(File, ReadChunk) -> - fun({0, ?MINIMUM_OFFSET, none}, []) -> - []; - ({Offset, Size, <<_Tag:1/binary, CSum/binary>>}, Acc) -> + fun({Offset, Size, <<_Tag:1/binary, CSum/binary>>}, Acc) -> case ReadChunk(File, Offset, Size) of {ok, {[{_, Offset, Chunk, _}], _}} -> CSum2 = machi_util:checksum_chunk(Chunk), diff --git a/src/machi_basho_bench_driver.erl b/src/machi_basho_bench_driver.erl index 4d36328..2cf9e39 100644 --- a/src/machi_basho_bench_driver.erl +++ b/src/machi_basho_bench_driver.erl @@ -136,7 +136,7 @@ load_ets_table(Conn, ETS) -> {ok, Fs} = machi_cr_client:list_files(Conn), [begin {ok, InfoBin} = machi_cr_client:checksum_list(Conn, File), - PosList = machi_csum_table:split_checksum_list_blob_decode(InfoBin), + {PosList, _} = machi_csum_table:split_checksum_list_blob_decode(InfoBin), StartKey = ets:update_counter(ETS, max_key, 0), %% _EndKey = lists:foldl(fun({Off,Sz,CSum}, K) -> %% V = {File, Off, Sz, CSum}, diff --git a/src/machi_chain_manager1.erl b/src/machi_chain_manager1.erl index 157ac5b..9068958 100644 --- a/src/machi_chain_manager1.erl +++ b/src/machi_chain_manager1.erl @@ -108,7 +108,7 @@ %% API -export([start_link/2, start_link/3, stop/1, ping/1, - set_chain_members/2, set_chain_members/3, set_active/2, + set_chain_members/2, set_chain_members/6, set_active/2, trigger_react_to_env/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, format_status/2, code_change/3]). @@ -168,13 +168,22 @@ ping(Pid) -> %% with lowest rank, i.e. name z* first, name a* last. set_chain_members(Pid, MembersDict) -> - set_chain_members(Pid, MembersDict, []). + set_chain_members(Pid, ch0_name, 0, ap_mode, MembersDict, []). 
-set_chain_members(Pid, MembersDict, Witness_list) -> - case lists:all(fun(Witness) -> orddict:is_key(Witness, MembersDict) end, - Witness_list) of +set_chain_members(Pid, ChainName, OldEpoch, CMode, MembersDict, Witness_list) + when is_atom(ChainName) andalso + is_integer(OldEpoch) andalso OldEpoch >= 0 andalso + (CMode == ap_mode orelse CMode == cp_mode) andalso + is_list(MembersDict) andalso + is_list(Witness_list) -> + case lists:all(fun({X, #p_srvr{name=X}}) -> true; + (_) -> false + end, MembersDict) + andalso + lists:all(fun(Witness) -> orddict:is_key(Witness, MembersDict) end, + Witness_list) of true -> - Cmd = {set_chain_members, MembersDict, Witness_list}, + Cmd = {set_chain_members, ChainName, OldEpoch, CMode, MembersDict, Witness_list}, gen_server:call(Pid, Cmd, infinity); false -> {error, bad_arg} @@ -281,7 +290,7 @@ init({MyName, InitMembersDict, MgrOpts}) -> last_down=[no_such_server_initial_value_only], fitness_svr=machi_flu_psup:make_fitness_regname(MyName) }, Proj), - {_, S2} = do_set_chain_members_dict(MembersDict, S), + S2 = do_set_chain_members_dict(MembersDict, S), S3 = if ActiveP == false -> S2; ActiveP == true -> @@ -291,12 +300,17 @@ init({MyName, InitMembersDict, MgrOpts}) -> handle_call({ping}, _From, S) -> {reply, pong, S}; -handle_call({set_chain_members, MembersDict, Witness_list}, _From, +handle_call({set_chain_members, SetChainName, SetOldEpoch, CMode, + MembersDict, Witness_list}, _From, #ch_mgr{name=MyName, proj=#projection_v1{all_members=OldAll_list, epoch_number=OldEpoch, + chain_name=ChainName, upi=OldUPI}=OldProj}=S) -> - {Reply, S2} = do_set_chain_members_dict(MembersDict, S), + true = (OldEpoch == 0) % in this case we want unconditional set of ch name + orelse + (SetOldEpoch == OldEpoch andalso SetChainName == ChainName), + S2 = do_set_chain_members_dict(MembersDict, S), %% TODO: should there be any additional sanity checks? Right now, %% if someone does something bad, then do_react_to_env() will %% crash, which will crash us, and we'll restart in a sane & old @@ -310,10 +324,10 @@ handle_call({set_chain_members, MembersDict, Witness_list}, _From, {NUPI, All_list -- NUPI} end, NewEpoch = OldEpoch + ?SET_CHAIN_MEMBERS_EPOCH_SKIP, - CMode = calc_consistency_mode(Witness_list), ok = set_consistency_mode(machi_flu_psup:make_proj_supname(MyName), CMode), NewProj = machi_projection:update_checksum( OldProj#projection_v1{author_server=MyName, + chain_name=SetChainName, creation_time=now(), mode=CMode, epoch_number=NewEpoch, @@ -325,7 +339,11 @@ handle_call({set_chain_members, MembersDict, Witness_list}, _From, members_dict=MembersDict}), S3 = set_proj(S2#ch_mgr{proj_history=queue:new(), consistency_mode=CMode}, NewProj), - {_QQ, S4} = do_react_to_env(S3), + {Res, S4} = do_react_to_env(S3), + Reply = case Res of + {_,_,_} -> ok; + _ -> Res + end, {reply, Reply, S4}; handle_call({set_active, Boolean}, _From, #ch_mgr{timer=TRef}=S) -> case {Boolean, TRef} of @@ -357,8 +375,8 @@ handle_call({test_read_latest_public_projection, ReadRepairP}, _From, S) -> {reply, Res, S2}; handle_call({trigger_react_to_env}=Call, _From, S) -> gobble_calls(Call), - {TODOtodo, S2} = do_react_to_env(S), - {reply, TODOtodo, S2}; + {Res, S2} = do_react_to_env(S), + {reply, Res, S2}; handle_call(_Call, _From, S) -> io:format(user, "\nBad call to ~p: ~p\n", [S#ch_mgr.name, _Call]), {reply, whaaaaaaaaaa, S}. 
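For orientation, here is a sketch of how a caller might drive the widened
set_chain_members/6 API above. Everything in it is hypothetical: `Mgr'
stands for the chain manager pid, the port numbers are invented, and the
#p_srvr{} field names (name, proto_mod, address, port, props) are assumed
from the chain config example earlier in this patch.

#+BEGIN_SRC
%% Hypothetical sketch, not part of this patch: define a brand-new
%% (current epoch = 0) eventually-consistent chain c1 with three members
%% and no witnesses. The validation above requires MembersDict to be an
%% orddict of {Name, #p_srvr{name=Name}} pairs.
MembersDict = orddict:from_list(
                [{Name, #p_srvr{name=Name, proto_mod=machi_flu1_client,
                                address="localhost", port=Port, props=[]}}
                 || {Name, Port} <- [{f1, 20401}, {f2, 20402}, {f3, 20403}]]),
ok = machi_chain_manager1:set_chain_members(Mgr, c1, 0, ap_mode,
                                            MembersDict, []).
#+END_SRC
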
@@ -535,6 +553,7 @@ cl_write_public_proj2(FLUs, Partitions, Epoch, Proj, IgnoreWrittenErrorP, S) -> end end, {true, []}, FLUs), %% io:format(user, "\nWrite public ~w by ~w: ~w\n", [Epoch, S#ch_mgr.name, Rs]), + %% io:format(user, "mgr ~w epoch ~w Rs ~p\n", [S#ch_mgr.name, Epoch, Rs]), {{remote_write_results, Rs}, S}. do_cl_read_latest_public_projection(ReadRepairP, @@ -556,12 +575,41 @@ do_cl_read_latest_public_projection(ReadRepairP, read_latest_projection_call_only(ProjectionType, AllHosed, #ch_mgr{proj=CurrentProj}=S) -> #projection_v1{all_members=All_list} = CurrentProj, - All_queried_list = All_list -- AllHosed, + All_queried_list = lists:sort(All_list -- AllHosed), + read_latest_projection_call_only1(ProjectionType, AllHosed, + All_queried_list, S). - {Rs, S2} = read_latest_projection_call_only2(ProjectionType, - All_queried_list, S), - FLUsRs = lists:zip(All_queried_list, Rs), - {All_queried_list, FLUsRs, S2}. +read_latest_projection_call_only1(ProjectionType, AllHosed, + All_queried_list, S) -> + {Rs_tmp, S2} = read_latest_projection_call_only2(ProjectionType, + All_queried_list, S), + New_all_maybe = + lists:usort( + lists:flatten( + [A_l || #projection_v1{all_members=A_l} <- Rs_tmp])) -- AllHosed, + case New_all_maybe -- All_queried_list of + [] -> + FLUsRs = lists:zip(All_queried_list, Rs_tmp), + {All_queried_list, FLUsRs, S2}; + [AnotherFLU|_] -> + %% Stop AnotherFLU proxy, in unexpected case where it's open + try + Proxy = proxy_pid(AnotherFLU, S2), + ?FLU_PC:stop_proxies([Proxy]) + catch _:_ -> ok + end, + MD = orddict:from_list( + lists:usort( + lists:flatten( + [orddict:to_list(D) || #projection_v1{members_dict=D} <- Rs_tmp]))), + Another_P_srvr = orddict:fetch(AnotherFLU, MD), + {ok, Proxy2} = ?FLU_PC:start_link(Another_P_srvr), + S3 = S2#ch_mgr{proxies_dict=orddict:store(AnotherFLU, Proxy2, + S2#ch_mgr.proxies_dict)}, + read_latest_projection_call_only1( + ProjectionType, AllHosed, + lists:usort([AnotherFLU|All_queried_list]), S3) + end. 
read_latest_projection_call_only2(ProjectionType, All_queried_list, S) -> {_UpNodes, Partitions, S2} = calc_up_nodes(S), @@ -601,6 +649,8 @@ rank_and_sort_projections_with_extra(All_queried_list, FLUsRs, ProjectionType, Witness_list = CurrentProj#projection_v1.witnesses, NoneProj = make_none_projection(0, MyName, [], Witness_list, orddict:new()), + ChainName = CurrentProj#projection_v1.chain_name, + NoneProj2 = NoneProj#projection_v1{chain_name=ChainName}, Extra2 = [{all_members_replied, true}, {all_queried_list, All_queried_list}, {flus_rs, FLUsRs}, @@ -609,7 +659,7 @@ rank_and_sort_projections_with_extra(All_queried_list, FLUsRs, ProjectionType, {bad_answer_flus, BadAnswerFLUs}, {bad_answers, BadAnswers}, {not_unanimous_answers, []}], - {not_unanimous, NoneProj, Extra2, S}; + {not_unanimous, NoneProj2, Extra2, S}; ProjectionType == public, UnwrittenRs /= [] -> {needs_repair, FLUsRs, [flarfus], S}; true -> @@ -723,13 +773,14 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, runenv=RunEnv1, repair_final_status=RepairFS}=S) -> #projection_v1{epoch_number=OldEpochNum, + chain_name=ChainName, members_dict=MembersDict, witnesses=OldWitness_list, upi=OldUPI_list, repairing=OldRepairing_list } = LastProj, LastUp = lists:usort(OldUPI_list ++ OldRepairing_list), - AllMembers = (S#ch_mgr.proj)#projection_v1.all_members, + AllMembers = CurrentProj#projection_v1.all_members, {Up0, Partitions, RunEnv2} = calc_up_nodes(MyName, AllMembers, RunEnv1), Up = Up0 -- AllHosed, @@ -821,10 +872,11 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, end, ?REACT({calc,?LINE,[{new_upi, NewUPI},{new_rep, NewRepairing}]}), - P = machi_projection:new(OldEpochNum + 1, - MyName, MembersDict, Down, NewUPI, NewRepairing, - D_foo ++ - Dbg ++ [{ps, Partitions},{nodes_up, Up}]), + P0 = machi_projection:new(OldEpochNum + 1, + MyName, MembersDict, Down, NewUPI, NewRepairing, + D_foo ++ + Dbg ++ [{ps, Partitions},{nodes_up, Up}]), + P1 = P0#projection_v1{chain_name=ChainName}, P2 = if CMode == cp_mode -> UpWitnesses = [W || W <- Up, lists:member(W, OldWitness_list)], Majority = full_majority_size(AllMembers), @@ -833,7 +885,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, SoFar = length(NewUPI ++ NewRepairing), if SoFar >= Majority -> ?REACT({calc,?LINE,[]}), - P; + P1; true -> Need = Majority - SoFar, UpWitnesses = [W || W <- Up, @@ -842,7 +894,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, Ws = lists:sublist(UpWitnesses, Need), ?REACT({calc,?LINE,[{ws, Ws}]}), machi_projection:update_checksum( - P#projection_v1{upi=Ws++NewUPI}); + P1#projection_v1{upi=Ws++NewUPI}); true -> ?REACT({calc,?LINE,[]}), P_none0 = make_none_projection( @@ -855,6 +907,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, "Not enough witnesses are available now" end, P_none1 = P_none0#projection_v1{ + chain_name=ChainName, %% Stable creation time! creation_time={1,2,3}, dbg=[{none_projection,true}, @@ -875,7 +928,7 @@ calc_projection2(LastProj, RelativeToServer, AllHosed, Dbg, end; CMode == ap_mode -> ?REACT({calc,?LINE,[]}), - P + P1 end, P3 = machi_projection:update_checksum( P2#projection_v1{mode=CMode, witnesses=OldWitness_list}), @@ -1027,31 +1080,33 @@ rank_projection(#projection_v1{author_server=_Author, do_set_chain_members_dict(MembersDict, #ch_mgr{proxies_dict=OldProxiesDict}=S)-> _ = ?FLU_PC:stop_proxies(OldProxiesDict), ProxiesDict = ?FLU_PC:start_proxies(MembersDict), - {ok, S#ch_mgr{members_dict=MembersDict, - proxies_dict=ProxiesDict}}. 
+ S#ch_mgr{members_dict=MembersDict, + proxies_dict=ProxiesDict}. do_react_to_env(#ch_mgr{name=MyName, proj=#projection_v1{epoch_number=Epoch, members_dict=[]=OldDict}=OldProj, opts=Opts}=S) -> + put(ttt, [?LINE]), %% Read from our local *public* projection store. If some other %% chain member has written something there, and if we are a %% member of that chain, then we'll adopt that projection and then %% start actively humming in that chain. - {NewMembersDict, NewProj} = + {NewMD, NewProj} = get_my_public_proj_boot_info(Opts, OldDict, OldProj), - case orddict:is_key(MyName, NewMembersDict) of + case orddict:is_key(MyName, NewMD) of false -> - {{empty_members_dict, [], Epoch}, S}; + {{empty_members_dict1, [], Epoch}, S}; true -> - {_, S2} = do_set_chain_members_dict(NewMembersDict, S), - CMode = calc_consistency_mode(NewProj#projection_v1.witnesses), - {{empty_members_dict, [], Epoch}, - set_proj(S2#ch_mgr{members_dict=NewMembersDict, - consistency_mode=CMode}, NewProj)} + CMode = NewProj#projection_v1.mode, + S2 = do_set_chain_members_dict(NewMD, S), + {Reply, S3} = react_to_env_C110(NewProj, + S2#ch_mgr{members_dict=NewMD, + consistency_mode=CMode}), + {Reply, S3} end; do_react_to_env(S) -> -put(ttt, [?LINE]), + put(ttt, [?LINE]), %% The not_sanes manager counting dictionary is not strictly %% limited to flapping scenarios. (Though the mechanism first %% started as a way to deal with rare flapping scenarios.) @@ -1150,7 +1205,7 @@ react_to_env_A10(S) -> ?REACT(a10), react_to_env_A20(0, poll_private_proj_is_upi_unanimous(S)). -react_to_env_A20(Retries, #ch_mgr{name=MyName}=S) -> +react_to_env_A20(Retries, #ch_mgr{name=MyName, proj=P_current}=S) -> ?REACT(a20), init_remember_down_list(), {UnanimousTag, P_latest, ReadExtra, S2} = @@ -1178,17 +1233,34 @@ react_to_env_A20(Retries, #ch_mgr{name=MyName}=S) -> false when P_latest#projection_v1.epoch_number /= LastComplaint, P_latest#projection_v1.all_members /= [] -> put(rogue_server_epoch, P_latest#projection_v1.epoch_number), - error_logger:info_msg("Chain manager ~p found latest public " - "projection ~p has author ~p has a " - "members list ~p that does not include me.\n", + error_logger:info_msg("Chain manager ~w found latest public " + "projection ~w with author ~w has a " + "members list ~w that does not include me. " + "We assume this is a result of administrator " + "action and will thus wedge ourselves until " + "we are re-added to the chain or shutdown.\n", [S#ch_mgr.name, P_latest#projection_v1.epoch_number, P_latest#projection_v1.author_server, - P_latest#projection_v1.all_members]); + P_latest#projection_v1.all_members]), + EpochID = machi_projection:make_epoch_id(P_current), + ProjStore = get_projection_store_pid_or_regname(S), + {ok, NotifyPid} = machi_projection_store:get_wedge_notify_pid(ProjStore), + _QQ = machi_flu1:update_wedge_state(NotifyPid, true, EpochID), + #projection_v1{epoch_number=Epoch, + chain_name=ChainName, + all_members=All_list, + witnesses=Witness_list, + members_dict=MembersDict} = P_current, + P_none0 = make_none_projection(Epoch, + MyName, All_list, Witness_list, MembersDict), + P_none = P_none0#projection_v1{chain_name=ChainName}, + {{now_using,[],Epoch}, set_proj(S2, P_none)}; _ -> - ok - end, + react_to_env_A21(Retries, UnanimousTag, P_latest, ReadExtra, S2) + end. +react_to_env_A21(Retries, UnanimousTag, P_latest, ReadExtra, S) -> %% The UnanimousTag isn't quite sufficient for our needs. We need %% to determine if *all* of the UPI+Repairing FLUs are members of %% the unanimous server replies. 
All Repairing FLUs should be up @@ -1233,7 +1305,7 @@ react_to_env_A20(Retries, #ch_mgr{name=MyName}=S) -> true -> exit({badbad, UnanimousTag}) end, - react_to_env_A29(Retries, P_latest, LatestUnanimousP, ReadExtra, S2). + react_to_env_A29(Retries, P_latest, LatestUnanimousP, ReadExtra, S). react_to_env_A29(Retries, P_latest, LatestUnanimousP, _ReadExtra, #ch_mgr{consistency_mode=CMode, @@ -1267,7 +1339,6 @@ react_to_env_A29(Retries, P_latest, LatestUnanimousP, _ReadExtra, ?REACT({a29, ?LINE, [{zerf_backstop, true}, {zerf_in, machi_projection:make_summary(Zerf)}]}), - %% io:format(user, "zerf_in: A29: ~p: ~w\n\t~p\n", [MyName, machi_projection:make_summary(Zerf), get(yyy_hack)]), #projection_v1{dbg=ZerfDbg} = Zerf, Backstop = if Zerf#projection_v1.upi == [] -> []; @@ -1287,7 +1358,8 @@ react_to_env_A29(Retries, P_latest, LatestUnanimousP, _ReadExtra, end. react_to_env_A30(Retries, P_latest, LatestUnanimousP, P_current_calc, - #ch_mgr{name=MyName, consistency_mode=CMode} = S) -> + #ch_mgr{name=MyName, proj=P_current, + consistency_mode=CMode} = S) -> V = case file:read_file("/tmp/moomoo."++atom_to_list(S#ch_mgr.name)) of {ok,_} -> true; _ -> false end, if V -> io:format(user, "A30: ~w: ~p\n", [S#ch_mgr.name, get(react)]); true -> ok end, ?REACT(a30), @@ -1307,15 +1379,17 @@ react_to_env_A30(Retries, P_latest, LatestUnanimousP, P_current_calc, P = #projection_v1{down=Down} = make_none_projection(Epoch + 1, MyName, All_list, Witness_list, MembersDict), + ChainName = P_current#projection_v1.chain_name, + P1 = P#projection_v1{chain_name=ChainName}, P_newprop = if CMode == ap_mode -> %% Not really none proj: just myself, AP style machi_projection:update_checksum( - P#projection_v1{upi=[MyName], + P1#projection_v1{upi=[MyName], down=Down -- [MyName], dbg=[{hosed_list,AllHosed}]}); CMode == cp_mode -> machi_projection:update_checksum( - P#projection_v1{dbg=[{hosed_list,AllHosed}]}) + P1#projection_v1{dbg=[{hosed_list,AllHosed}]}) end, react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP, P_current_calc, true, S); @@ -1388,13 +1462,22 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP, %% we have a disagreement. not ordsets:is_disjoint(P_latest_s, Down_s) end, + AmExcludedFromLatestAll_p = + P_latest#projection_v1.epoch_number /= 0 + andalso + (not lists:member(MyName, P_latest#projection_v1.all_members)), ?REACT({a40, ?LINE, [{latest_author, P_latest#projection_v1.author_server}, + {am_excluded_from_latest_all_p, AmExcludedFromLatestAll_p}, {author_is_down_p, LatestAuthorDownP}, {rank_latest, Rank_latest}, {rank_newprop, Rank_newprop}]}), if + AmExcludedFromLatestAll_p -> + ?REACT({a40, ?LINE, [{latest,machi_projection:make_summary(P_latest)}]}), + react_to_env_A50(P_latest, [], S); + AmHosedP -> ExpectedUPI = if CMode == cp_mode -> []; CMode == ap_mode -> [MyName] @@ -1560,12 +1643,10 @@ react_to_env_A40(Retries, P_newprop, P_latest, LatestUnanimousP, end, if GoTo50_p -> ?REACT({a40, ?LINE, []}), -%% io:format(user, "CONFIRM debug question line ~w\n", [?LINE]), FinalProps = [{throttle_seconds, 0}], react_to_env_A50(P_latest, FinalProps, S); true -> ?REACT({a40, ?LINE, []}), -io:format(user, "CONFIRM debug question line ~w\n", [?LINE]), react_to_env_C300(P_newprop, P_latest, S) end end. 
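The A20 and A40 hunks above implement one policy at two decision points: a
manager that finds a real (non-bootstrap) public projection whose membership
omits its own name stops proposing and wedges itself, on the assumption that
an administrator removed it on purpose. Boiled down to a standalone
predicate, the test is roughly the following sketch (the helper name is
invented for illustration):

#+BEGIN_SRC
%% Hypothetical helper, not in this patch: true when the latest public
%% projection is real (epoch /= 0) and its all_members list omits MyName.
am_excluded_from_latest(MyName, #projection_v1{epoch_number=Epoch,
                                               all_members=All_list}) ->
    Epoch /= 0 andalso not lists:member(MyName, All_list).
#+END_SRC
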
@@ -1575,7 +1656,6 @@ react_to_env_A50(P_latest, FinalProps, #ch_mgr{proj=P_current}=S) -> ?REACT({a50, ?LINE, [{current_epoch, P_current#projection_v1.epoch_number}, {latest_epoch, P_latest#projection_v1.epoch_number}, {final_props, FinalProps}]}), - %% if S#ch_mgr.name == c -> io:format(user, "A50: ~w: ~p\n", [S#ch_mgr.name, get(react)]); true -> ok end, V = case file:read_file("/tmp/moomoo."++atom_to_list(S#ch_mgr.name)) of {ok,_} -> true; _ -> false end, if V -> io:format(user, "A50: ~w: ~p\n", [S#ch_mgr.name, get(react)]); true -> ok end, {{no_change, FinalProps, P_current#projection_v1.epoch_number}, S}. @@ -1850,7 +1930,9 @@ react_to_env_C103(#projection_v1{epoch_number=_Epoch_newprop} = _P_newprop, members_dict=MembersDict} = P_current, P_none0 = make_none_projection(Epoch_latest, MyName, All_list, Witness_list, MembersDict), - P_none1 = P_none0#projection_v1{dbg=[{none_projection,true}]}, + ChainName = P_current#projection_v1.chain_name, + P_none1 = P_none0#projection_v1{chain_name=ChainName, + dbg=[{none_projection,true}]}, P_none = machi_projection:update_checksum(P_none1), ?REACT({c103, ?LINE, [{current_epoch, P_current#projection_v1.epoch_number}, @@ -2206,6 +2288,7 @@ projection_transition_is_sane_except_si_epoch( creation_time=CreationTime1, mode=CMode1, author_server=AuthorServer1, + chain_name=ChainName1, all_members=All_list1, witnesses=Witness_list1, down=Down_list1, @@ -2217,6 +2300,7 @@ projection_transition_is_sane_except_si_epoch( creation_time=CreationTime2, mode=CMode2, author_server=AuthorServer2, + chain_name=ChainName2, all_members=All_list2, witnesses=Witness_list2, down=Down_list2, @@ -2237,7 +2321,8 @@ projection_transition_is_sane_except_si_epoch( true = is_binary(CSum1) andalso is_binary(CSum2), {_,_,_} = CreationTime1, {_,_,_} = CreationTime2, - true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), % todo type may change? + true = is_atom(AuthorServer1) andalso is_atom(AuthorServer2), + true = is_atom(ChainName1) andalso is_atom(ChainName2), true = is_list(All_list1) andalso is_list(All_list2), true = is_list(Witness_list1) andalso is_list(Witness_list2), true = is_list(Down_list1) andalso is_list(Down_list2), @@ -2249,6 +2334,9 @@ projection_transition_is_sane_except_si_epoch( %% projection_transition_is_sane_with_si_epoch(). true = Epoch2 >= Epoch1, + %% Don't change chain names in the middle of the stream. + true = (ChainName1 == ChainName2), + %% No duplicates true = lists:sort(Witness_list2) == lists:usort(Witness_list2), true = lists:sort(Down_list2) == lists:usort(Down_list2), @@ -2256,7 +2344,7 @@ projection_transition_is_sane_except_si_epoch( true = lists:sort(Repairing_list2) == lists:usort(Repairing_list2), %% Disjoint-ness - All_list1 = All_list2, % todo will probably change + %% %% %% %% %% %% %% %% All_list1 = All_list2, % todo will probably change %% true = lists:sort(All_list2) == lists:sort(Down_list2 ++ UPI_list2 ++ %% Repairing_list2), [] = [X || X <- Witness_list2, not lists:member(X, All_list2)], @@ -2361,8 +2449,7 @@ poll_private_proj_is_upi_unanimous_sleep(Count, #ch_mgr{runenv=RunEnv}=S) -> S2 end. 
-poll_private_proj_is_upi_unanimous3(#ch_mgr{name=MyName, proj=P_current, - opts=MgrOpts} = S) -> +poll_private_proj_is_upi_unanimous3(#ch_mgr{name=MyName, proj=P_current} = S) -> UPI = P_current#projection_v1.upi, EpochID = machi_projection:make_epoch_id(P_current), {Rs, S2} = read_latest_projection_call_only2(private, UPI, S), @@ -2395,12 +2482,7 @@ poll_private_proj_is_upi_unanimous3(#ch_mgr{name=MyName, proj=P_current, Annotation = make_annotation(EpochID, Now), NewDbg2 = [Annotation|P_currentFull#projection_v1.dbg2], NewProj = P_currentFull#projection_v1{dbg2=NewDbg2}, - ProjStore = case get_projection_store_regname(MgrOpts) of - undefined -> - machi_flu_psup:make_proj_supname(MyName); - PStr -> - PStr - end, + ProjStore = get_projection_store_pid_or_regname(S), #projection_v1{epoch_number=_EpochRep, epoch_csum= <<_CSumRep:4/binary,_/binary>>, upi=_UPIRep, @@ -2420,8 +2502,6 @@ poll_private_proj_is_upi_unanimous3(#ch_mgr{name=MyName, proj=P_current, S2 end; _Else -> - %% io:format(user, "poll by ~w: want ~W got ~W\n", - %% [MyName, EpochID, 6, _Else, 8]), S2 end. @@ -2518,8 +2598,8 @@ do_repair(#ch_mgr{name=MyName, T1 = os:timestamp(), RepairId = proplists:get_value(repair_id, Opts, id1), error_logger:info_msg( - "Repair start: tail ~p of ~p -> ~p, ~p ID ~w\n", - [MyName, UPI0, Repairing, RepairMode, RepairId]), + "Repair ~w start: tail ~p of ~p -> ~p, ~p\n", + [RepairId, MyName, UPI0, Repairing, RepairMode]), UPI = UPI0 -- Witness_list, Res = machi_chain_repair:repair(RepairMode, MyName, Repairing, UPI, @@ -2532,10 +2612,9 @@ do_repair(#ch_mgr{name=MyName, end, Stats = [{K, ets:lookup_element(ETS, K, 2)} || K <- ETS_T_Keys], error_logger:info_msg( - "Repair ~s: tail ~p of ~p finished ~p repair ID ~w: " - "~p\nStats ~p\n", - [Summary, MyName, UPI0, RepairMode, RepairId, - Res, Stats]), + "Repair ~w ~s: tail ~p of ~p finished ~p: " + "~p Stats: ~p\n", + [RepairId, Summary, MyName, UPI0, RepairMode, Res, Stats]), ets:delete(ETS), exit({repair_final_status, Res}); _ -> @@ -2772,6 +2851,7 @@ full_majority_size(L) when is_list(L) -> full_majority_size(length(L)). make_zerf(#projection_v1{epoch_number=OldEpochNum, + chain_name=ChainName, all_members=AllMembers, members_dict=MembersDict, witnesses=OldWitness_list @@ -2794,7 +2874,8 @@ make_zerf(#projection_v1{epoch_number=OldEpochNum, MyName, AllMembers, OldWitness_list, MembersDict), machi_projection:update_checksum( - P#projection_v1{mode=cp_mode, + P#projection_v1{chain_name=ChainName, + mode=cp_mode, dbg2=[zerf_none,{up,Up},{maj,MajoritySize}]}); true -> make_zerf2(OldEpochNum, Up, MajoritySize, MyName, @@ -2809,7 +2890,6 @@ make_zerf2(OldEpochNum, Up, MajoritySize, MyName, AllMembers, OldWitness_list, Proj2 = Proj#projection_v1{dbg2=[{make_zerf,Epoch}, {yyy_hack, get(yyy_hack)}, {up,Up},{maj,MajoritySize}]}, - %% io:format(user, "ZERF ~w\n",[machi_projection:make_summary(Proj2)]), Proj2 catch throw:{zerf,no_common} -> @@ -2916,11 +2996,6 @@ perhaps_verbose_c111(P_latest2, S) -> ok end. -calc_consistency_mode(_Witness_list = []) -> - ap_mode; -calc_consistency_mode(_Witness_list) -> - cp_mode. - set_proj(S, Proj) -> S#ch_mgr{proj=Proj, proj_unanimous=false}. @@ -2953,3 +3028,10 @@ get_unfit_list(FitnessServer) -> [] end. +get_projection_store_pid_or_regname(#ch_mgr{name=MyName, opts=MgrOpts}) -> + case get_projection_store_regname(MgrOpts) of + undefined -> + machi_flu_psup:make_proj_supname(MyName); + PStr -> + PStr + end. 
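The machi_chain_repair changes that follow replace interactive ?VERB console
output with lager logging keyed by a repair id, so concurrent repairs can be
told apart in the logs. A hypothetical call site might look like the sketch
below; Src, Repairing, UPI, MembersDict, and ETS are assumed to be bound the
same way the chain manager's do_repair binds them, and the repair_id value
may be any term printable with "~w".

#+BEGIN_SRC
%% Hedged sketch of invoking repair/7 with the new option.
Opts = [{repair_mode, repair},
        {verbose, false},                    % new default in this patch
        {repair_id, {repair, c1, 20150801}}],
Res = machi_chain_repair:repair(ap_mode, Src, Repairing, UPI,
                                MembersDict, ETS, Opts).
#+END_SRC
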
diff --git a/src/machi_chain_repair.erl b/src/machi_chain_repair.erl index 87ce73c..0e6606b 100644 --- a/src/machi_chain_repair.erl +++ b/src/machi_chain_repair.erl @@ -103,7 +103,8 @@ repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) -> Add = fun(Name, Pid) -> put(proxies_dict, orddict:store(Name, Pid, get(proxies_dict))) end, OurFLUs = lists:usort([Src] ++ Repairing ++ UPI), % AP assumption! RepairMode = proplists:get_value(repair_mode, Opts, repair), - Verb = proplists:get_value(verbose, Opts, true), + Verb = proplists:get_value(verbose, Opts, false), + RepairId = proplists:get_value(repair_id, Opts, id1), Res = try _ = [begin {ok, Proxy} = machi_proxy_flu1_client:start_link(P), @@ -116,31 +117,38 @@ repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) -> get_file_lists(Proxy, FLU, Dict) end, D, ProxiesDict), MissingFileSummary = make_missing_file_summary(D2, OurFLUs), - ?VERB("MissingFileSummary ~p\n", [MissingFileSummary]), + %% ?VERB("~w MissingFileSummary ~p\n",[RepairId,MissingFileSummary]), + lager:info("Repair ~w MissingFileSummary ~p\n", + [RepairId, MissingFileSummary]), [ets:insert(ETS, {{directive_bytes, FLU}, 0}) || FLU <- OurFLUs], %% Repair files from perspective of Src, i.e. tail(UPI). SrcProxy = orddict:fetch(Src, ProxiesDict), {ok, EpochID} = machi_proxy_flu1_client:get_epoch_id( SrcProxy, ?SHORT_TIMEOUT), - ?VERB("Make repair directives: "), + %% ?VERB("Make repair directives: "), Ds = [{File, make_repair_directives( ConsistencyMode, RepairMode, File, Size, EpochID, Verb, Src, OurFLUs, ProxiesDict, ETS)} || {File, {Size, _MissingList}} <- MissingFileSummary], - ?VERB(" done\n"), + %% ?VERB(" done\n"), + lager:info("Repair ~w repair directives finished\n", [RepairId]), [begin [{_, Bytes}] = ets:lookup(ETS, {directive_bytes, FLU}), - ?VERB("Out-of-sync data for FLU ~p: ~s MBytes\n", - [FLU, mbytes(Bytes)]) + %% ?VERB("Out-of-sync data for FLU ~p: ~s MBytes\n", + %% [FLU, mbytes(Bytes)]), + lager:info("Repair ~w " + "Out-of-sync data for FLU ~p: ~s MBytes\n", + [RepairId, FLU, mbytes(Bytes)]) end || FLU <- OurFLUs], - ?VERB("Execute repair directives: "), + %% ?VERB("Execute repair directives: "), ok = execute_repair_directives(ConsistencyMode, Ds, Src, EpochID, Verb, OurFLUs, ProxiesDict, ETS), - ?VERB(" done\n"), + %% ?VERB(" done\n"), + lager:info("Repair ~w repair directives finished\n", [RepairId]), ok catch What:Why -> @@ -209,7 +217,9 @@ make_repair_directives(ConsistencyMode, RepairMode, File, Size, EpochID, case machi_proxy_flu1_client:checksum_list( Proxy, EpochID, File, ?LONG_TIMEOUT) of {ok, InfoBin} -> - machi_csum_table:split_checksum_list_blob_decode(InfoBin); + {Info, _} = + machi_csum_table:split_checksum_list_blob_decode(InfoBin), + Info; {error, no_such_file} -> [] end, diff --git a/src/machi_cr_client.erl b/src/machi_cr_client.erl index cec7c6a..e03262b 100644 --- a/src/machi_cr_client.erl +++ b/src/machi_cr_client.erl @@ -119,9 +119,7 @@ -export([ %% File API append_chunk/3, append_chunk/4, - append_chunk/5, append_chunk/6, append_chunk_extra/4, append_chunk_extra/5, - append_chunk_extra/6, append_chunk_extra/7, write_chunk/4, write_chunk/5, read_chunk/5, read_chunk/6, trim_chunk/4, trim_chunk/5, @@ -166,29 +164,13 @@ start_link(P_srvr_list, Opts) -> %% with `Prefix'. append_chunk(PidSpec, Prefix, Chunk) -> - append_chunk_extra(PidSpec, ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, 0, ?DEFAULT_TIMEOUT). + append_chunk(PidSpec, Prefix, Chunk, ?DEFAULT_TIMEOUT). 
%% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix'. append_chunk(PidSpec, Prefix, Chunk, Timeout) -> - append_chunk_extra(PidSpec, ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, 0, Timeout). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - -append_chunk(PidSpec, CoC_Namespace, CoC_Locator, Prefix, Chunk) -> - append_chunk_extra(PidSpec, CoC_Namespace, CoC_Locator, - Prefix, Chunk, 0, ?DEFAULT_TIMEOUT). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - -append_chunk(PidSpec, CoC_Namespace, CoC_Locator, Prefix, Chunk, Timeout) -> - append_chunk_extra(PidSpec, CoC_Namespace, CoC_Locator, - Prefix, Chunk, 0, Timeout). + append_chunk_extra(PidSpec, Prefix, Chunk, 0, Timeout). %% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix'. @@ -202,25 +184,7 @@ append_chunk_extra(PidSpec, Prefix, Chunk, ChunkExtra) append_chunk_extra(PidSpec, Prefix, Chunk, ChunkExtra, Timeout0) -> {TO, Timeout} = timeout(Timeout0), - gen_server:call(PidSpec, {req, {append_chunk_extra, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, - Chunk, ChunkExtra, TO}}, - Timeout). - -append_chunk_extra(PidSpec, CoC_Namespace, CoC_Locator, Prefix, Chunk, ChunkExtra) - when is_integer(ChunkExtra), ChunkExtra >= 0 -> - append_chunk_extra(PidSpec, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, ?DEFAULT_TIMEOUT). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - -append_chunk_extra(PidSpec, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, Timeout0) -> - {TO, Timeout} = timeout(Timeout0), - gen_server:call(PidSpec, {req, {append_chunk_extra, - CoC_Namespace, CoC_Locator, Prefix, + gen_server:call(PidSpec, {req, {append_chunk_extra, Prefix, Chunk, ChunkExtra, TO}}, Timeout). @@ -324,10 +288,8 @@ code_change(_OldVsn, S, _Extra) -> %%%%%%%%%%%%%%%%%%%%%%%%%%% -handle_call2({append_chunk_extra, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, TO}, _From, S) -> - do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, 0, os:timestamp(), TO, S); +handle_call2({append_chunk_extra, Prefix, Chunk, ChunkExtra, TO}, _From, S) -> + do_append_head(Prefix, Chunk, ChunkExtra, 0, os:timestamp(), TO, S); handle_call2({write_chunk, File, Offset, Chunk, TO}, _From, S) -> do_write_head(File, Offset, Chunk, 0, os:timestamp(), TO, S); handle_call2({read_chunk, File, Offset, Size, Opts, TO}, _From, S) -> @@ -339,12 +301,9 @@ handle_call2({checksum_list, File, TO}, _From, S) -> handle_call2({list_files, TO}, _From, S) -> do_list_files(0, os:timestamp(), TO, S). 
-do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, 0=Depth, STime, TO, S) -> - do_append_head2(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth + 1, STime, TO, S); -do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, #state{proj=P}=S) -> +do_append_head(Prefix, Chunk, ChunkExtra, 0=Depth, STime, TO, S) -> + do_append_head2(Prefix, Chunk, ChunkExtra, Depth + 1, STime, TO, S); +do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, #state{proj=P}=S) -> %% io:format(user, "head sleep1,", []), sleep_a_while(Depth), DiffMs = timer:now_diff(os:timestamp(), STime) div 1000, @@ -359,62 +318,53 @@ do_append_head(CoC_Namespace, CoC_Locator, Prefix, case S2#state.proj of P2 when P2 == undefined orelse P2#projection_v1.upi == [] -> - do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth + 1, + do_append_head(Prefix, Chunk, ChunkExtra, Depth + 1, STime, TO, S2); _ -> - do_append_head2(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth + 1, + do_append_head2(Prefix, Chunk, ChunkExtra, Depth + 1, STime, TO, S2) end end. -do_append_head2(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, +do_append_head2(Prefix, Chunk, ChunkExtra, Depth, STime, TO, #state{proj=P}=S) -> [HeadFLU|_RestFLUs] = mutation_flus(P), case is_witness_flu(HeadFLU, P) of true -> case witnesses_use_our_epoch(S) of true -> - do_append_head3(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, + do_append_head3(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S); false -> %% Bummer, go back to the beginning and retry. - do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, + do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S) end; false -> - do_append_head3(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, S) + do_append_head3(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S) end. -do_append_head3(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, +do_append_head3(Prefix, Chunk, ChunkExtra, Depth, STime, TO, #state{epoch_id=EpochID, proj=P, proxies_dict=PD}=S) -> [HeadFLU|RestFLUs] = non_witness_flus(mutation_flus(P), P), Proxy = orddict:fetch(HeadFLU, PD), - case ?FLU_PC:append_chunk_extra(Proxy, EpochID, - CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, ?TIMEOUT) of + case ?FLU_PC:append_chunk_extra(Proxy, + EpochID, Prefix, Chunk, ChunkExtra, + ?TIMEOUT) of {ok, {Offset, _Size, File}=_X} -> - do_append_midtail(RestFLUs, CoC_Namespace, CoC_Locator, Prefix, - File, Offset, Chunk, ChunkExtra, + %% io:format(user, "append ~w,", [HeadFLU]), + do_append_midtail(RestFLUs, Prefix, File, Offset, Chunk, ChunkExtra, [HeadFLU], 0, STime, TO, S); {error, bad_checksum}=BadCS -> {reply, BadCS, S}; {error, Retry} when Retry == partition; Retry == bad_epoch; Retry == wedged -> - do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, S); + do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S); {error, written} -> %% Implicit sequencing + this error = we don't know where this %% written block is. But we lost a race. Repeat, with a new %% sequencer assignment. 
- do_append_head(CoC_Namespace, CoC_Locator, Prefix, - Chunk, ChunkExtra, Depth, STime, TO, S); + do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S); {error, trimmed} = Err -> %% TODO: behaviour {reply, Err, S}; @@ -423,15 +373,12 @@ do_append_head3(CoC_Namespace, CoC_Locator, Prefix, Prefix,iolist_size(Chunk)}) end. -do_append_midtail(RestFLUs, CoC_Namespace, CoC_Locator, Prefix, - File, Offset, Chunk, ChunkExtra, +do_append_midtail(RestFLUs, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth, STime, TO, S) when RestFLUs == [] orelse Depth == 0 -> - do_append_midtail2(RestFLUs, CoC_Namespace, CoC_Locator, Prefix, - File, Offset, Chunk, ChunkExtra, + do_append_midtail2(RestFLUs, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth + 1, STime, TO, S); -do_append_midtail(_RestFLUs, CoC_Namespace, CoC_Locator, Prefix, File, - Offset, Chunk, ChunkExtra, +do_append_midtail(_RestFLUs, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth, STime, TO, #state{proj=P}=S) -> %% io:format(user, "midtail sleep2,", []), sleep_a_while(Depth), @@ -458,43 +405,36 @@ do_append_midtail(_RestFLUs, CoC_Namespace, CoC_Locator, Prefix, File, if Prefix == undefined -> % atom! not binary()!! {error, partition}; true -> - do_append_head2(CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, + do_append_head2(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S2) end; RestFLUs3 -> - do_append_midtail2(RestFLUs3, - CoC_Namespace, CoC_Locator, - Prefix, File, Offset, + do_append_midtail2(RestFLUs3, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth + 1, STime, TO, S2) end end end. -do_append_midtail2([], _CoC_Namespace, _CoC_Locator, - _Prefix, File, Offset, Chunk, +do_append_midtail2([], _Prefix, File, Offset, Chunk, _ChunkExtra, _Ws, _Depth, _STime, _TO, S) -> %% io:format(user, "ok!\n", []), {reply, {ok, {Offset, chunk_wrapper_size(Chunk), File}}, S}; -do_append_midtail2([FLU|RestFLUs]=FLUs, CoC_Namespace, CoC_Locator, - Prefix, File, Offset, Chunk, +do_append_midtail2([FLU|RestFLUs]=FLUs, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth, STime, TO, #state{epoch_id=EpochID, proxies_dict=PD}=S) -> Proxy = orddict:fetch(FLU, PD), case ?FLU_PC:write_chunk(Proxy, EpochID, File, Offset, Chunk, ?TIMEOUT) of ok -> %% io:format(user, "write ~w,", [FLU]), - do_append_midtail2(RestFLUs, CoC_Namespace, CoC_Locator, Prefix, - File, Offset, Chunk, + do_append_midtail2(RestFLUs, Prefix, File, Offset, Chunk, ChunkExtra, [FLU|Ws], Depth, STime, TO, S); {error, bad_checksum}=BadCS -> %% TODO: alternate strategy? {reply, BadCS, S}; {error, Retry} when Retry == partition; Retry == bad_epoch; Retry == wedged -> - do_append_midtail(FLUs, CoC_Namespace, CoC_Locator, Prefix, - File, Offset, Chunk, + do_append_midtail(FLUs, Prefix, File, Offset, Chunk, ChunkExtra, Ws, Depth, STime, TO, S); {error, written} -> %% We know what the chunk ought to be, so jump to the @@ -559,8 +499,7 @@ do_write_head2(File, Offset, Chunk, Depth, STime, TO, ok -> %% From this point onward, we use the same code & logic path as %% append does. 
- do_append_midtail(RestFLUs, undefined, undefined, undefined, - File, Offset, Chunk, + do_append_midtail(RestFLUs, undefined, File, Offset, Chunk, undefined, [HeadFLU], 0, STime, TO, S); {error, bad_checksum}=BadCS -> {reply, BadCS, S}; diff --git a/src/machi_csum_table.erl b/src/machi_csum_table.erl index 509dd36..f119465 100644 --- a/src/machi_csum_table.erl +++ b/src/machi_csum_table.erl @@ -2,32 +2,34 @@ -export([open/2, find/3, - write/6, write/4, trim/5, + write/6, write/4, trim/5, trim/3, find_leftneighbor/2, find_rightneighbor/2, all_trimmed/3, any_trimmed/3, all_trimmed/2, + sync/1, calc_unwritten_bytes/1, split_checksum_list_blob_decode/1, - all/1, close/1, delete/1, foldl_chunks/3]). +-export([encode_csum_file_entry/3, encode_csum_file_entry_bin/3, + decode_csum_file_entry/1]). + -include("machi.hrl"). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-export([all/1]). -endif. -record(machi_csum_table, {file :: string(), - table :: eleveldb:db_ref()}). + fd :: file:io_device(), + table :: ets:tid()}). -type table() :: #machi_csum_table{}. -type byte_sequence() :: { Offset :: non_neg_integer(), Size :: pos_integer()|infinity }. --type chunk() :: {Offset :: machi_dt:file_offset(), - Size :: machi_dt:chunk_size(), - machi_dt:chunk_csum() | trimmed | none}. -export_type([table/0]). @@ -35,101 +37,100 @@ {ok, table()} | {error, file:posix()}. open(CSumFilename, _Opts) -> - LevelDBOptions = [{create_if_missing, true}, - %% Keep this table small so as not to interfere - %% operating system's file cache, which is for - %% Machi's main read efficiency - {total_leveldb_mem_percent, 10}], - {ok, T} = eleveldb:open(CSumFilename, LevelDBOptions), - %% Dummy entry for reserved headers - ok = eleveldb:put(T, - sext:encode({0, ?MINIMUM_OFFSET}), - sext:encode(?CSUM_TAG_NONE_ATOM), - [{sync, true}]), + T = ets:new(?MODULE, [private, ordered_set]), + CSum = machi_util:make_tagged_csum(none), + %% Dummy entry for headers + true = ets:insert_new(T, {0, ?MINIMUM_OFFSET, CSum}), C0 = #machi_csum_table{ file=CSumFilename, table=T}, - {ok, C0}. + case file:read_file(CSumFilename) of + {ok, Bin} -> + List = case split_checksum_list_blob_decode(Bin) of + {List0, <<>>} -> + List0; + {List0, _Junk} -> + %% Partially written, needs repair TODO + List0 + end, + %% assuming all entries are strictly ordered by offset, + %% trim command should always come after checksum entry. + %% *if* by any chance that order cound not be kept, we + %% still can do ordering check and monotonic merge here. + %% TODO: make some random injection tests? + [begin %% Replay all file contents, Same logic as in write/6 + Chunks = find(C0, Offset, Size), + lists:foreach(fun({O, _, _}) -> + ets:delete(T, O) + end, Chunks), + true = ets:insert(T, {Offset, Size, CsumOrTrimmed}) + end + || {Offset, Size, CsumOrTrimmed} <- List], + ok; + {error, enoent} -> + ok; + Error -> + throw(Error) + end, + {ok, Fd} = file:open(CSumFilename, [raw, binary, append]), + {ok, C0#machi_csum_table{fd=Fd}}. --spec split_checksum_list_blob_decode(binary())-> [chunk()]. -split_checksum_list_blob_decode(Bin) -> - erlang:binary_to_term(Bin). - - --define(has_overlap(LeftOffset, LeftSize, RightOffset, RightSize), - ((LeftOffset - (RightOffset+RightSize)) * (LeftOffset+LeftSize - RightOffset) < 0)). - --spec find(table(), machi_dt:file_offset(), machi_dt:chunk_size()) - -> [chunk()]. +-spec find(table(), machi_dt:file_offset(), machi_dt:file_size()) -> + list({machi_dt:file_offset(), + machi_dt:file_size(), + machi_dt:chunk_csum()|trimmed}). 
find(#machi_csum_table{table=T}, Offset, Size) -> - {ok, I} = eleveldb:iterator(T, [], keys_only), - EndKey = sext:encode({Offset+Size, 0}), - StartKey = sext:encode({Offset, Size}), + ets:select(T, [{{'$1', '$2', '$3'}, + [inclusion_match_spec(Offset, Size)], + ['$_']}]). - {ok, FirstKey} = case eleveldb:iterator_move(I, StartKey) of - {error, invalid_iterator} -> - eleveldb:iterator_move(I, first); - {ok, _} = R0 -> - case eleveldb:iterator_move(I, prev) of - {error, invalid_iterator} -> - R0; - {ok, _} = R1 -> - R1 - end - end, - _ = eleveldb:iterator_close(I), - FoldFun = fun({K, V}, Acc) -> - {TargetOffset, TargetSize} = sext:decode(K), - case ?has_overlap(TargetOffset, TargetSize, Offset, Size) of - true -> - [{TargetOffset, TargetSize, sext:decode(V)}|Acc]; - false -> - Acc - end; - (_K, Acc) -> - lager:error("~p wrong option", [_K]), - Acc - end, - lists:reverse(eleveldb_fold(T, FirstKey, EndKey, FoldFun, [])). +-ifdef(TEST). +all(#machi_csum_table{table=T}) -> + ets:tab2list(T). +-endif. - -%% @doc Updates all chunk info, by deleting existing entries if exists -%% and putting new chunk info --spec write(table(), - machi_dt:file_offset(), machi_dt:chunk_size(), - machi_dt:chunk_csum()|'none'|'trimmed', - undefined|chunk(), undefined|chunk()) -> - ok | {error, term()}. -write(#machi_csum_table{table=T} = CsumT, Offset, Size, CSum, +write(#machi_csum_table{fd=Fd, table=T} = CsumT, + Offset, Size, CSum, LeftUpdate, RightUpdate) -> - PutOps = - [{put, - sext:encode({Offset, Size}), - sext:encode(CSum)}] - ++ case LeftUpdate of - {LO, LS, LCsum} when LO + LS =:= Offset -> - [{put, - sext:encode({LO, LS}), - sext:encode(LCsum)}]; - undefined -> - [] - end - ++ case RightUpdate of - {RO, RS, RCsum} when RO =:= Offset + Size -> - [{put, - sext:encode({RO, RS}), - sext:encode(RCsum)}]; - undefined -> - [] - end, - Chunks = find(CsumT, Offset, Size), - DeleteOps = lists:map(fun({O, L, _}) -> - {delete, sext:encode({O, L})} + Binary = + [encode_csum_file_entry_bin(Offset, Size, CSum), + case LeftUpdate of + {LO, LS, LCsum} when LO + LS =:= Offset -> + encode_csum_file_entry_bin(LO, LS, LCsum); + undefined -> + <<>> + end, + case RightUpdate of + {RO, RS, RCsum} when RO =:= Offset + Size -> + encode_csum_file_entry_bin(RO, RS, RCsum); + undefined -> + <<>> + end], + case file:write(Fd, Binary) of + ok -> + Chunks = find(CsumT, Offset, Size), + lists:foreach(fun({O, _, _}) -> + ets:delete(T, O) end, Chunks), - eleveldb:write(T, DeleteOps ++ PutOps, [{sync, true}]). + case LeftUpdate of + {LO1, LS1, _} when LO1 + LS1 =:= Offset -> + ets:insert(T, LeftUpdate); + undefined -> noop + end, + case RightUpdate of + {RO1, _, _} when RO1 =:= Offset + Size -> + ets:insert(T, RightUpdate); + undefined -> noop + end, + true = ets:insert(T, {Offset, Size, CSum}), + ok; + Error -> + Error + end. -spec find_leftneighbor(table(), non_neg_integer()) -> - undefined | chunk(). + undefined | + {non_neg_integer(), machi_dt:chunk_size(), trimmed|machi_dt:chunk_csum()}. find_leftneighbor(CsumT, Offset) -> case find(CsumT, Offset, 1) of [] -> undefined; @@ -138,7 +139,8 @@ find_leftneighbor(CsumT, Offset) -> end. -spec find_rightneighbor(table(), non_neg_integer()) -> - undefined | chunk(). + undefined | + {non_neg_integer(), machi_dt:chunk_size(), trimmed|machi_dt:chunk_csum()}. find_rightneighbor(CsumT, Offset) -> case find(CsumT, Offset, 1) of [] -> undefined; @@ -148,46 +150,40 @@ find_rightneighbor(CsumT, Offset) -> end. 
-spec write(table(), machi_dt:file_offset(), machi_dt:file_size(), - machi_dt:chunk_csum()|none|trimmed) -> + machi_dt:chunk_csum()|trimmed) -> ok | {error, trimmed|file:posix()}. write(CsumT, Offset, Size, CSum) -> write(CsumT, Offset, Size, CSum, undefined, undefined). trim(CsumT, Offset, Size, LeftUpdate, RightUpdate) -> - write(CsumT, Offset, Size, - trimmed, %% Should this be much smaller like $t or just 't' - LeftUpdate, RightUpdate). + write(CsumT, Offset, Size, trimmed, LeftUpdate, RightUpdate). -%% @doc returns whether all bytes in a specific window is continously -%% trimmed or not --spec all_trimmed(table(), non_neg_integer(), non_neg_integer()) -> boolean(). -all_trimmed(#machi_csum_table{table=T}, Left, Right) -> - FoldFun = fun({_, _}, false) -> - false; - ({K, V}, Pos) when is_integer(Pos) andalso Pos =< Right -> - case {sext:decode(K), sext:decode(V)} of - {{Pos, Size}, trimmed} -> - Pos + Size; - {{Offset, Size}, _} - when Offset + Size =< Left -> - Left; - _Eh -> - false - end - end, - case eleveldb:fold(T, FoldFun, Left, [{verify_checksums, true}]) of - false -> false; - Right -> true; - LastTrimmed when LastTrimmed < Right -> false; - _ -> %% LastTrimmed > Pos0, which is a irregular case but ok - true +-spec trim(table(), machi_dt:file_offset(), machi_dt:file_size()) -> + ok | {error, file:posix()}. +trim(#machi_csum_table{fd=Fd, table=T}, Offset, Size) -> + Binary = encode_csum_file_entry_bin(Offset, Size, trimmed), + case file:write(Fd, Binary) of + ok -> + true = ets:insert(T, {Offset, Size, trimmed}), + ok; + Error -> + Error end. -%% @doc returns whether all bytes 0-Pos0 is continously trimmed or -%% not, including header. +-spec all_trimmed(table(), non_neg_integer(), non_neg_integer()) -> boolean(). +all_trimmed(#machi_csum_table{table=T}, Left, Right) -> + runthru(ets:tab2list(T), Left, Right). + -spec all_trimmed(table(), non_neg_integer()) -> boolean(). -all_trimmed(CsumT, Pos0) -> - all_trimmed(CsumT, 0, Pos0). +all_trimmed(#machi_csum_table{table=T}, Pos) -> + case ets:tab2list(T) of + [{0, ?MINIMUM_OFFSET, _}|L] -> + %% tl/1 to remove header space {0, 1024, <<0>>} + runthru(L, ?MINIMUM_OFFSET, Pos); + List -> + %% In case a header is removed; + runthru(List, 0, Pos) + end. -spec any_trimmed(table(), pos_integer(), @@ -196,9 +192,13 @@ any_trimmed(CsumT, Offset, Size) -> Chunks = find(CsumT, Offset, Size), lists:any(fun({_, _, State}) -> State =:= trimmed end, Chunks). +-spec sync(table()) -> ok | {error, file:posix()}. +sync(#machi_csum_table{fd=Fd}) -> + file:sync(Fd). + -spec calc_unwritten_bytes(table()) -> [byte_sequence()]. -calc_unwritten_bytes(#machi_csum_table{table=_} = CsumT) -> - case lists:sort(all(CsumT)) of +calc_unwritten_bytes(#machi_csum_table{table=T}) -> + case lists:sort(ets:tab2list(T)) of [] -> [{?MINIMUM_OFFSET, infinity}]; Sorted -> @@ -206,34 +206,101 @@ calc_unwritten_bytes(#machi_csum_table{table=_} = CsumT) -> build_unwritten_bytes_list(Sorted, LastOffset, []) end. -all(CsumT) -> - FoldFun = fun(E, Acc) -> [E|Acc] end, - lists:reverse(foldl_chunks(FoldFun, [], CsumT)). - -spec close(table()) -> ok. -close(#machi_csum_table{table=T}) -> - ok = eleveldb:close(T). +close(#machi_csum_table{table=T, fd=Fd}) -> + true = ets:delete(T), + ok = file:close(Fd). -spec delete(table()) -> ok. 
-delete(#machi_csum_table{table=T, file=F}) ->
-    catch eleveldb:close(T),
-    %% TODO change this to directory walk
-    case os:cmd("rm -rf " ++ F) of
-        "" -> ok;
+delete(#machi_csum_table{file=F} = C) ->
+    catch close(C),
+    case file:delete(F) of
+        ok -> ok;
+        {error, enoent} -> ok;
         E -> E
     end.
 
--spec foldl_chunks(fun((chunk(), Acc0 :: term()) -> Acc :: term()),
+-spec foldl_chunks(fun(({non_neg_integer(), non_neg_integer(), term()},
+                        Acc0 :: term())
+                       -> Acc :: term()),
                    Acc0 :: term(), table()) -> Acc :: term().
 foldl_chunks(Fun, Acc0, #machi_csum_table{table=T}) ->
-    FoldFun = fun({K, V}, Acc) ->
-                      {Offset, Len} = sext:decode(K),
-                      Fun({Offset, Len, sext:decode(V)}, Acc);
-                 (_K, Acc) ->
-                      _ = lager:error("~p: wrong option?", [_K]),
-                      Acc
-              end,
-    eleveldb:fold(T, FoldFun, Acc0, [{verify_checksums, true}]).
+    ets:foldl(Fun, Acc0, T).
+
+%% @doc Encode `Offset + Size + TaggedCSum' into an `iolist()' type for
+%% internal storage by the FLU.
+
+-spec encode_csum_file_entry(
+        machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()) ->
+        iolist().
+encode_csum_file_entry(Offset, Size, TaggedCSum) ->
+    Len = 8 + 4 + byte_size(TaggedCSum),
+    [<<$w, Len:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big>>,
+     TaggedCSum].
+
+%% @doc Encode `Offset + Size + TaggedCSum' into a `binary()' type for
+%% internal storage by the FLU.
+
+-spec encode_csum_file_entry_bin(
+        machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()) ->
+        binary().
+encode_csum_file_entry_bin(Offset, Size, trimmed) ->
+    <<$t, Offset:64/unsigned-big, Size:32/unsigned-big>>;
+encode_csum_file_entry_bin(Offset, Size, TaggedCSum) ->
+    Len = 8 + 4 + byte_size(TaggedCSum),
+    <<$w, Len:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big,
+      TaggedCSum/binary>>.
+
+%% @doc Decode a single `binary()' blob into an
+%% `{Offset,Size,TaggedCSum}' tuple.
+%%
+%% The internal encoding (which is currently exposed to the outside world
+%% via this function and related ones) is:
+%%
+%%   1 byte: record type tag, $w (written) or $t (trimmed)
+%%   1 byte: record length ($w records only)
+%%   8 bytes (unsigned big-endian): chunk offset
+%%   4 bytes (unsigned big-endian): chunk size
+%%   all remaining bytes: tagged checksum ($w records only)
+%%
+%% See `machi.hrl' for the tagged checksum types, e.g.,
+%% `?CSUM_TAG_NONE'.
+
+-spec decode_csum_file_entry(binary()) ->
+        error |
+        {machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()}.
+decode_csum_file_entry(<<_:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big, TaggedCSum/binary>>) ->
+    {Offset, Size, TaggedCSum};
+decode_csum_file_entry(_Else) ->
+    error.
+
+%% @doc Split a `binary()' blob of `checksum_list' data into a list of
+%% `{Offset,Size,TaggedCSum}' tuples.
+
+-spec split_checksum_list_blob_decode(binary()) ->
+        {list({machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()}),
+         TrailingJunk::binary()}.
+split_checksum_list_blob_decode(Bin) ->
+    split_checksum_list_blob_decode(Bin, []).
+
+split_checksum_list_blob_decode(<<$w, Len:8/unsigned-big, Part:Len/binary, Rest/binary>>, Acc)->
+    One = <<Len:8/unsigned-big, Part/binary>>,
+    case decode_csum_file_entry(One) of
+        error ->
+            split_checksum_list_blob_decode(Rest, Acc);
+        DecOne ->
+            split_checksum_list_blob_decode(Rest, [DecOne|Acc])
+    end;
+split_checksum_list_blob_decode(<<$t, Offset:64/unsigned-big, Size:32/unsigned-big, Rest/binary>>, Acc) ->
+    %% trimmed offset
+    split_checksum_list_blob_decode(Rest, [{Offset, Size, trimmed}|Acc]);
+split_checksum_list_blob_decode(Rest, Acc) ->
+    {lists:reverse(Acc), Rest}.
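The encode/decode pair above defines the on-disk checksum-log record format. A round-trip sketch, written as if inside machi_csum_table.erl; the 5-byte CSum value is a stand-in for a real tagged checksum, and roundtrip_sketch/0 is illustrative only:

    %% Round-trip sketch: one written entry and one trimmed entry, encoded
    %% then recovered with split_checksum_list_blob_decode/1.
    roundtrip_sketch() ->
        CSum = <<1, 42:32>>,
        WBin = encode_csum_file_entry_bin(1024, 77, CSum),
        TBin = encode_csum_file_entry_bin(2048, 99, trimmed),
        {[{1024, 77, CSum}, {2048, 99, trimmed}], <<>>} =
            split_checksum_list_blob_decode(<<WBin/binary, TBin/binary>>),
        ok.

Note that the empty trailing binary confirms the blob parsed cleanly; any trailing junk would be returned in the second tuple element instead.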
-spec build_unwritten_bytes_list( CsumData   :: [{ Offset   :: non_neg_integer(),
                                                   Size     :: pos_integer(),
@@ -255,46 +322,21 @@ build_unwritten_bytes_list([{CurrentOffset, CurrentSize, _Csum}|Rest], LastOffse
 build_unwritten_bytes_list([{CO, CS, _Ck}|Rest], _LastOffset, Acc) ->
     build_unwritten_bytes_list(Rest, CO + CS, Acc).
 
+%% @doc Make sure all trimmed chunks are contiguously chained
+%% TODO: test with EQC
+runthru([], Pos, Pos) -> true;
+runthru([], Pos0, Pos) when Pos0 < Pos -> false;
+runthru([{Offset0, Size0, trimmed}|T], Offset, Pos) when Offset0 =< Offset ->
+    runthru(T, Offset0+Size0, Pos);
+runthru(_L, _O, _P) ->
+    false.
+
 %% @doc If you want to find an overlap among two areas [x, y] and [a,
-%% b] where x < y and a < b; if (a-y)*(b-x) < 0 then there's a
+%% b] where x < y and a < b; if (a-y)*(b-x) < 0 then there's a
 %% overlap, else, > 0 then there're no overlap. border condition = 0
 %% is not overlap in this offset-size case.
-%% inclusion_match_spec(Offset, Size) ->
-%%     {'>', 0,
-%%      {'*',
-%%       {'-', Offset + Size, '$1'},
-%%       {'-', Offset, {'+', '$1', '$2'}}}}.
-
--spec eleveldb_fold(eleveldb:db_ref(), binary(), binary(),
-                    fun(({binary(), binary()}, AccType::term()) -> AccType::term()),
-                    AccType0::term()) ->
-                           AccType::term().
-eleveldb_fold(Ref, Start, End, FoldFun, InitAcc) ->
-    {ok, Iterator} = eleveldb:iterator(Ref, []),
-    try
-        eleveldb_do_fold(eleveldb:iterator_move(Iterator, Start),
-                         Iterator, End, FoldFun, InitAcc)
-    catch throw:IteratorClosed ->
-            {error, IteratorClosed}
-    after
-        eleveldb:iterator_close(Iterator)
-    end.
-
--spec eleveldb_do_fold({ok, binary(), binary()}|{error, iterator_closed|invalid_iterator}|{ok,binary()},
-                       eleveldb:itr_ref(), binary(),
-                       fun(({binary(), binary()}, AccType::term()) -> AccType::term()),
-                       AccType::term()) ->
-                              AccType::term().
-eleveldb_do_fold({ok, Key, Value}, _, End, FoldFun, Acc)
-  when End < Key ->
-    FoldFun({Key, Value}, Acc);
-eleveldb_do_fold({ok, Key, Value}, Iterator, End, FoldFun, Acc) ->
-    eleveldb_do_fold(eleveldb:iterator_move(Iterator, next),
-                     Iterator, End, FoldFun,
-                     FoldFun({Key, Value}, Acc));
-eleveldb_do_fold({error, iterator_closed}, _, _, _, Acc) ->
-    %% It's really an error which is not expected
-    throw({iterator_closed, Acc});
-eleveldb_do_fold({error, invalid_iterator}, _, _, _, Acc) ->
-    %% Probably reached to end
-    Acc.
+inclusion_match_spec(Offset, Size) ->
+    {'>', 0,
+     {'*',
+      {'-', Offset + Size, '$1'},
+      {'-', Offset, {'+', '$1', '$2'}}}}.
diff --git a/src/machi_dt.erl b/src/machi_dt.erl
index 410a982..9bcc540 100644
--- a/src/machi_dt.erl
+++ b/src/machi_dt.erl
@@ -29,9 +29,6 @@
 -type chunk_s() :: 'trimmed' | binary().
 -type chunk_pos() :: {file_offset(), chunk_size(), file_name_s()}.
 -type chunk_size() :: non_neg_integer().
--type coc_namespace() :: string().
--type coc_nl() :: {coc, coc_namespace(), coc_locator()}.
--type coc_locator() :: non_neg_integer().
 -type error_general() :: 'bad_arg' | 'wedged' | 'bad_checksum'.
 -type epoch_csum() :: binary().
 -type epoch_num() :: -1 | non_neg_integer().
@@ -47,7 +44,7 @@
 -type projection() :: #projection_v1{}.
 -type projection_type() :: 'public' | 'private'.
 
-%% @doc Tags that stand for how that checksum was generated. See
+%% Tags that stand for how that checksum was generated. See
 %% machi_util:make_tagged_csum/{1,2} for further documentation and
 %% implementation.
 -type csum_tag() :: none | client_sha | server_sha | server_regen_sha.
@@ -61,9 +58,6 @@ chunk_s/0, chunk_pos/0, chunk_size/0, - coc_namespace/0, - coc_nl/0, - coc_locator/0, error_general/0, epoch_csum/0, epoch_num/0, diff --git a/src/machi_file_proxy.erl b/src/machi_file_proxy.erl index cae292c..ff6748f 100644 --- a/src/machi_file_proxy.erl +++ b/src/machi_file_proxy.erl @@ -57,8 +57,7 @@ write/4, trim/4, append/2, - append/4, - checksum_list/1 + append/4 ]). %% gen_server callbacks @@ -211,10 +210,6 @@ append(_Pid, ClientMeta, Extra, _Data) -> lager:warning("Bad arg to append: ClientMeta ~p, Extra ~p", [ClientMeta, Extra]), {error, bad_arg}. --spec checksum_list(pid()) -> {ok, list()}. -checksum_list(Pid) -> - gen_server:call(Pid, {checksum_list}, ?TIMEOUT). - %% gen_server callbacks % @private @@ -254,22 +249,27 @@ handle_call({sync, data}, _From, State = #state{ data_filehandle = FHd }) -> R = file:sync(FHd), {reply, R, State}; -handle_call({sync, csum}, _From, State) -> - %% machi_csum_table always writes in {sync, true} option, so here - %% explicit sync isn't actually needed. - {reply, ok, State}; +handle_call({sync, csum}, _From, State = #state{ csum_table = T }) -> + R = machi_csum_table:sync(T), + {reply, R, State}; handle_call({sync, all}, _From, State = #state{filename = F, data_filehandle = FHd, - csum_table = _T + csum_table = T }) -> - Resp = case file:sync(FHd) of - ok -> - ok; - Error -> - lager:error("Got ~p syncing all files for file ~p", - [Error, F]), - Error + R = machi_csum_table:sync(T), + R1 = file:sync(FHd), + Resp = case {R, R1} of + {ok, ok} -> ok; + {ok, O1} -> + lager:error("Got ~p during a data file sync on file ~p", [O1, F]), + O1; + {O2, ok} -> + lager:error("Got ~p during a csum file sync on file ~p", [O2, F]), + O2; + {O3, O4} -> + lager:error("Got ~p ~p syncing all files for file ~p", [O3, O4, F]), + {O3, O4} end, {reply, Resp, State}; @@ -435,10 +435,6 @@ handle_call({append, ClientMeta, Extra, Data}, _From, {reply, Resp, State#state{appends = {T+1, NewErr}, eof_position = NewEof}}; -handle_call({checksum_list}, _FRom, State = #state{csum_table=T}) -> - All = machi_csum_table:all(T), - {reply, {ok, All}, State}; - handle_call(Req, _From, State) -> lager:warning("Unknown call: ~p", [Req]), {reply, whoaaaaaaaaaaaa, State}. @@ -549,6 +545,7 @@ terminate(Reason, #state{filename = F, undefined -> noop; %% file deleted _ -> + ok = machi_csum_table:sync(T), ok = machi_csum_table:close(T) end, ok. @@ -600,8 +597,7 @@ check_or_make_tagged_csum(OtherTag, _ClientCsum, _Data) -> Size :: non_neg_integer(), NoChunk :: boolean(), NoChecksum :: boolean() - ) -> {ok, {Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum :: binary()}], - Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]}} | + ) -> {ok, Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum :: binary()}]} | {error, bad_checksum} | {error, partial_read} | {error, file:posix()} | @@ -625,14 +621,6 @@ do_read(FHd, Filename, CsumTable, Offset, Size, _, _) -> ChunkCsums = machi_csum_table:find(CsumTable, Offset, Size), read_all_ranges(FHd, Filename, ChunkCsums, [], []). 
--spec read_all_ranges(file:io_device(), string(), - [{non_neg_integer(),non_neg_integer(),trimmed|binary()}], - Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum::binary()}], - Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]) -> - {ok, { - Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum::binary()}], - Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]}} | - {erorr, term()|partial_read}. read_all_ranges(_, _, [], ReadChunks, TrimmedChunks) -> %% TODO: currently returns empty list of trimmed chunks {ok, {lists:reverse(ReadChunks), lists:reverse(TrimmedChunks)}}; @@ -644,11 +632,6 @@ read_all_ranges(FHd, Filename, [{Offset, Size, TaggedCsum}|T], ReadChunks, Trimm case file:pread(FHd, Offset, Size) of eof -> read_all_ranges(FHd, Filename, T, ReadChunks, TrimmedChunks); - {ok, Bytes} when byte_size(Bytes) == Size, TaggedCsum =:= none -> - read_all_ranges(FHd, Filename, T, - [{Filename, Offset, Bytes, - machi_util:make_tagged_csum(none, <<>>)}|ReadChunks], - TrimmedChunks); {ok, Bytes} when byte_size(Bytes) == Size -> {Tag, Ck} = machi_util:unmake_tagged_csum(TaggedCsum), case check_or_make_tagged_csum(Tag, Ck, Bytes) of @@ -844,7 +827,7 @@ maybe_gc(Reply, S = #state{fluname=FluName, filename = Filename, eof_position = Eof, csum_table=CsumTable}) -> - case machi_csum_table:all_trimmed(CsumTable, ?MINIMUM_OFFSET, Eof) of + case machi_csum_table:all_trimmed(CsumTable, Eof) of true -> lager:debug("GC? Let's do it: ~p.~n", [Filename]), %% Before unlinking a file, it should inform diff --git a/src/machi_fitness.erl b/src/machi_fitness.erl index bf16198..2b54244 100644 --- a/src/machi_fitness.erl +++ b/src/machi_fitness.erl @@ -39,7 +39,8 @@ get_unfit_list/1, update_local_down_list/3, add_admin_down/3, delete_admin_down/2, send_fitness_update_spam/3, - send_spam_to_everyone/1]). + send_spam_to_everyone/1, + trigger_early_adjustment/2]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -81,6 +82,13 @@ send_fitness_update_spam(Pid, FromName, Dict) -> send_spam_to_everyone(Pid) -> gen_server:call(Pid, {send_spam_to_everyone}, infinity). +%% @doc For testing purposes, we don't want a test to wait for +%% wall-clock time to elapse before the fitness server makes a +%% down->up status decision. + +trigger_early_adjustment(Pid, FLU) -> + Pid ! {adjust_down_list, FLU}. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% init([{MyFluName}|Args]) -> diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl index e620308..042eaed 100644 --- a/src/machi_flu1.erl +++ b/src/machi_flu1.erl @@ -58,9 +58,9 @@ -export([start_link/1, stop/1, update_wedge_state/3, wedge_myself/2]). --export([make_projection_server_regname/1]). +-export([make_listener_regname/1, make_projection_server_regname/1]). %% TODO: remove or replace in OTP way after gen_*'ified --export([main2/4, run_append_server/2, +-export([main2/4, run_append_server/2, run_listen_server/1, current_state/1, format_state/1]). -record(state, { @@ -68,9 +68,14 @@ proj_store :: pid(), witness = false :: boolean(), append_pid :: pid(), + tcp_port :: non_neg_integer(), + data_dir :: string(), wedged = true :: boolean(), etstab :: ets:tid(), epoch_id :: 'undefined' | machi_dt:epoch_id(), + pb_mode = undefined :: 'undefined' | 'high' | 'low', + high_clnt :: 'undefined' | pid(), + trim_table :: ets:tid(), props = [] :: list() % proplist }). 
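The trigger_early_adjustment/2 hook added to machi_fitness above exists so tests can skip the wall-clock delay before a down->up decision. A hedged test-side sketch; the FLU names a and b are illustrative, and the registered name comes from machi_flu_psup:make_fitness_regname/1, which this patch uses elsewhere:

    %% Test-side sketch: poke FLU a's fitness server to re-evaluate FLU b
    %% now, instead of sleeping until its internal delay timer fires.
    Fitness = whereis(machi_flu_psup:make_fitness_regname(a)),
    _ = machi_fitness:trigger_early_adjustment(Fitness, b),
    %% ...then poll machi_fitness:get_unfit_list/1 for the expected change.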
@@ -82,12 +87,7 @@ start_link([{FluName, TcpPort, DataDir}|Rest]) proc_lib:start_link(?MODULE, main2, [FluName, TcpPort, DataDir, Rest], ?INIT_TIMEOUT). -stop(RegName) when is_atom(RegName) -> - case whereis(RegName) of - undefined -> ok; - Pid -> stop(Pid) - end; -stop(Pid) when is_pid(Pid) -> +stop(Pid) -> case erlang:is_process_alive(Pid) of true -> Pid ! killme, @@ -152,6 +152,8 @@ main2(FluName, TcpPort, DataDir, Props) -> S0 = #state{flu_name=FluName, proj_store=ProjectionPid, + tcp_port=TcpPort, + data_dir=DataDir, wedged=Wedged_p, witness=Witness_p, etstab=ets_table_name(FluName), @@ -165,8 +167,7 @@ main2(FluName, TcpPort, DataDir, Props) -> ok end, S1 = S0#state{append_pid=AppendPid}, - {ok, ListenerPid} = start_listen_server(TcpPort, DataDir, S1), - %% io:format(user, "Listener started: ~w~n", [{FluName, ListenerPid}]), + {ok, ListenPid} = start_listen_server(S1), Config_e = machi_util:make_config_filename(DataDir, "unused"), ok = filelib:ensure_dir(Config_e), @@ -178,23 +179,36 @@ main2(FluName, TcpPort, DataDir, Props) -> put(flu_flu_name, FluName), put(flu_append_pid, S1#state.append_pid), put(flu_projection_pid, ProjectionPid), - put(flu_listen_pid, ListenerPid), + put(flu_listen_pid, ListenPid), proc_lib:init_ack({ok, self()}), receive killme -> ok end, (catch exit(S1#state.append_pid, kill)), (catch exit(ProjectionPid, kill)), - (catch exit(ListenerPid, kill)), + (catch exit(ListenPid, kill)), ok. +start_listen_server(S) -> + proc_lib:start_link(?MODULE, run_listen_server, [S], ?INIT_TIMEOUT). + start_append_server(S, AckPid) -> proc_lib:start_link(?MODULE, run_append_server, [AckPid, S], ?INIT_TIMEOUT). -start_listen_server(TcpPort, DataDir, - #state{flu_name=FluName, witness=Witness, etstab=EtsTab, - proj_store=ProjStore}=_S) -> - machi_listener_sup:start_listener(FluName, TcpPort, Witness, DataDir, - EtsTab, ProjStore). +run_listen_server(#state{flu_name=FluName, tcp_port=TcpPort}=S) -> + register(make_listener_regname(FluName), self()), + SockOpts = ?PB_PACKET_OPTS ++ + [{reuseaddr, true}, {mode, binary}, {active, false}, + {backlog,8192}], + case gen_tcp:listen(TcpPort, SockOpts) of + {ok, LSock} -> + proc_lib:init_ack({ok, self()}), + listen_server_loop(LSock, S); + Else -> + error_logger:warning_msg("~s:run_listen_server: " + "listen to TCP port ~w: ~w\n", + [?MODULE, TcpPort, Else]), + exit({?MODULE, run_listen_server, tcp_port, TcpPort, Else}) + end. run_append_server(FluPid, #state{flu_name=Name, wedged=Wedged_p,epoch_id=EpochId}=S) -> @@ -206,31 +220,33 @@ run_append_server(FluPid, #state{flu_name=Name, proc_lib:init_ack({ok, self()}), append_server_loop(FluPid, S#state{etstab=TID}). +listen_server_loop(LSock, S) -> + {ok, Sock} = gen_tcp:accept(LSock), + spawn_link(fun() -> net_server_loop(Sock, S) end), + listen_server_loop(LSock, S). + append_server_loop(FluPid, #state{wedged=Wedged_p, witness=Witness_p, epoch_id=OldEpochId, flu_name=FluName}=S) -> receive - {seq_append, From, _N, _L, _Prefix, _Chunk, _CSum, _Extra, _EpochID} + {seq_append, From, _Prefix, _Chunk, _CSum, _Extra, _EpochID} when Witness_p -> - %% The FLU's machi_flu1_net_server process ought to filter all + %% The FLU's net_server_loop() process ought to filter all %% witness states, but we'll keep this clause for extra %% paranoia. From ! witness, append_server_loop(FluPid, S); - {seq_append, From, _N, _L, _Prefix, _Chunk, _CSum, _Extra, _EpochID} + {seq_append, From, _Prefix, _Chunk, _CSum, _Extra, _EpochID} when Wedged_p -> From ! 
wedged, append_server_loop(FluPid, S); - {seq_append, From, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum, Extra, EpochID} -> + {seq_append, From, Prefix, Chunk, CSum, Extra, EpochID} -> %% Old is the one from our state, plain old 'EpochID' comes %% from the client. _ = case OldEpochId == EpochID of true -> spawn(fun() -> - append_server_dispatch(From, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum, Extra, - FluName, EpochID) + append_server_dispatch(From, Prefix, Chunk, CSum, Extra, FluName, EpochID) end); false -> From ! {error, bad_epoch} @@ -273,10 +289,396 @@ append_server_loop(FluPid, #state{wedged=Wedged_p, append_server_loop(FluPid, S) end. -append_server_dispatch(From, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum, Extra, FluName, EpochId) -> - Result = case handle_append(CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum, Extra, FluName, EpochId) of +net_server_loop(Sock, S) -> + case gen_tcp:recv(Sock, 0, ?SERVER_CMD_READ_TIMEOUT) of + {ok, Bin} -> + {RespBin, S2} = + case machi_pb:decode_mpb_ll_request(Bin) of + LL_req when LL_req#mpb_ll_request.do_not_alter == 2 -> + {R, NewS} = do_pb_ll_request(LL_req, S), + {maybe_encode_response(R), mode(low, NewS)}; + _ -> + HL_req = machi_pb:decode_mpb_request(Bin), + 1 = HL_req#mpb_request.do_not_alter, + {R, NewS} = do_pb_hl_request(HL_req, make_high_clnt(S)), + {machi_pb:encode_mpb_response(R), mode(high, NewS)} + end, + if RespBin == async_no_response -> + net_server_loop(Sock, S2); + true -> + case gen_tcp:send(Sock, RespBin) of + ok -> + net_server_loop(Sock, S2); + {error, _} -> + (catch gen_tcp:close(Sock)), + exit(normal) + end + end; + {error, SockError} -> + Msg = io_lib:format("Socket error ~w", [SockError]), + R = #mpb_ll_response{req_id= <<>>, + generic=#mpb_errorresp{code=1, msg=Msg}}, + _Resp = machi_pb:encode_mpb_ll_response(R), + %% TODO: Weird that sometimes neither catch nor try/catch + %% can prevent OTP's SASL from logging an error here. + %% Error in process <0.545.0> with exit value: {badarg,[{erlang,port_command,....... + %% TODO: is this what causes the intermittent PULSE deadlock errors? + %% _ = (catch gen_tcp:send(Sock, _Resp)), timer:sleep(1000), + (catch gen_tcp:close(Sock)), + exit(normal) + end. + +maybe_encode_response(async_no_response=X) -> + X; +maybe_encode_response(R) -> + machi_pb:encode_mpb_ll_response(R). + +mode(Mode, #state{pb_mode=undefined}=S) -> + S#state{pb_mode=Mode}; +mode(_, S) -> + S. + +make_high_clnt(#state{high_clnt=undefined}=S) -> + {ok, Proj} = machi_projection_store:read_latest_projection( + S#state.proj_store, private), + Ps = [P_srvr || {_, P_srvr} <- orddict:to_list( + Proj#projection_v1.members_dict)], + {ok, Clnt} = machi_cr_client:start_link(Ps), + S#state{high_clnt=Clnt}; +make_high_clnt(S) -> + S. + +do_pb_ll_request(#mpb_ll_request{req_id=ReqID}, #state{pb_mode=high}=S) -> + Result = {high_error, 41, "Low protocol request while in high mode"}, + {machi_pb_translate:to_pb_response(ReqID, unused, Result), S}; +do_pb_ll_request(PB_request, S) -> + Req = machi_pb_translate:from_pb_request(PB_request), + {ReqID, Cmd, Result, S2} = + case Req of + {RqID, {LowCmd, _}=CMD} + when LowCmd == low_proj; + LowCmd == low_wedge_status; LowCmd == low_list_files -> + %% Skip wedge check for projection commands! 
+ %% Skip wedge check for these unprivileged commands + {Rs, NewS} = do_pb_ll_request3(CMD, S), + {RqID, CMD, Rs, NewS}; + {RqID, CMD} -> + EpochID = element(2, CMD), % by common convention + {Rs, NewS} = do_pb_ll_request2(EpochID, CMD, S), + {RqID, CMD, Rs, NewS} + end, + {machi_pb_translate:to_pb_response(ReqID, Cmd, Result), S2}. + +do_pb_ll_request2(EpochID, CMD, S) -> + {Wedged_p, CurrentEpochID} = ets:lookup_element(S#state.etstab, epoch, 2), + if Wedged_p == true -> + {{error, wedged}, S#state{epoch_id=CurrentEpochID}}; + is_tuple(EpochID) + andalso + EpochID /= CurrentEpochID -> + {Epoch, _} = EpochID, + {CurrentEpoch, _} = CurrentEpochID, + if Epoch < CurrentEpoch -> + ok; + true -> + %% We're at same epoch # but different checksum, or + %% we're at a newer/bigger epoch #. + _ = wedge_myself(S#state.flu_name, CurrentEpochID), + ok + end, + {{error, bad_epoch}, S#state{epoch_id=CurrentEpochID}}; + true -> + do_pb_ll_request3(CMD, S#state{epoch_id=CurrentEpochID}) + end. + +%% Witness status does not matter below. +do_pb_ll_request3({low_echo, _BogusEpochID, Msg}, S) -> + {Msg, S}; +do_pb_ll_request3({low_auth, _BogusEpochID, _User, _Pass}, S) -> + {-6, S}; +do_pb_ll_request3({low_wedge_status, _EpochID}, S) -> + {do_server_wedge_status(S), S}; +do_pb_ll_request3({low_proj, PCMD}, S) -> + {do_server_proj_request(PCMD, S), S}; +%% Witness status *matters* below +do_pb_ll_request3({low_append_chunk, _EpochID, PKey, Prefix, Chunk, CSum_tag, + CSum, ChunkExtra}, + #state{witness=false}=S) -> + {do_server_append_chunk(PKey, Prefix, Chunk, CSum_tag, CSum, + ChunkExtra, S), S}; +do_pb_ll_request3({low_write_chunk, _EpochID, File, Offset, Chunk, CSum_tag, + CSum}, + #state{witness=false}=S) -> + {do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, S), S}; +do_pb_ll_request3({low_read_chunk, _EpochID, File, Offset, Size, Opts}, + #state{witness=false} = S) -> + {do_server_read_chunk(File, Offset, Size, Opts, S), S}; +do_pb_ll_request3({low_trim_chunk, _EpochID, File, Offset, Size, TriggerGC}, + #state{witness=false}=S) -> + {do_server_trim_chunk(File, Offset, Size, TriggerGC, S), S}; +do_pb_ll_request3({low_checksum_list, _EpochID, File}, + #state{witness=false}=S) -> + {do_server_checksum_listing(File, S), S}; +do_pb_ll_request3({low_list_files, _EpochID}, + #state{witness=false}=S) -> + {do_server_list_files(S), S}; +do_pb_ll_request3({low_delete_migration, _EpochID, File}, + #state{witness=false}=S) -> + {do_server_delete_migration(File, S), + #state{witness=false}=S}; +do_pb_ll_request3({low_trunc_hack, _EpochID, File}, + #state{witness=false}=S) -> + {do_server_trunc_hack(File, S), S}; +do_pb_ll_request3(_, #state{witness=true}=S) -> + {{error, bad_arg}, S}. % TODO: new status code?? + +do_pb_hl_request(#mpb_request{req_id=ReqID}, #state{pb_mode=low}=S) -> + Result = {low_error, 41, "High protocol request while in low mode"}, + {machi_pb_translate:to_pb_response(ReqID, unused, Result), S}; +do_pb_hl_request(PB_request, S) -> + {ReqID, Cmd} = machi_pb_translate:from_pb_request(PB_request), + {Result, S2} = do_pb_hl_request2(Cmd, S), + {machi_pb_translate:to_pb_response(ReqID, Cmd, Result), S2}. 
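The wedge/epoch check in do_pb_ll_request2/3 above encodes a small decision table. Distilled into a standalone sketch for reference (the function and atom names are invented here for clarity; this is not code from the patch):

    %% Illustrative distillation: what the server does when a client's
    %% EpochID differs from CurrentEpochID in do_pb_ll_request2/3.
    epoch_mismatch_action({Epoch, _Csum}, {CurrentEpoch, _CCsum})
      when Epoch < CurrentEpoch ->
        reply_bad_epoch;                    % stale client: reject only
    epoch_mismatch_action(_EpochID, _CurrentEpochID) ->
        wedge_self_then_reply_bad_epoch.    % same # / different csum, or newer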
+ +do_pb_hl_request2({high_echo, Msg}, S) -> + {Msg, S}; +do_pb_hl_request2({high_auth, _User, _Pass}, S) -> + {-77, S}; +do_pb_hl_request2({high_append_chunk, _todoPK, Prefix, ChunkBin, TaggedCSum, + ChunkExtra}, #state{high_clnt=Clnt}=S) -> + Chunk = {TaggedCSum, ChunkBin}, + Res = machi_cr_client:append_chunk_extra(Clnt, Prefix, Chunk, + ChunkExtra), + {Res, S}; +do_pb_hl_request2({high_write_chunk, File, Offset, ChunkBin, TaggedCSum}, + #state{high_clnt=Clnt}=S) -> + Chunk = {TaggedCSum, ChunkBin}, + Res = machi_cr_client:write_chunk(Clnt, File, Offset, Chunk), + {Res, S}; +do_pb_hl_request2({high_read_chunk, File, Offset, Size, Opts}, + #state{high_clnt=Clnt}=S) -> + Res = machi_cr_client:read_chunk(Clnt, File, Offset, Size, Opts), + {Res, S}; +do_pb_hl_request2({high_trim_chunk, File, Offset, Size}, + #state{high_clnt=Clnt}=S) -> + Res = machi_cr_client:trim_chunk(Clnt, File, Offset, Size), + {Res, S}; +do_pb_hl_request2({high_checksum_list, File}, #state{high_clnt=Clnt}=S) -> + Res = machi_cr_client:checksum_list(Clnt, File), + {Res, S}; +do_pb_hl_request2({high_list_files}, #state{high_clnt=Clnt}=S) -> + Res = machi_cr_client:list_files(Clnt), + {Res, S}. + +do_server_proj_request({get_latest_epochid, ProjType}, + #state{proj_store=ProjStore}) -> + machi_projection_store:get_latest_epochid(ProjStore, ProjType); +do_server_proj_request({read_latest_projection, ProjType}, + #state{proj_store=ProjStore}) -> + machi_projection_store:read_latest_projection(ProjStore, ProjType); +do_server_proj_request({read_projection, ProjType, Epoch}, + #state{proj_store=ProjStore}) -> + machi_projection_store:read(ProjStore, ProjType, Epoch); +do_server_proj_request({write_projection, ProjType, Proj}, + #state{flu_name=FluName, proj_store=ProjStore}) -> + if Proj#projection_v1.epoch_number == ?SPAM_PROJ_EPOCH -> + %% io:format(user, "DBG ~s ~w ~P\n", [?MODULE, ?LINE, Proj, 5]), + Chmgr = machi_flu_psup:make_fitness_regname(FluName), + [Map] = Proj#projection_v1.dbg, + catch machi_fitness:send_fitness_update_spam( + Chmgr, Proj#projection_v1.author_server, Map); + true -> + catch machi_projection_store:write(ProjStore, ProjType, Proj) + end; +do_server_proj_request({get_all_projections, ProjType}, + #state{proj_store=ProjStore}) -> + machi_projection_store:get_all_projections(ProjStore, ProjType); +do_server_proj_request({list_all_projections, ProjType}, + #state{proj_store=ProjStore}) -> + machi_projection_store:list_all_projections(ProjStore, ProjType); +do_server_proj_request({kick_projection_reaction}, + #state{flu_name=FluName}) -> + %% Tell my chain manager that it might want to react to + %% this new world. + Chmgr = machi_chain_manager1:make_chmgr_regname(FluName), + spawn(fun() -> + catch machi_chain_manager1:trigger_react_to_env(Chmgr) + end), + async_no_response. + +do_server_append_chunk(PKey, Prefix, Chunk, CSum_tag, CSum, + ChunkExtra, S) -> + case sanitize_prefix(Prefix) of + ok -> + do_server_append_chunk2(PKey, Prefix, Chunk, CSum_tag, CSum, + ChunkExtra, S); + _ -> + {error, bad_arg} + end. + +do_server_append_chunk2(_PKey, Prefix, Chunk, CSum_tag, Client_CSum, + ChunkExtra, #state{flu_name=FluName, + epoch_id=EpochID}=_S) -> + %% TODO: Do anything with PKey? + try + TaggedCSum = check_or_make_tagged_checksum(CSum_tag, Client_CSum,Chunk), + R = {seq_append, self(), Prefix, Chunk, TaggedCSum, ChunkExtra, EpochID}, + FluName ! 
R, + receive + {assignment, Offset, File} -> + Size = iolist_size(Chunk), + {ok, {Offset, Size, File}}; + witness -> + {error, bad_arg}; + wedged -> + {error, wedged} + after 10*1000 -> + {error, partition} + end + catch + throw:{bad_csum, _CS} -> + {error, bad_checksum}; + error:badarg -> + error_logger:error_msg("Message send to ~p gave badarg, make certain server is running with correct registered name\n", [?MODULE]), + {error, bad_arg} + end. + +do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, #state{flu_name=FluName}) -> + case sanitize_file_string(File) of + ok -> + case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of + {ok, Pid} -> + Meta = [{client_csum_tag, CSum_tag}, {client_csum, CSum}], + machi_file_proxy:write(Pid, Offset, Meta, Chunk); + {error, trimmed} = Error -> + Error + end; + _ -> + {error, bad_arg} + end. + +do_server_read_chunk(File, Offset, Size, Opts, #state{flu_name=FluName})-> + case sanitize_file_string(File) of + ok -> + case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of + {ok, Pid} -> + case machi_file_proxy:read(Pid, Offset, Size, Opts) of + %% XXX FIXME + %% For now we are omiting the checksum data because it blows up + %% protobufs. + {ok, ChunksAndTrimmed} -> {ok, ChunksAndTrimmed}; + Other -> Other + end; + {error, trimmed} = Error -> + Error + end; + _ -> + {error, bad_arg} + end. + +do_server_trim_chunk(File, Offset, Size, TriggerGC, #state{flu_name=FluName}) -> + lager:debug("Hi there! I'm trimming this: ~s, (~p, ~p), ~p~n", + [File, Offset, Size, TriggerGC]), + case sanitize_file_string(File) of + ok -> + case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of + {ok, Pid} -> + machi_file_proxy:trim(Pid, Offset, Size, TriggerGC); + {error, trimmed} = Trimmed -> + %% Should be returned back to (maybe) trigger repair + Trimmed + end; + _ -> + {error, bad_arg} + end. + +do_server_checksum_listing(File, #state{flu_name=FluName, data_dir=DataDir}=_S) -> + case sanitize_file_string(File) of + ok -> + ok = sync_checksum_file(FluName, File), + CSumPath = machi_util:make_checksum_filename(DataDir, File), + %% TODO: If this file is legitimately bigger than our + %% {packet_size,N} limit, then we'll have a difficult time, eh? + case file:read_file(CSumPath) of + {ok, Bin} -> + if byte_size(Bin) > (?PB_MAX_MSG_SIZE - 1024) -> + %% TODO: Fix this limitation by streaming the + %% binary in multiple smaller PB messages. + %% Also, don't read the file all at once. ^_^ + error_logger:error_msg("~s:~w oversize ~s\n", + [?MODULE, ?LINE, CSumPath]), + {error, bad_arg}; + true -> + {ok, Bin} + end; + {error, enoent} -> + {error, no_such_file}; + {error, _} -> + {error, bad_arg} + end; + _ -> + {error, bad_arg} + end. + +do_server_list_files(#state{data_dir=DataDir}=_S) -> + {_, WildPath} = machi_util:make_data_filename(DataDir, ""), + Files = filelib:wildcard("*", WildPath), + {ok, [begin + {ok, FI} = file:read_file_info(WildPath ++ "/" ++ File), + Size = FI#file_info.size, + {Size, File} + end || File <- Files]}. + +do_server_wedge_status(S) -> + {Wedged_p, CurrentEpochID0} = ets:lookup_element(S#state.etstab, epoch, 2), + CurrentEpochID = if CurrentEpochID0 == undefined -> + ?DUMMY_PV1_EPOCH; + true -> + CurrentEpochID0 + end, + {Wedged_p, CurrentEpochID}. 
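do_server_checksum_listing/2 above now ships the raw checksum-file bytes to the client, and those bytes are exactly the record format handled by the split_checksum_list_blob_decode/1 helper added to machi_csum_table.erl earlier in this patch. A hypothetical reader-side sketch; fetch_checksum_blob/1 is a placeholder for whatever transport call retrieves the blob, and it assumes the decoder is exported from machi_csum_table:

    decode_listing_sketch(File) ->
        {ok, Blob} = fetch_checksum_blob(File),
        {Entries, <<>>} = machi_csum_table:split_checksum_list_blob_decode(Blob),
        %% Keep only live (non-trimmed) entries.
        [E || {_Off, _Sz, CSum}=E <- Entries, CSum =/= trimmed].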
+
+do_server_delete_migration(File, #state{data_dir=DataDir}=_S) ->
+    case sanitize_file_string(File) of
+        ok ->
+            {_, Path} = machi_util:make_data_filename(DataDir, File),
+            case file:delete(Path) of
+                ok ->
+                    ok;
+                {error, enoent} ->
+                    {error, no_such_file};
+                _ ->
+                    {error, bad_arg}
+            end;
+        _ ->
+            {error, bad_arg}
+    end.
+
+do_server_trunc_hack(File, #state{data_dir=DataDir}=_S) ->
+    case sanitize_file_string(File) of
+        ok ->
+            {_, Path} = machi_util:make_data_filename(DataDir, File),
+            case file:open(Path, [read, write, binary, raw]) of
+                {ok, FH} ->
+                    try
+                        {ok, ?MINIMUM_OFFSET} = file:position(FH,
+                                                              ?MINIMUM_OFFSET),
+                        ok = file:truncate(FH),
+                        ok
+                    after
+                        file:close(FH)
+                    end;
+                {error, enoent} ->
+                    {error, no_such_file};
+                _ ->
+                    {error, bad_arg}
+            end;
+        _ ->
+            {error, bad_arg}
+    end.
+
+append_server_dispatch(From, Prefix, Chunk, CSum, Extra, FluName, EpochId) ->
+    Result = case handle_append(Prefix, Chunk, CSum, Extra, FluName, EpochId) of
                  {ok, File, Offset} ->
                      {assignment, Offset, File};
                  Other ->
@@ -285,13 +687,10 @@ append_server_dispatch(From, CoC_Namespace, CoC_Locator,
     From ! Result,
     exit(normal).
 
-handle_append(_N, _L, _Prefix, <<>>, _Csum, _Extra, _FluName, _EpochId) ->
+handle_append(_Prefix, <<>>, _Csum, _Extra, _FluName, _EpochId) ->
     {error, bad_arg};
-handle_append(CoC_Namespace, CoC_Locator,
-              Prefix, Chunk, Csum, Extra, FluName, EpochId) ->
-    CoC = {coc, CoC_Namespace, CoC_Locator},
-    Res = machi_flu_filename_mgr:find_or_make_filename_from_prefix(
-            FluName, EpochId, {prefix, Prefix}, CoC),
+handle_append(Prefix, Chunk, Csum, Extra, FluName, EpochId) ->
+    Res = machi_flu_filename_mgr:find_or_make_filename_from_prefix(FluName, EpochId, {prefix, Prefix}),
     case Res of
         {file, F} ->
             case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, F}) of
@@ -306,6 +705,47 @@ handle_append(CoC_Namespace, CoC_Locator,
             Error
     end.
 
+sanitize_file_string(Str) ->
+    case has_no_prohibited_chars(Str) andalso machi_util:is_valid_filename(Str) of
+        true -> ok;
+        false -> error
+    end.
+
+has_no_prohibited_chars(Str) ->
+    case re:run(Str, "/") of
+        nomatch ->
+            true;
+        _ ->
+            false
+    end.
+
+sanitize_prefix(Prefix) ->
+    %% We are using '^' as our component delimiter
+    case re:run(Prefix, "/|\\^") of
+        nomatch ->
+            ok;
+        _ ->
+            error
+    end.
+
+sync_checksum_file(FluName, File) ->
+    %% We just look up the pid here - we don't start a proxy server. If
+    %% there isn't a pid for this file, then we just return ok. The
+    %% csum file was synced when the proxy was shut down.
+    %%
+    %% If there *is* a pid, we call the sync function to ensure the
+    %% csum file is sync'd before we return. (Or an error if we get
+    %% an error).
+    case machi_flu_metadata_mgr:lookup_proxy_pid(FluName, {file, File}) of
+        undefined ->
+            ok;
+        Pid ->
+            machi_file_proxy:sync(Pid, csum)
+    end.
+
+make_listener_regname(BaseName) ->
+    list_to_atom(atom_to_list(BaseName) ++ "_listener").
+
 %% This is the name of the projection store that is spawned by the
 %% *flu*, for use primarily in testing scenarios.  In normal use, we
 %% ought to be using the OTP style of managing processes, via
@@ -316,6 +756,26 @@ handle_append(CoC_Namespace, CoC_Locator,
 make_projection_server_regname(BaseName) ->
     list_to_atom(atom_to_list(BaseName) ++ "_pstore").
 
+check_or_make_tagged_checksum(?CSUM_TAG_NONE, _Client_CSum, Chunk) ->
+    %% TODO: If the client was foolish enough to use
+    %% this type of non-checksum, then the client gets
+    %% what it deserves wrt data integrity, alas.
In + %% the client-side Chain Replication method, each + %% server will calculated this independently, which + %% isn't exactly what ought to happen for best data + %% integrity checking. In server-side CR, the csum + %% should be calculated by the head and passed down + %% the chain together with the value. + CS = machi_util:checksum_chunk(Chunk), + machi_util:make_tagged_csum(server_sha, CS); +check_or_make_tagged_checksum(?CSUM_TAG_CLIENT_SHA, Client_CSum, Chunk) -> + CS = machi_util:checksum_chunk(Chunk), + if CS == Client_CSum -> + machi_util:make_tagged_csum(server_sha, + Client_CSum); + true -> + throw({bad_csum, CS}) + end. -ifdef(TEST). diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl index e5b65fc..119e154 100644 --- a/src/machi_flu1_client.erl +++ b/src/machi_flu1_client.erl @@ -55,9 +55,7 @@ -export([ %% File API append_chunk/4, append_chunk/5, - append_chunk/6, append_chunk/7, append_chunk_extra/5, append_chunk_extra/6, - append_chunk_extra/7, append_chunk_extra/8, read_chunk/6, read_chunk/7, checksum_list/3, checksum_list/4, list_files/2, list_files/3, @@ -95,9 +93,7 @@ -spec append_chunk(port_wrap(), machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk()) -> {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. append_chunk(Sock, EpochID, Prefix, Chunk) -> - append_chunk2(Sock, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, 0). + append_chunk2(Sock, EpochID, Prefix, Chunk, 0). %% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix'. @@ -108,39 +104,7 @@ append_chunk(Sock, EpochID, Prefix, Chunk) -> append_chunk(Host, TcpPort, EpochID, Prefix, Chunk) -> Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}), try - append_chunk2(Sock, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, 0) - after - disconnect(Sock) - end. - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - --spec append_chunk(port_wrap(), machi_dt:epoch_id(), - machi_dt:coc_namespace(), machi_dt:coc_locator(), - machi_dt:file_prefix(), machi_dt:chunk()) -> - {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. -append_chunk(Sock, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk) -> - append_chunk2(Sock, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, 0). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - --spec append_chunk(machi_dt:inet_host(), machi_dt:inet_port(), - machi_dt:epoch_id(), - machi_dt:coc_namespace(), machi_dt:coc_locator(), - machi_dt:file_prefix(), machi_dt:chunk()) -> - {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. -append_chunk(Host, TcpPort, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk) -> - Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}), - try - append_chunk2(Sock, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, 0) + append_chunk2(Sock, EpochID, Prefix, Chunk, 0) after disconnect(Sock) end. @@ -153,14 +117,11 @@ append_chunk(Host, TcpPort, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk) %% be reserved by the file sequencer for later write(s) by the %% `write_chunk()' API. 
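The reservation semantics in the doc comment above can be made concrete. A usage sketch, assuming the usual socket/epoch plumbing and the write_chunk/5 companion call (whose internals appear as write_chunk2/5 later in this diff); the prefix literal and reserve_then_fill/2 wrapper are illustrative:

    %% Append 1 KB with a 4 KB reservation, then fill part of the reserved
    %% window [Offset+1024, Offset+5120) with an ordinary write.
    reserve_then_fill(Sock, EpochID) ->
        {ok, {Offset, 1024, File}} =
            machi_flu1_client:append_chunk_extra(Sock, EpochID, <<"prefix">>,
                                                 <<0:(1024*8)>>, 4096),
        machi_flu1_client:write_chunk(Sock, EpochID, File, Offset + 1024,
                                      <<1:(4096*8)>>).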
--spec append_chunk_extra(port_wrap(), machi_dt:epoch_id(), - machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) -> +-spec append_chunk_extra(port_wrap(), machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) -> {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. append_chunk_extra(Sock, EpochID, Prefix, Chunk, ChunkExtra) when is_integer(ChunkExtra), ChunkExtra >= 0 -> - append_chunk2(Sock, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, ChunkExtra). + append_chunk2(Sock, EpochID, Prefix, Chunk, ChunkExtra). %% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix' and also request an additional `Extra' bytes. @@ -177,54 +138,7 @@ append_chunk_extra(Host, TcpPort, EpochID, Prefix, Chunk, ChunkExtra) when is_integer(ChunkExtra), ChunkExtra >= 0 -> Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}), try - append_chunk2(Sock, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, ChunkExtra) - after - disconnect(Sock) - end. - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix' and also request an additional `Extra' bytes. -%% -%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then -%% the file offsets that follow `Chunk''s position for the following 4K will -%% be reserved by the file sequencer for later write(s) by the -%% `write_chunk()' API. - --spec append_chunk_extra(port_wrap(), machi_dt:epoch_id(), - machi_dt:coc_namespace(), machi_dt:coc_locator(), - machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) -> - {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. -append_chunk_extra(Sock, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra) - when is_integer(ChunkExtra), ChunkExtra >= 0 -> - append_chunk2(Sock, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix' and also request an additional `Extra' bytes. -%% -%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then -%% the file offsets that follow `Chunk''s position for the following 4K will -%% be reserved by the file sequencer for later write(s) by the -%% `write_chunk()' API. - --spec append_chunk_extra(machi_dt:inet_host(), machi_dt:inet_port(), - machi_dt:epoch_id(), - machi_dt:coc_namespace(), machi_dt:coc_locator(), - machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) -> - {ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}. -append_chunk_extra(Host, TcpPort, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra) - when is_integer(ChunkExtra), ChunkExtra >= 0 -> - Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}), - try - append_chunk2(Sock, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra) + append_chunk2(Sock, EpochID, Prefix, Chunk, ChunkExtra) after disconnect(Sock) end. @@ -628,8 +542,7 @@ read_chunk2(Sock, EpochID, File0, Offset, Size, Opts) -> {low_read_chunk, EpochID, File, Offset, Size, Opts}), do_pb_request_common(Sock, ReqID, Req). 
-append_chunk2(Sock, EpochID, CoC_Namespace, CoC_Locator, - Prefix0, Chunk0, ChunkExtra) -> +append_chunk2(Sock, EpochID, Prefix0, Chunk0, ChunkExtra) -> ReqID = <<"id">>, {Chunk, CSum_tag, CSum} = case Chunk0 of @@ -639,11 +552,12 @@ append_chunk2(Sock, EpochID, CoC_Namespace, CoC_Locator, {Tag, CS} = machi_util:unmake_tagged_csum(ChunkCSum), {Chk, Tag, CS} end, + PKey = <<>>, % TODO Prefix = machi_util:make_binary(Prefix0), Req = machi_pb_translate:to_pb_request( ReqID, - {low_append_chunk, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, ChunkExtra}), + {low_append_chunk, EpochID, PKey, Prefix, Chunk, CSum_tag, CSum, + ChunkExtra}), do_pb_request_common(Sock, ReqID, Req). write_chunk2(Sock, EpochID, File0, Offset, Chunk0) -> diff --git a/src/machi_flu1_net_server.erl b/src/machi_flu1_net_server.erl deleted file mode 100644 index 93e3675..0000000 --- a/src/machi_flu1_net_server.erl +++ /dev/null @@ -1,603 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% @doc Ranch protocol callback module to handle PB protocol over -%% transport, including both high and low modes. - -%% TODO -%% - Two modes, high and low should be separated at listener level? - --module(machi_flu1_net_server). - --behaviour(gen_server). --behaviour(ranch_protocol). - --export([start_link/4]). --export([init/1]). --export([handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --include_lib("kernel/include/file.hrl"). - --include("machi.hrl"). --include("machi_pb.hrl"). --include("machi_projection.hrl"). - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). --endif. % TEST - --record(state, { - %% Ranch's transport management stuff - ref :: ranch:ref(), - socket :: socket(), - transport :: module(), - - %% Machi FLU configurations, common for low and high - data_dir :: string(), - witness :: boolean(), - pb_mode :: undefined | high | low, - %% - Used in projection related requests in low mode - %% - Used in spawning CR client in high mode - proj_store :: pid(), - - %% Low mode only items - %% Current best knowledge, used for wedge_self / bad_epoch check - epoch_id :: undefined | machi_dt:epoch_id(), - %% Used in dispatching append_chunk* reqs to the - %% append serializing process - flu_name :: pv1_server(), - %% Used in server_wedge_status to lookup the table - epoch_tab :: ets:tab(), - - %% High mode only - high_clnt :: pid(), - - %% anything you want - props = [] :: list() % proplist - }). - --type socket() :: any(). --type state() :: #state{}. - --spec start_link(ranch:ref(), socket(), module(), [term()]) -> {ok, pid()}. 
-start_link(Ref, Socket, Transport, [FluName, Witness, DataDir, EpochTab, ProjStore]) -> - proc_lib:start_link(?MODULE, init, [#state{ref=Ref, - socket=Socket, - transport=Transport, - flu_name=FluName, - witness=Witness, - data_dir=DataDir, - epoch_tab=EpochTab, - proj_store=ProjStore}]). - --spec init(state()) -> no_return(). -init(#state{ref=Ref, socket=Socket, transport=Transport}=State) -> - ok = proc_lib:init_ack({ok, self()}), - ok = ranch:accept_ack(Ref), - {_Wedged_p, CurrentEpochID} = lookup_epoch(State), - ok = Transport:setopts(Socket, [{active, once}|?PB_PACKET_OPTS]), - gen_server:enter_loop(?MODULE, [], State#state{epoch_id=CurrentEpochID}). - -handle_call(Request, _From, S) -> - lager:warning("~s:handle_call UNKNOWN message: ~w", [?MODULE, Request]), - Reply = {error, {unknown_message, Request}}, - {reply, Reply, S}. - -handle_cast(_Msg, S) -> - lager:warning("~s:handle_cast UNKNOWN message: ~w", [?MODULE, _Msg]), - {noreply, S}. - -%% TODO: Other transport support needed?? TLS/SSL, SCTP -handle_info({tcp, Socket, Data}=_Info, #state{socket=Socket}=S) -> - lager:debug("~s:handle_info: ~w", [?MODULE, _Info]), - transport_received(Socket, Data, S); -handle_info({tcp_closed, Socket}=_Info, #state{socket=Socket}=S) -> - lager:debug("~s:handle_info: ~w", [?MODULE, _Info]), - transport_closed(Socket, S); -handle_info({tcp_error, Socket, Reason}=_Info, #state{socket=Socket}=S) -> - lager:warning("~s:handle_info (socket=~w) tcp_error: ~w", [?MODULE, Socket, Reason]), - transport_error(Socket, Reason, S); -handle_info(_Info, S) -> - lager:warning("~s:handle_info UNKNOWN message: ~w", [?MODULE, _Info]), - {noreply, S}. - -terminate(normal, #state{socket=undefined}=_S) -> - ok; -terminate(Reason, #state{socket=undefined}=_S) -> - lager:warning("~s:terminate (socket=undefined): ~w", [?MODULE, Reason]), - ok; -terminate(normal, #state{socket=Socket}=_S) -> - (catch gen_tcp:close(Socket)), - ok; -terminate(Reason, #state{socket=Socket}=_S) -> - lager:warning("~s:terminate (socket=Socket): ~w", [?MODULE, Reason]), - (catch gen_tcp:close(Socket)), - ok. - -code_change(_OldVsn, S, _Extra) -> - {ok, S}. - -%% -- private - -%%%% Common transport handling - --spec transport_received(socket(), machi_dt:chunk(), state()) -> - {noreply, state()}. -transport_received(Socket, <<"QUIT\n">>, #state{socket=Socket}=S) -> - {stop, normal, S}; -transport_received(Socket, Bin, #state{transport=Transport}=S) -> - {RespBin, S2} = - case machi_pb:decode_mpb_ll_request(Bin) of - LL_req when LL_req#mpb_ll_request.do_not_alter == 2 -> - {R, NewS} = do_pb_ll_request(LL_req, S), - {maybe_encode_response(R), set_mode(low, NewS)}; - _ -> - HL_req = machi_pb:decode_mpb_request(Bin), - 1 = HL_req#mpb_request.do_not_alter, - {R, NewS} = do_pb_hl_request(HL_req, make_high_clnt(S)), - {machi_pb:encode_mpb_response(R), set_mode(high, NewS)} - end, - case RespBin of - async_no_response -> - Transport:setopts(Socket, [{active, once}]), - {noreply, S2}; - _ -> - case Transport:send(Socket, RespBin) of - ok -> - Transport:setopts(Socket, [{active, once}]), - {noreply, S2}; - {error, Reason} -> - transport_error(Socket, Reason, S2) - end - end. - --spec transport_closed(socket(), state()) -> {stop, term(), state()}. -transport_closed(_Socket, S) -> - {stop, normal, S}. - --spec transport_error(socket(), term(), state()) -> no_return(). 
-transport_error(Socket, Reason, #state{transport=Transport}=_S) -> - Msg = io_lib:format("Socket error ~w", [Reason]), - R = #mpb_ll_response{req_id= <<>>, - generic=#mpb_errorresp{code=1, msg=Msg}}, - _Resp = machi_pb:encode_mpb_ll_response(R), - %% TODO for TODO comments: comments below with four %s are copy-n-paste'd, - %% then it should be considered they are still open and should be addressed. - %%%% TODO: Weird that sometimes neither catch nor try/catch - %%%% can prevent OTP's SASL from logging an error here. - %%%% Error in process <0.545.0> with exit value: {badarg,[{erlang,port_command,....... - %%%% TODO: is this what causes the intermittent PULSE deadlock errors? - %%%% _ = (catch gen_tcp:send(Sock, _Resp)), timer:sleep(1000), - (catch Transport:close(Socket)), - _ = lager:warning("Socket error (~w -> ~w): ~w", - [Transport:sockname(Socket), Transport:peername(Socket), Reason]), - %% TODO: better to exit with `Reason' without logging? - exit(normal). - -maybe_encode_response(async_no_response=R) -> - R; -maybe_encode_response(R) -> - machi_pb:encode_mpb_ll_response(R). - -set_mode(Mode, #state{pb_mode=undefined}=S) -> - S#state{pb_mode=Mode}; -set_mode(_, S) -> - S. - -%%%% Low PB mode %%%% - -do_pb_ll_request(#mpb_ll_request{req_id=ReqID}, #state{pb_mode=high}=S) -> - Result = {high_error, 41, "Low protocol request while in high mode"}, - {machi_pb_translate:to_pb_response(ReqID, unused, Result), S}; -do_pb_ll_request(PB_request, S) -> - Req = machi_pb_translate:from_pb_request(PB_request), - %% io:format(user, "[~w] do_pb_ll_request Req: ~w~n", [S#state.flu_name, Req]), - {ReqID, Cmd, Result, S2} = - case Req of - {RqID, {LowCmd, _}=Cmd0} - when LowCmd =:= low_proj; - LowCmd =:= low_wedge_status; - LowCmd =:= low_list_files -> - %% Skip wedge check for these unprivileged commands - {Rs, NewS} = do_pb_ll_request3(Cmd0, S), - {RqID, Cmd0, Rs, NewS}; - {RqID, Cmd0} -> - EpochID = element(2, Cmd0), % by common convention - {Rs, NewS} = do_pb_ll_request2(EpochID, Cmd0, S), - {RqID, Cmd0, Rs, NewS} - end, - {machi_pb_translate:to_pb_response(ReqID, Cmd, Result), S2}. - -do_pb_ll_request2(EpochID, CMD, S) -> - {Wedged_p, CurrentEpochID} = lookup_epoch(S), - %% io:format(user, "{Wedged_p, CurrentEpochID}: ~w~n", [{Wedged_p, CurrentEpochID}]), - if Wedged_p == true -> - {{error, wedged}, S#state{epoch_id=CurrentEpochID}}; - is_tuple(EpochID) - andalso - EpochID /= CurrentEpochID -> - {Epoch, _} = EpochID, - {CurrentEpoch, _} = CurrentEpochID, - if Epoch < CurrentEpoch -> - ok; - true -> - %% We're at same epoch # but different checksum, or - %% we're at a newer/bigger epoch #. - _ = machi_flu1:wedge_myself(S#state.flu_name, CurrentEpochID), - ok - end, - {{error, bad_epoch}, S#state{epoch_id=CurrentEpochID}}; - true -> - do_pb_ll_request3(CMD, S#state{epoch_id=CurrentEpochID}) - end. - -lookup_epoch(#state{epoch_tab=T}) -> - %% TODO: race in shutdown to access ets table after owner dies - ets:lookup_element(T, epoch, 2). - -%% Witness status does not matter below. 
-do_pb_ll_request3({low_echo, _BogusEpochID, Msg}, S) -> - {Msg, S}; -do_pb_ll_request3({low_auth, _BogusEpochID, _User, _Pass}, S) -> - {-6, S}; -do_pb_ll_request3({low_wedge_status, _EpochID}, S) -> - {do_server_wedge_status(S), S}; -do_pb_ll_request3({low_proj, PCMD}, S) -> - {do_server_proj_request(PCMD, S), S}; - -%% Witness status *matters* below -do_pb_ll_request3({low_append_chunk, _EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, - CSum, ChunkExtra}, - #state{witness=false}=S) -> - {do_server_append_chunk(CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, - ChunkExtra, S), S}; -do_pb_ll_request3({low_write_chunk, _EpochID, File, Offset, Chunk, CSum_tag, - CSum}, - #state{witness=false}=S) -> - {do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, S), S}; -do_pb_ll_request3({low_read_chunk, _EpochID, File, Offset, Size, Opts}, - #state{witness=false} = S) -> - {do_server_read_chunk(File, Offset, Size, Opts, S), S}; -do_pb_ll_request3({low_trim_chunk, _EpochID, File, Offset, Size, TriggerGC}, - #state{witness=false}=S) -> - {do_server_trim_chunk(File, Offset, Size, TriggerGC, S), S}; -do_pb_ll_request3({low_checksum_list, _EpochID, File}, - #state{witness=false}=S) -> - {do_server_checksum_listing(File, S), S}; -do_pb_ll_request3({low_list_files, _EpochID}, - #state{witness=false}=S) -> - {do_server_list_files(S), S}; -do_pb_ll_request3({low_delete_migration, _EpochID, File}, - #state{witness=false}=S) -> - {do_server_delete_migration(File, S), - #state{witness=false}=S}; -do_pb_ll_request3({low_trunc_hack, _EpochID, File}, - #state{witness=false}=S) -> - {do_server_trunc_hack(File, S), S}; - -do_pb_ll_request3(_, #state{witness=true}=S) -> - {{error, bad_arg}, S}. % TODO: new status code?? - -do_server_proj_request({get_latest_epochid, ProjType}, - #state{proj_store=ProjStore}) -> - machi_projection_store:get_latest_epochid(ProjStore, ProjType); -do_server_proj_request({read_latest_projection, ProjType}, - #state{proj_store=ProjStore}) -> - machi_projection_store:read_latest_projection(ProjStore, ProjType); -do_server_proj_request({read_projection, ProjType, Epoch}, - #state{proj_store=ProjStore}) -> - machi_projection_store:read(ProjStore, ProjType, Epoch); -do_server_proj_request({write_projection, ProjType, Proj}, - #state{flu_name=FluName, proj_store=ProjStore}) -> - if Proj#projection_v1.epoch_number == ?SPAM_PROJ_EPOCH -> - %% io:format(user, "DBG ~s ~w ~P\n", [?MODULE, ?LINE, Proj, 5]), - Chmgr = machi_flu_psup:make_fitness_regname(FluName), - [Map] = Proj#projection_v1.dbg, - catch machi_fitness:send_fitness_update_spam( - Chmgr, Proj#projection_v1.author_server, Map); - true -> - catch machi_projection_store:write(ProjStore, ProjType, Proj) - end; -do_server_proj_request({get_all_projections, ProjType}, - #state{proj_store=ProjStore}) -> - machi_projection_store:get_all_projections(ProjStore, ProjType); -do_server_proj_request({list_all_projections, ProjType}, - #state{proj_store=ProjStore}) -> - machi_projection_store:list_all_projections(ProjStore, ProjType); -do_server_proj_request({kick_projection_reaction}, - #state{flu_name=FluName}) -> - %% Tell my chain manager that it might want to react to - %% this new world. - Chmgr = machi_chain_manager1:make_chmgr_regname(FluName), - spawn(fun() -> - catch machi_chain_manager1:trigger_react_to_env(Chmgr) - end), - async_no_response. 
- -do_server_append_chunk(CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, - ChunkExtra, S) -> - case sanitize_prefix(Prefix) of - ok -> - do_server_append_chunk2(CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, - ChunkExtra, S); - _ -> - {error, bad_arg} - end. - -do_server_append_chunk2(CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, Client_CSum, - ChunkExtra, #state{flu_name=FluName, - epoch_id=EpochID}=_S) -> - %% TODO: Do anything with PKey? - try - TaggedCSum = check_or_make_tagged_checksum(CSum_tag, Client_CSum,Chunk), - R = {seq_append, self(), CoC_Namespace, CoC_Locator, - Prefix, Chunk, TaggedCSum, ChunkExtra, EpochID}, - FluName ! R, - receive - {assignment, Offset, File} -> - Size = iolist_size(Chunk), - {ok, {Offset, Size, File}}; - witness -> - {error, bad_arg}; - wedged -> - {error, wedged} - after 10*1000 -> - {error, partition} - end - catch - throw:{bad_csum, _CS} -> - {error, bad_checksum}; - error:badarg -> - lager:error("badarg at ~w:do_server_append_chunk2:~w ~w", - [?MODULE, ?LINE, erlang:get_stacktrace()]), - {error, bad_arg} - end. - -do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, #state{flu_name=FluName}) -> - case sanitize_file_string(File) of - ok -> - case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of - {ok, Pid} -> - Meta = [{client_csum_tag, CSum_tag}, {client_csum, CSum}], - machi_file_proxy:write(Pid, Offset, Meta, Chunk); - {error, trimmed} = Error -> - Error - end; - _ -> - {error, bad_arg} - end. - -do_server_read_chunk(File, Offset, Size, Opts, #state{flu_name=FluName})-> - case sanitize_file_string(File) of - ok -> - case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of - {ok, Pid} -> - case machi_file_proxy:read(Pid, Offset, Size, Opts) of - %% XXX FIXME - %% For now we are omiting the checksum data because it blows up - %% protobufs. - {ok, ChunksAndTrimmed} -> {ok, ChunksAndTrimmed}; - Other -> Other - end; - {error, trimmed} = Error -> - Error - end; - _ -> - {error, bad_arg} - end. - -do_server_trim_chunk(File, Offset, Size, TriggerGC, #state{flu_name=FluName}) -> - lager:debug("Hi there! I'm trimming this: ~s, (~p, ~p), ~p~n", - [File, Offset, Size, TriggerGC]), - case sanitize_file_string(File) of - ok -> - case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of - {ok, Pid} -> - machi_file_proxy:trim(Pid, Offset, Size, TriggerGC); - {error, trimmed} = Trimmed -> - %% Should be returned back to (maybe) trigger repair - Trimmed - end; - _ -> - {error, bad_arg} - end. - -do_server_checksum_listing(File, #state{flu_name=FluName, data_dir=DataDir}=_S) -> - case sanitize_file_string(File) of - ok -> - case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of - {ok, Pid} -> - {ok, List} = machi_file_proxy:checksum_list(Pid), - Bin = erlang:term_to_binary(List), - if byte_size(Bin) > (?PB_MAX_MSG_SIZE - 1024) -> - %% TODO: Fix this limitation by streaming the - %% binary in multiple smaller PB messages. - %% Also, don't read the file all at once. ^_^ - error_logger:error_msg("~s:~w oversize ~s\n", - [?MODULE, ?LINE, DataDir]), - {error, bad_arg}; - true -> - {ok, Bin} - end; - {error, trimmed} -> - {error, trimmed} - end; - _ -> - {error, bad_arg} - end. - -do_server_list_files(#state{data_dir=DataDir}=_S) -> - {_, WildPath} = machi_util:make_data_filename(DataDir, ""), - Files = filelib:wildcard("*", WildPath), - {ok, [begin - {ok, FI} = file:read_file_info(WildPath ++ "/" ++ File), - Size = FI#file_info.size, - {Size, File} - end || File <- Files]}. 
- -do_server_wedge_status(S) -> - {Wedged_p, CurrentEpochID0} = lookup_epoch(S), - CurrentEpochID = if CurrentEpochID0 == undefined -> - ?DUMMY_PV1_EPOCH; - true -> - CurrentEpochID0 - end, - {Wedged_p, CurrentEpochID}. - -do_server_delete_migration(File, #state{data_dir=DataDir}=_S) -> - case sanitize_file_string(File) of - ok -> - {_, Path} = machi_util:make_data_filename(DataDir, File), - case file:delete(Path) of - ok -> - ok; - {error, enoent} -> - {error, no_such_file}; - _ -> - {error, bad_arg} - end; - _ -> - {error, bad_arg} - end. - -do_server_trunc_hack(File, #state{data_dir=DataDir}=_S) -> - case sanitize_file_string(File) of - ok -> - {_, Path} = machi_util:make_data_filename(DataDir, File), - case file:open(Path, [read, write, binary, raw]) of - {ok, FH} -> - try - {ok, ?MINIMUM_OFFSET} = file:position(FH, - ?MINIMUM_OFFSET), - ok = file:truncate(FH), - ok - after - file:close(FH) - end; - {error, enoent} -> - {error, no_such_file}; - _ -> - {error, bad_arg} - end; - _ -> - {error, bad_arg} - end. - -sanitize_file_string(Str) -> - case has_no_prohibited_chars(Str) andalso machi_util:is_valid_filename(Str) of - true -> ok; - false -> error - end. - -has_no_prohibited_chars(Str) -> - case re:run(Str, "/") of - nomatch -> - true; - _ -> - true - end. - -sanitize_prefix(Prefix) -> - %% We are using '^' as our component delimiter - case re:run(Prefix, "/|\\^") of - nomatch -> - ok; - _ -> - error - end. - -check_or_make_tagged_checksum(?CSUM_TAG_NONE, _Client_CSum, Chunk) -> - %% TODO: If the client was foolish enough to use - %% this type of non-checksum, then the client gets - %% what it deserves wrt data integrity, alas. In - %% the client-side Chain Replication method, each - %% server will calculated this independently, which - %% isn't exactly what ought to happen for best data - %% integrity checking. In server-side CR, the csum - %% should be calculated by the head and passed down - %% the chain together with the value. - CS = machi_util:checksum_chunk(Chunk), - machi_util:make_tagged_csum(server_sha, CS); -check_or_make_tagged_checksum(?CSUM_TAG_CLIENT_SHA, Client_CSum, Chunk) -> - CS = machi_util:checksum_chunk(Chunk), - if CS == Client_CSum -> - machi_util:make_tagged_csum(server_sha, - Client_CSum); - true -> - throw({bad_csum, CS}) - end. - -%%%% High PB mode %%%% - -do_pb_hl_request(#mpb_request{req_id=ReqID}, #state{pb_mode=low}=S) -> - Result = {low_error, 41, "High protocol request while in low mode"}, - {machi_pb_translate:to_pb_response(ReqID, unused, Result), S}; -do_pb_hl_request(PB_request, S) -> - {ReqID, Cmd} = machi_pb_translate:from_pb_request(PB_request), - {Result, S2} = do_pb_hl_request2(Cmd, S), - {machi_pb_translate:to_pb_response(ReqID, Cmd, Result), S2}. 
- -do_pb_hl_request2({high_echo, Msg}, S) -> - {Msg, S}; -do_pb_hl_request2({high_auth, _User, _Pass}, S) -> - {-77, S}; -do_pb_hl_request2({high_append_chunk, CoC_Namespace, CoC_Locator, - Prefix, ChunkBin, TaggedCSum, - ChunkExtra}, #state{high_clnt=Clnt}=S) -> - Chunk = {TaggedCSum, ChunkBin}, - Res = machi_cr_client:append_chunk_extra(Clnt, CoC_Namespace, CoC_Locator, - Prefix, Chunk, - ChunkExtra), - {Res, S}; -do_pb_hl_request2({high_write_chunk, File, Offset, ChunkBin, TaggedCSum}, - #state{high_clnt=Clnt}=S) -> - Chunk = {TaggedCSum, ChunkBin}, - Res = machi_cr_client:write_chunk(Clnt, File, Offset, Chunk), - {Res, S}; -do_pb_hl_request2({high_read_chunk, File, Offset, Size, Opts}, - #state{high_clnt=Clnt}=S) -> - Res = machi_cr_client:read_chunk(Clnt, File, Offset, Size, Opts), - {Res, S}; -do_pb_hl_request2({high_trim_chunk, File, Offset, Size}, - #state{high_clnt=Clnt}=S) -> - Res = machi_cr_client:trim_chunk(Clnt, File, Offset, Size), - {Res, S}; -do_pb_hl_request2({high_checksum_list, File}, #state{high_clnt=Clnt}=S) -> - Res = machi_cr_client:checksum_list(Clnt, File), - {Res, S}; -do_pb_hl_request2({high_list_files}, #state{high_clnt=Clnt}=S) -> - Res = machi_cr_client:list_files(Clnt), - {Res, S}. - -make_high_clnt(#state{high_clnt=undefined}=S) -> - {ok, Proj} = machi_projection_store:read_latest_projection( - S#state.proj_store, private), - Ps = [P_srvr || {_, P_srvr} <- orddict:to_list( - Proj#projection_v1.members_dict)], - {ok, Clnt} = machi_cr_client:start_link(Ps), - S#state{high_clnt=Clnt}; -make_high_clnt(S) -> - S. diff --git a/src/machi_flu_filename_mgr.erl b/src/machi_flu_filename_mgr.erl index 45e580e..7e8bb9d 100644 --- a/src/machi_flu_filename_mgr.erl +++ b/src/machi_flu_filename_mgr.erl @@ -51,8 +51,8 @@ -export([ child_spec/2, start_link/2, - find_or_make_filename_from_prefix/4, - increment_prefix_sequence/3, + find_or_make_filename_from_prefix/3, + increment_prefix_sequence/2, list_files_by_prefix/2 ]). @@ -87,31 +87,27 @@ start_link(FluName, DataDir) when is_atom(FluName) andalso is_list(DataDir) -> N = make_filename_mgr_name(FluName), gen_server:start_link({local, N}, ?MODULE, [FluName, DataDir], []). --spec find_or_make_filename_from_prefix( FluName :: atom(), - EpochId :: pv1_epoch_n(), - Prefix :: {prefix, string()}, - machi_dt:coc_nl()) -> +-spec find_or_make_filename_from_prefix( FluName :: atom(), + EpochId :: pv1_epoch_n(), + Prefix :: {prefix, string()} ) -> {file, Filename :: string()} | {error, Reason :: term() } | timeout. % @doc Find the latest available or make a filename from a prefix. A prefix % should be in the form of a tagged tuple `{prefix, P}'. Returns a tagged % tuple in the form of `{file, F}' or an `{error, Reason}' -find_or_make_filename_from_prefix(FluName, EpochId, - {prefix, Prefix}, - {coc, _CoC_Ns, _CoC_Loc}=CoC_NL) - when is_atom(FluName) -> +find_or_make_filename_from_prefix(FluName, EpochId, {prefix, Prefix}) when is_atom(FluName) -> N = make_filename_mgr_name(FluName), - gen_server:call(N, {find_filename, EpochId, CoC_NL, Prefix}, ?TIMEOUT); -find_or_make_filename_from_prefix(_FluName, _EpochId, Other, Other2) -> - lager:error("~p is not a valid prefix/CoC ~p", [Other, Other2]), + gen_server:call(N, {find_filename, EpochId, Prefix}, ?TIMEOUT); +find_or_make_filename_from_prefix(_FluName, _EpochId, Other) -> + lager:error("~p is not a valid prefix.", [Other]), error(badarg). 
--spec increment_prefix_sequence( FluName :: atom(), CoC_NL :: machi_dt:coc_nl(), Prefix :: {prefix, string()} ) -> +-spec increment_prefix_sequence( FluName :: atom(), Prefix :: {prefix, string()} ) -> ok | {error, Reason :: term() } | timeout. % @doc Increment the sequence counter for a given prefix. Prefix should % be in the form of `{prefix, P}'. -increment_prefix_sequence(FluName, {coc,_CoC_Namespace,_CoC_Locator}=CoC_NL, {prefix, Prefix}) when is_atom(FluName) -> - gen_server:call(make_filename_mgr_name(FluName), {increment_sequence, CoC_NL, Prefix}, ?TIMEOUT); -increment_prefix_sequence(_FluName, _CoC_NL, Other) -> +increment_prefix_sequence(FluName, {prefix, Prefix}) when is_atom(FluName) -> + gen_server:call(make_filename_mgr_name(FluName), {increment_sequence, Prefix}, ?TIMEOUT); +increment_prefix_sequence(_FluName, Other) -> lager:error("~p is not a valid prefix.", [Other]), error(badarg). @@ -142,23 +138,23 @@ handle_cast(Req, State) -> %% the FLU has already validated that the caller's epoch id and the FLU's epoch id %% are the same. So we *assume* that remains the case here - that is to say, we %% are not wedged. -handle_call({find_filename, EpochId, CoC_NL, Prefix}, _From, S = #state{ datadir = DataDir, - epoch = EpochId, - tid = Tid }) -> +handle_call({find_filename, EpochId, Prefix}, _From, S = #state{ datadir = DataDir, + epoch = EpochId, + tid = Tid}) -> %% Our state and the caller's epoch ids are the same. Business as usual. - File = handle_find_file(Tid, CoC_NL, Prefix, DataDir), + File = handle_find_file(Tid, Prefix, DataDir), {reply, {file, File}, S}; -handle_call({find_filename, EpochId, CoC_NL, Prefix}, _From, S = #state{ datadir = DataDir, tid = Tid }) -> +handle_call({find_filename, EpochId, Prefix}, _From, S = #state{ datadir = DataDir, tid = Tid }) -> %% If the epoch id in our state and the caller's epoch id were the same, it would've %% matched the above clause. Since we're here, we know that they are different. %% If epoch ids between our state and the caller's are different, we must increment the %% sequence number, generate a filename and then cache it. - File = increment_and_cache_filename(Tid, DataDir, CoC_NL, Prefix), + File = increment_and_cache_filename(Tid, DataDir, Prefix), {reply, {file, File}, S#state{epoch = EpochId}}; -handle_call({increment_sequence, {coc,CoC_Namespace,CoC_Locator}=_CoC_NL, Prefix}, _From, S = #state{ datadir = DataDir }) -> - ok = machi_util:increment_max_filenum(DataDir, CoC_Namespace,CoC_Locator, Prefix), +handle_call({increment_sequence, Prefix}, _From, S = #state{ datadir = DataDir }) -> + ok = machi_util:increment_max_filenum(DataDir, Prefix), {reply, ok, S}; handle_call({list_files, Prefix}, From, S = #state{ datadir = DataDir }) -> spawn(fun() -> @@ -191,24 +187,22 @@ generate_uuid_v4_str() -> io_lib:format("~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b", [A, B, C band 16#0fff, D band 16#3fff bor 16#8000, E]). -find_file(DataDir, {coc,CoC_Namespace,CoC_Locator}=_CoC_NL, Prefix, N) -> - {_Filename, Path} = machi_util:make_data_filename(DataDir, - CoC_Namespace,CoC_Locator, - Prefix, "*", N), +find_file(DataDir, Prefix, N) -> + {_Filename, Path} = machi_util:make_data_filename(DataDir, Prefix, "*", N), filelib:wildcard(Path). list_files(DataDir, Prefix) -> - {F_bin, Path} = machi_util:make_data_filename(DataDir, "*^" ++ Prefix ++ "^*"), + {F_bin, Path} = machi_util:make_data_filename(DataDir, Prefix, "*", "*"), filelib:wildcard(binary_to_list(F_bin), filename:dirname(Path)). 
make_filename_mgr_name(FluName) when is_atom(FluName) -> list_to_atom(atom_to_list(FluName) ++ "_filename_mgr"). -handle_find_file(Tid, {coc,CoC_Namespace,CoC_Locator}=CoC_NL, Prefix, DataDir) -> - N = machi_util:read_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix), - {File, Cleanup} = case find_file(DataDir, CoC_NL, Prefix, N) of +handle_find_file(Tid, Prefix, DataDir) -> + N = machi_util:read_max_filenum(DataDir, Prefix), + {File, Cleanup} = case find_file(DataDir, Prefix, N) of [] -> - {find_or_make_filename(Tid, DataDir, CoC_Namespace, CoC_Locator, Prefix, N), false}; + {find_or_make_filename(Tid, DataDir, Prefix, N), false}; [H] -> {H, true}; [Fn | _ ] = L -> lager:debug( @@ -216,23 +210,23 @@ handle_find_file(Tid, {coc,CoC_Namespace,CoC_Locator}=CoC_NL, Prefix, DataDir) - [Prefix, N, L]), {Fn, true} end, - maybe_cleanup(Tid, {CoC_Namespace, CoC_Locator, Prefix, N}, Cleanup), + maybe_cleanup(Tid, {Prefix, N}, Cleanup), filename:basename(File). -find_or_make_filename(Tid, DataDir, CoC_Namespace, CoC_Locator, Prefix, N) -> - case ets:lookup(Tid, {CoC_Namespace, CoC_Locator, Prefix, N}) of +find_or_make_filename(Tid, DataDir, Prefix, N) -> + case ets:lookup(Tid, {Prefix, N}) of [] -> - F = generate_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix, N), - true = ets:insert_new(Tid, {{CoC_Namespace, CoC_Locator, Prefix, N}, F}), + F = generate_filename(DataDir, Prefix, N), + true = ets:insert_new(Tid, {{Prefix, N}, F}), F; [{_Key, File}] -> File end. -generate_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix, N) -> +generate_filename(DataDir, Prefix, N) -> {F, _} = machi_util:make_data_filename( DataDir, - CoC_Namespace, CoC_Locator, Prefix, + Prefix, generate_uuid_v4_str(), N), binary_to_list(F). @@ -242,11 +236,11 @@ maybe_cleanup(_Tid, _Key, false) -> maybe_cleanup(Tid, Key, true) -> true = ets:delete(Tid, Key). -increment_and_cache_filename(Tid, DataDir, {coc,CoC_Namespace,CoC_Locator}, Prefix) -> - ok = machi_util:increment_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix), - N = machi_util:read_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix), - F = generate_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix, N), - true = ets:insert_new(Tid, {{CoC_Namespace, CoC_Locator, Prefix, N}, F}), +increment_and_cache_filename(Tid, DataDir, Prefix) -> + ok = machi_util:increment_max_filenum(DataDir, Prefix), + N = machi_util:read_max_filenum(DataDir, Prefix), + F = generate_filename(DataDir, Prefix, N), + true = ets:insert_new(Tid, {{Prefix, N}, F}), filename:basename(F). diff --git a/src/machi_flu_metadata_mgr.erl b/src/machi_flu_metadata_mgr.erl index 66274b3..d4447ae 100644 --- a/src/machi_flu_metadata_mgr.erl +++ b/src/machi_flu_metadata_mgr.erl @@ -185,17 +185,14 @@ handle_info({'DOWN', Mref, process, Pid, file_rollover}, State = #state{ fluname tid = Tid }) -> lager:info("file proxy ~p shutdown because of file rollover", [Pid]), R = get_md_record_by_mref(Tid, Mref), - {Prefix, CoC_Namespace, CoC_Locator, _, _} = - machi_util:parse_filename(R#md.filename), - %% CoC_Namespace = list_to_binary(CoC_Namespace_str), - %% CoC_Locator = list_to_integer(CoC_Locator_str), + [Prefix | _Rest] = machi_util:parse_filename(R#md.filename), %% We only increment the counter here. The filename will be generated on the %% next append request to that prefix and since the filename will have a new %% sequence number it probably will be associated with a different metadata %% manager. 
That's why we don't want to generate a new file name immediately %% and use it to start a new file proxy. - ok = machi_flu_filename_mgr:increment_prefix_sequence(FluName, {coc, CoC_Namespace, CoC_Locator}, {prefix, Prefix}), + ok = machi_flu_filename_mgr:increment_prefix_sequence(FluName, {prefix, Prefix}), %% purge our ets table of this entry completely since it is likely the %% new filename (whenever it comes) will be in a different manager than diff --git a/src/machi_flu_psup.erl b/src/machi_flu_psup.erl index 55584d5..a8fe946 100644 --- a/src/machi_flu_psup.erl +++ b/src/machi_flu_psup.erl @@ -83,6 +83,8 @@ %% Supervisor callbacks -export([init/1]). +make_package_spec(#p_srvr{name=FluName, port=TcpPort, props=Props}) when is_list(Props) -> + make_package_spec({FluName, TcpPort, Props}); make_package_spec({FluName, TcpPort, Props}) when is_list(Props) -> FluDataDir = get_env(flu_data_dir, undefined_is_invalid), MyDataDir = filename:join(FluDataDir, atom_to_list(FluName)), @@ -94,7 +96,7 @@ make_package_spec(FluName, TcpPort, DataDir, Props) -> permanent, ?SHUTDOWN, supervisor, []}. start_flu_package(#p_srvr{name=FluName, port=TcpPort, props=Props}) -> - DataDir = get_data_dir(Props), + DataDir = get_data_dir(FluName, Props), start_flu_package(FluName, TcpPort, DataDir, Props). start_flu_package(FluName, TcpPort, DataDir, Props) -> @@ -143,19 +145,16 @@ init([FluName, TcpPort, DataDir, Props0]) -> FProxySupSpec = machi_file_proxy_sup:child_spec(FluName), - ListenerSupSpec = {machi_listener_sup:make_listener_sup_name(FluName), - {machi_listener_sup, start_link, [FluName]}, - permanent, ?SHUTDOWN, supervisor, []}, - FluSpec = {FluName, {machi_flu1, start_link, [ [{FluName, TcpPort, DataDir}|Props] ]}, permanent, ?SHUTDOWN, worker, []}, + {ok, {SupFlags, [ - ProjSpec, FitnessSpec, MgrSpec, - FProxySupSpec, FNameMgrSpec, MetaSupSpec, - ListenerSupSpec, FluSpec]}}. + ProjSpec, FitnessSpec, MgrSpec, + FProxySupSpec, FNameMgrSpec, MetaSupSpec, + FluSpec]}}. make_flu_regname(FluName) when is_atom(FluName) -> FluName. @@ -178,8 +177,11 @@ get_env(Setting, Default) -> {ok, V} -> V end. -get_data_dir(Props) -> +get_data_dir(FluName, Props) -> case proplists:get_value(data_dir, Props) of Path when is_list(Path) -> - Path + Path; + undefined -> + {ok, Dir} = application:get_env(machi, flu_data_dir), + Dir ++ "/" ++ atom_to_list(FluName) end. diff --git a/src/machi_flu_sup.erl b/src/machi_flu_sup.erl index 74995bb..450f505 100644 --- a/src/machi_flu_sup.erl +++ b/src/machi_flu_sup.erl @@ -21,6 +21,9 @@ %% @doc Supervisor for Machi FLU servers and their related support %% servers. %% +%% Responsibility for managing FLU and chain lifecycle after the initial +%% application startup is delegated to {@link machi_lifecycle_mgr}. +%% %% See {@link machi_flu_psup} for an illustration of the entire Machi %% application process structure. @@ -29,8 +32,11 @@ -behaviour(supervisor). -include("machi.hrl"). +-include("machi_projection.hrl"). -include("machi_verbose.hrl"). +-ifdef(TEST). +-compile(export_all). -ifdef(PULSE). -compile({parse_transform, pulse_instrument}). -include_lib("pulse_otp/include/pulse_otp.hrl"). @@ -38,9 +44,12 @@ -else. -define(SHUTDOWN, 5000). -endif. +-endif. %TEST %% API --export([start_link/0]). +-export([start_link/0, + get_initial_flus/0, load_rc_d_files_from_dir/1, + sanitize_p_srvr_records/1]). %% Supervisor callbacks -export([init/1]). @@ -69,5 +78,66 @@ get_initial_flus() -> []. -else. % PULSE get_initial_flus() -> - application:get_env(machi, initial_flus, []). 
+ DoesNotExist = "/tmp/does/not/exist", + ConfigDir = case application:get_env(machi, flu_config_dir, DoesNotExist) of + DoesNotExist -> + DoesNotExist; + Dir -> + Dir + end, + Ps = [P || {_File, P} <- load_rc_d_files_from_dir(ConfigDir)], + sanitize_p_srvr_records(Ps). -endif. % PULSE + +load_rc_d_files_from_dir(Dir) -> + Files = filelib:wildcard(Dir ++ "/*"), + [case file:consult(File) of + {ok, [X]} -> + {File, X}; + _ -> + lager:warning("Error parsing file '~s', ignoring", + [File]), + {File, []} + end || File <- Files]. + +sanitize_p_srvr_records(Ps) -> + {Sane, _} = lists:foldl(fun sanitize_p_srvr_rec/2, {[], dict:new()}, Ps), + Sane. + +sanitize_p_srvr_rec(Whole, {Acc, D}) -> + try + #p_srvr{name=Name, + proto_mod=PMod, + address=Address, + port=Port, + props=Props} = Whole, + true = is_atom(Name), + NameK = {name, Name}, + error = dict:find(NameK, D), + true = is_atom(PMod), + case code:is_loaded(PMod) of + {file, _} -> + ok; + _ -> + {module, _} = code:load_file(PMod), + ok + end, + if is_list(Address) -> ok; + is_tuple(Address) -> ok % Erlang-style IPv4 or IPv6 + end, + true = is_integer(Port) andalso Port >= 1024 andalso Port =< 65534, + PortK = {port, Port}, + error = dict:find(PortK, D), + true = is_list(Props), + + %% All is sane enough. + D2 = dict:store(NameK, Name, + dict:store(PortK, Port, D)), + {[Whole|Acc], D2} + catch _:_ -> + _ = lager:log(error, self(), + "~s: Bad (or duplicate name/port) p_srvr record, " + "skipping: ~P\n", + [?MODULE, Whole, 15]), + {Acc, D} + end. diff --git a/src/machi_lifecycle_mgr.erl b/src/machi_lifecycle_mgr.erl new file mode 100644 index 0000000..9d4a688 --- /dev/null +++ b/src/machi_lifecycle_mgr.erl @@ -0,0 +1,1016 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Lifecycle manager for Machi FLUs and chains. +%% +%% Over the lifetime of a Machi cluster, both the number and types of +%% FLUs and chains may change. The lifecycle manager is responsible +%% for implementing the lifecycle changes as expressed by "policy". +%% In our case, "policy" is created by an external administrative +%% entity that creates and deletes configuration files that define +%% FLUs and chains relative to this local machine. (This "policy" +%% may be a human administrator or (as the Machi project matures) +%% partial or full automatic implementation of policy.) +%% +%% The "master configuration" for deciding which FLUs should be +%% running on this machine was inspired by BSD UNIX's `init(8)' and the +%% "rc.d" scheme. FLU definitions are found in a single directory, +%% with one file per FLU. Chains are defined similarly, with one +%% definition file per chain. +%% +%% If a definition file for a FLU (or chain) exists, then that +%% FLU/chain ought to be configured into being and running. 
If a
+%% definition file for a FLU/chain is removed, then that FLU/chain
+%% should be stopped gracefully.  However, deleting a file destroys
+%% information that is stored inside that file.  Therefore, we will
+%% not allow arbitrary unlinking of lifecycle config files.  If
+%% the administrator deletes these config files using `unlink(1)'
+%% directly, then "the warranty has been broken".
+%%
+%% We will rely on an administrative command to inform the
+%% running system to stop and/or delete lifecycle resources.  If the
+%% Machi application is not running, sorry, please start Machi first.
+%%
+%% == Wheel reinvention ==
+%%
+%% There's a whole mess of configuration management research &
+%% libraries out there.  I hope to ignore them all by doing something
+%% quick & dirty & good enough here.  If I fail, then I'll go
+%% pay attention to That Other Stuff.
+%%
+%% == A note about policy ==
+%%
+%% It is outside of the scope of this local lifecycle manager to make
+%% decisions about policy or to distribute policy info/files/whatever
+%% to other machines.  This is our machine.  There are many like it,
+%% but this one is ours.
+%%
+%% == Machi Application Variables ==
+%%
+%% All OTP application environment variables below are defined in the
+%% `machi' application.
+%%
+%% <ul>
+%% <li> flu_config_dir: Stores the `rc.d'-like config files for
+%%      FLU runtime policy.
+%% </li>
+%% <li> flu_data_dir: Stores the file data and metadata for
+%%      all FLUs.
+%% </li>
+%% <li> chain_config_dir: Stores the `rc.d'-like config files for
+%%      chain runtime policy.
+%% </li>
+%% </ul>
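+%%
+%% As an illustrative sketch only (the directory paths below are
+%% invented for this example, not project defaults), these variables
+%% could appear in a `sys.config' entry for the `machi' application:
+%%
+%%     {machi, [{flu_config_dir,   "/etc/machi/flu-config"},
+%%              {flu_data_dir,     "/var/lib/machi/flu-data"},
+%%              {chain_config_dir, "/etc/machi/chain-config"}]}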
+%%
+%% == The FLU Lifecycle ==
+%%
+%% See also: [https://github.com/basho/machi/tree/master/doc/flu-and-chain-lifecycle.org]
+%%
+%% FLUs on the local machine may be started and stopped, as defined by
+%% administrative policy.  In order to do any useful work, however, a
+%% running FLU must also be configured to be a member of a replication
+%% chain.  Thus, as a practical matter, both a FLU and the chain that
+%% the FLU participates in must be managed by this manager.
+%%
+%% The FLU will be started with the file's specified parameters.  A
+%% FLU should be defined and started before configuring its chain
+%% membership.
+%%
+%% Usually a FLU is removed implicitly by removing that FLU from a
+%% newer definition of its chain (by omitting the FLU's name).
+%% If a FLU has been started but never been a chain
+%% member, then the FLU can be stopped and removed explicitly.
+%%
+%% A FLU may be created or removed (via implicit or explicit policy).
+%% An existing FLU may not be reconfigured.
+%%
+%% == The Chain Lifecycle ==
+%%
+%% See also: [https://github.com/basho/machi/tree/master/doc/flu-and-chain-lifecycle.org]
+%%
+%% If a FLU on the local machine is expected to participate in a
+%% replication chain, then an `rc.d'-style chain definition file must
+%% also be present on each machine that runs a FLU in the chain.
+%%
+%% FLUs in a new chain should have definition files created on each
+%% FLU's respective machine prior to defining their chain.  Similarly,
+%% on each machine that hosts a chain member, a chain definition file
+%% must be created.  External policy is responsible for creating each
+%% of these files.
+%%
+%% A chain may be created or modified.
+%%
+%% A modification request writes a `#chain_def_v1{}' record with the
+%% same name but different `full' and/or `witnesses' list to each
+%% machine that hosts a FLU in the chain (in both the old and new
+%% versions of the chain).
+%%
+%% == Conflicts with TCP ports, FLU & chain names, etc ==
+%%
+%% This manager is not responsible for managing conflicts in resource
+%% namespaces, e.g., TCP port numbers, FLU names, chain names, etc.
+%% Managing these namespaces is external policy's responsibility.
+
+-module(machi_lifecycle_mgr).
+
+-behaviour(gen_server).
+
+-include("machi_projection.hrl").
+
+%% API
+-export([start_link/0,
+         process_pending/0]).
+-export([get_pending_dir/0, get_rejected_dir/0, get_flu_config_dir/0,
+         get_flu_data_dir/0, get_chain_config_dir/0, get_data_dir/0]).
+-export([quick_admin_sanity_check/1, quick_admin_apply/2]).
+-export([make_pending_config/1, run_ast/1, diff_env/2]).
+-ifdef(TEST).
+-compile(export_all).
+-endif. % TEST
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-define(SERVER, ?MODULE).
+
+-record(state, {
+          flus = []   :: [atom()],
+          chains = [] :: list()
+         }).
+
+start_link() ->
+    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+process_pending() ->
+    gen_server:call(?SERVER, {process_pending}, infinity).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init([]) ->
+    self() ! finish_init,
+    {ok, #state{}}.
+
+handle_call({process_pending}, _From, State) ->
+    {Reply, NewState} = do_process_pending(State),
+    {reply, Reply, NewState};
+handle_call(_Request, _From, State) ->
+    Reply = 'whatwatwha????????????????????',
+    {reply, Reply, State}.
+
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+ +handle_info(finish_init, State) -> + {noreply, finish_init(State)}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +finish_init(S) -> + %% machi_flu_sup will start all FLUs that have a valid definition + %% file. That supervisor's structure + OTP supervisor behavior + %% guarantees that those FLUs should now be running. + %% (TODO: Unless they absolutely cannot keep running and the + %% supervisor has given up and terminated them.) + RunningFLUs = get_local_running_flus(), + RunningFLU_Epochs = get_latest_public_epochs(RunningFLUs), + RunningFLUs_at_zero = [FLU || {FLU, 0} <- RunningFLU_Epochs], + lager:info("Running FLUs: ~p\n", [RunningFLUs]), + lager:info("Running FLUs at epoch 0: ~p\n", [RunningFLUs_at_zero]), + ChainDefs = get_initial_chains(), + perhaps_bootstrap_chains(ChainDefs, RunningFLUs_at_zero, RunningFLUs), + S#state{flus=RunningFLUs, chains=ChainDefs}. + +get_local_running_flus() -> + [Name || {Name,_,_,_} <- supervisor:which_children(machi_flu_sup)]. + +get_latest_public_epochs(FLUs) -> + [begin + PS = machi_flu1:make_projection_server_regname(FLU), + {ok, {Epoch, _CSum}} = machi_projection_store:get_latest_epochid( + PS, public), + {FLU, Epoch} + end || FLU <- FLUs]. + +get_initial_chains() -> + ConfigDir = get_chain_config_dir(), + CDefs = [CDef || {_File, CDef} <- machi_flu_sup:load_rc_d_files_from_dir( + ConfigDir)], + sanitize_chain_def_records(CDefs). + +sanitize_chain_def_records(Ps) -> + {Sane, _} = lists:foldl(fun sanitize_chain_def_rec/2, {[], gb_trees:empty()}, Ps), + Sane. + +sanitize_chain_def_rec(Whole, {Acc, D}) -> + try + #chain_def_v1{name=Name, + mode=Mode, + full=Full, + witnesses=Witnesses} = Whole, + {true, ?LINE} = {is_atom(Name), ?LINE}, + NameK = {name, Name}, + {none, ?LINE} = {gb_trees:lookup(NameK, D), ?LINE}, + {true, ?LINE} = {(Mode == ap_mode orelse Mode == cp_mode), ?LINE}, + IsPSrvr = fun(X) when is_record(X, p_srvr) -> true; + (_) -> false + end, + {true, ?LINE} = {lists:all(IsPSrvr, Full), ?LINE}, + {true, ?LINE} = {lists:all(IsPSrvr, Witnesses), ?LINE}, + + %% All is sane enough. + D2 = gb_trees:enter(NameK, Name, D), + {[Whole|Acc], D2} + catch X:Y -> % badmatch will include ?LINE + lager:error("~s: Bad chain_def record (~w ~w), skipping: ~P\n", + [?MODULE, X, Y, Whole, 15]), + {Acc, D} + end. + +perhaps_bootstrap_chains([], LocalFLUs_at_zero, LocalFLUs) -> + if LocalFLUs == [] -> + ok; + LocalFLUs_at_zero == [] -> + ok; + true -> + lager:warning("The following FLUs are defined but are not also " + "members of a defined chain: ~w\n", + [LocalFLUs_at_zero]) + end, + ok; +perhaps_bootstrap_chains([CD|ChainDefs], LocalFLUs_at_zero, LocalFLUs) -> + #chain_def_v1{full=Full, witnesses=Witnesses} = CD, + AllNames = [Name || #p_srvr{name=Name} <- Full ++ Witnesses], + case ordsets:intersection(ordsets:from_list(AllNames), + ordsets:from_list(LocalFLUs_at_zero)) of + [] -> + perhaps_bootstrap_chains(ChainDefs, LocalFLUs_at_zero, LocalFLUs); + [FLU1|_]=FLUs -> + %% One FLU is enough: Humming Consensus will config the remaining + _ = bootstrap_chain(CD, FLU1), + perhaps_bootstrap_chains(ChainDefs, LocalFLUs_at_zero -- FLUs, + LocalFLUs -- FLUs) + end. + +bootstrap_chain(CD, FLU) -> + bootstrap_chain2(CD, FLU, 20). 
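+
+%% Illustration only: a hypothetical two-FLU, AP-mode chain definition
+%% `CD' (record syntax; the chain/FLU names, host, and ports are
+%% invented) as it might arrive from a chain definition file:
+%%
+%%     #chain_def_v1{name=c1, mode=ap_mode,
+%%                   full=[#p_srvr{name=f1, address="localhost", port=20401},
+%%                         #p_srvr{name=f2, address="localhost", port=20402}],
+%%                   witnesses=[]}
+%%
+%% bootstrap_chain2() below makes up to 20 attempts to push such a
+%% definition through the chain manager of one local FLU.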
+ +bootstrap_chain2(CD, FLU, 0) -> + lager:warning("Failed all attempts to bootstrap chain ~w via FLU ~w ", + [CD,FLU]), + failed; +bootstrap_chain2(#chain_def_v1{name=NewChainName, mode=NewCMode, + full=Full, witnesses=Witnesses, + old_full=ReqOldFull, + old_witnesses=ReqOldWitnesses, + props=_Props}=CD, + FLU, N) -> + All_p_srvrs = Witnesses ++ Full, + L = [{Name, P_srvr} || #p_srvr{name=Name}=P_srvr <- All_p_srvrs], + MembersDict = orddict:from_list(L), + NewAll_list = [Name || #p_srvr{name=Name} <- All_p_srvrs], + NewWitnesses_list = [Name || #p_srvr{name=Name} <- Witnesses], + + Mgr = machi_chain_manager1:make_chmgr_regname(FLU), + PStore = machi_flu1:make_projection_server_regname(FLU), + {ok, #projection_v1{epoch_number=OldEpoch, chain_name=OldChainName, + mode=OldCMode, + all_members=OldAll_list, witnesses=OldWitnesses}} = + machi_projection_store:read_latest_projection(PStore, private), + case set_chain_members(OldChainName, NewChainName, OldCMode, + ReqOldFull, ReqOldWitnesses, + OldAll_list, OldWitnesses, + NewAll_list, Witnesses, + Mgr, NewChainName, OldEpoch, + NewCMode, MembersDict, NewWitnesses_list) of + ok -> + lager:info("Configured chain ~w via FLU ~w to " + "mode=~w all=~w witnesses=~w\n", + [NewChainName, FLU, NewCMode, + NewAll_list, NewWitnesses_list]), + ok; + chain_bad_state=Else -> + lager:error("Attempt to bootstrap chain ~w via FLU ~w " + "failed (no retries): ~w (defn ~w)\n", + [NewChainName, FLU, Else, CD]), + Else; + Else -> + lager:error("Attempt ~w to bootstrap chain ~w via FLU ~w " + "failed: ~w (may retry with defn ~w)\n", + [N, NewChainName, FLU, Else, CD]), + bootstrap_chain2(CD, FLU, N-1) + end. + +set_chain_members(OldChainName, NewChainName, OldCMode, + ReqOldFull, ReqOldWitnesses, + OldFull_list, OldWitnesses, NewAll_list, NewWitnesses, + Mgr, ChainName, OldEpoch, NewCMode, MembersDict, _Props) -> + if OldChainName == NewChainName, OldCMode == NewCMode, + OldFull_list == NewAll_list, OldWitnesses == NewWitnesses -> + %% The chain's current definition at this FLU is already what we + %% want. Let's pretend that we sent the command and that it was + %% successful. + ok; + OldEpoch == 0 orelse (OldChainName == NewChainName andalso + OldCMode == NewCMode andalso + ReqOldFull == OldFull_list andalso + ReqOldWitnesses == OldWitnesses) -> + %% The old epoch is 0 (no chain defined) or else the prerequisites + %% for our chain change request are indeed matched by the FLU's + %% current private projection. + machi_chain_manager1:set_chain_members(Mgr, ChainName, OldEpoch, + NewCMode, + MembersDict, NewWitnesses); + true -> + chain_bad_state + end. + +do_process_pending(S) -> + PendingDir = get_pending_dir(), + PendingParsed = machi_flu_sup:load_rc_d_files_from_dir(PendingDir), + %% A pending file has exactly one record (#p_srvr{} or #chain_def_v1{}). + P_FLUs = [X || {_File, #p_srvr{}}=X <- PendingParsed], + P_Chains = [X || {_File, #chain_def_v1{}}=X <- PendingParsed], + BadFiles = [File || {File, []} <- PendingParsed], + S2 = process_pending_flus(P_FLUs, S), + S3 = process_pending_chains(P_Chains, S2), + S4 = process_bad_files(BadFiles, S3), + {{P_FLUs, P_Chains}, S4}. + +flu_config_exists(FLU) -> + ConfigDir = get_flu_config_dir(), + case file:read_file_info(ConfigDir ++ "/" ++ atom_to_list(FLU)) of + {ok, _} -> + true; + _ -> + false + end. + +get_pending_dir() -> + {ok, EtcDir} = application:get_env(machi, platform_etc_dir), + EtcDir ++ "/pending". 
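+
+%% A pending file (see do_process_pending() above) holds exactly one
+%% consultable term.  A hypothetical pending file for one new FLU
+%% (name and port invented; the tuple layout assumes the #p_srvr{}
+%% field order name/proto_mod/address/port/props):
+%%
+%%     {p_srvr, f1, machi_flu1_client, "localhost", 20401, []}.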
+
+get_rejected_dir() ->
+    {ok, EtcDir} = application:get_env(machi, platform_etc_dir),
+    EtcDir ++ "/rejected".
+
+get_flu_config_dir() ->
+    {ok, Dir} = application:get_env(machi, flu_config_dir),
+    Dir.
+
+get_flu_data_dir() ->
+    {ok, Dir} = application:get_env(machi, flu_data_dir),
+    Dir.
+
+get_chain_config_dir() ->
+    {ok, Dir} = application:get_env(machi, chain_config_dir),
+    Dir.
+
+get_data_dir() ->
+    {ok, Dir} = application:get_env(machi, platform_data_dir),
+    Dir.
+
+get_preserve_dir() ->
+    get_data_dir() ++ "/^PRESERVE".
+
+get_quick_admin_dir() ->
+    {ok, EtcDir} = application:get_env(machi, platform_etc_dir),
+    EtcDir ++ "/quick-admin-archive".
+
+process_pending_flus(P_FLUs, S) ->
+    lists:foldl(fun process_pending_flu/2, S, P_FLUs).
+
+process_pending_flu({File, P}, S) ->
+    #p_srvr{name=FLU} = P,
+    CurrentPs = machi_flu_sup:get_initial_flus(),
+    Valid_Ps = machi_flu_sup:sanitize_p_srvr_records(CurrentPs ++ [P]),
+    case lists:member(P, Valid_Ps)
+         andalso
+         (not lists:keymember(FLU, #p_srvr.name, CurrentPs)) of
+        false ->
+            lager:error("Pending FLU config file ~s has been rejected\n",
+                        [File]),
+            _ = move_to_rejected(File, S),
+            S;
+        true ->
+            try
+                {ok, SupPid} = machi_flu_psup:start_flu_package(P),
+                lager:info("Started FLU ~w with supervisor pid ~p\n",
+                           [FLU, SupPid]),
+                _ = move_to_flu_config(FLU, File, S),
+                S
+            catch error:Else ->
+                    lager:error("Start FLU ~w failed: ~p\n", [FLU, Else]),
+                    _ = move_to_rejected(File, S),
+                    S
+            end
+    end.
+
+process_pending_chains(P_Chains, S) ->
+    lists:foldl(fun process_pending_chain/2, S, P_Chains).
+
+process_pending_chain({File, CD}, S) ->
+    #chain_def_v1{name=Name,
+                  local_stop=LocalStopFLUs, local_run=LocalRunFLUs} = CD,
+    case sanitize_chain_def_records([CD]) of
+        [CD] ->
+            case LocalRunFLUs of
+                [] ->
+                    case LocalStopFLUs of
+                        [] ->
+                            lager:error("Pending chain config file ~s has no "
+                                        "FLUs on this machine, rejected\n",
+                                        [File]),
+                            _ = move_to_rejected(File, S),
+                            S;
+                        [_|_] ->
+                            lager:info("Pending chain config file ~s stops "
+                                       "all local members of chain ~w: ~w\n",
+                                       [File, Name, LocalStopFLUs]),
+                            process_pending_chain2(File, CD, LocalStopFLUs,
+                                                   delete, S)
+                    end;
+                [FLU|_] ->
+                    %% TODO: Between the successful chain change inside of
+                    %% bootstrap_chain() (and before it returns to us!) and
+                    %% the return of process_pending_chain2(), we have a race
+                    %% window if this process crashes. (Review again!)
+                    case bootstrap_chain(CD, FLU) of
+                        ok ->
+                            process_pending_chain2(File, CD, LocalStopFLUs,
+                                                   move, S);
+                        Else ->
+                            lager:error("Pending chain config file ~s "
+                                        "has failed (~w), rejected\n",
+                                        [File, Else]),
+                            _ = move_to_rejected(File, S),
+                            S
+                    end
+            end;
+        [] ->
+            lager:error("Pending chain config file ~s has been rejected\n",
+                        [File]),
+            _ = move_to_rejected(File, S),
+            S
+    end.
+
+process_pending_chain2(File, CD, RemovedFLUs, ChainConfigAction, S) ->
+    LocalRemovedFLUs = [FLU || FLU <- RemovedFLUs,
+                               flu_config_exists(FLU)],
+    case LocalRemovedFLUs of
+        [] ->
+            ok;
+        [_|_] ->
+            %% %% Sleep for a little bit to allow HC to settle.
+            %% timer:sleep(1000),
+            [begin
+                 %% We may be retrying this, so be liberal with any pattern
+                 %% matching on return values.
+                 _ = machi_flu_psup:stop_flu_package(FLU),
+                 ConfigDir = get_flu_config_dir(),
+                 FluDataDir = get_flu_data_dir(),
+                 PreserveDir = get_preserve_dir(),
+                 Suffix = make_ts_suffix(),
+                 FLU_str = atom_to_list(FLU),
+                 MyPreserveDir = PreserveDir ++ "/" ++ FLU_str ++ "."
++ Suffix, + ok = filelib:ensure_dir(MyPreserveDir ++ "/unused"), + _ = file:make_dir(MyPreserveDir), + Src1 = ConfigDir ++ "/" ++ FLU_str, + Dst1 = MyPreserveDir ++ "/" ++ FLU_str ++ ".config", + lager:info("Stopped FLU ~w: rename ~s ~s\n", + [FLU, Src1, Dst1]), + _ = file:rename(Src1, Dst1), + Src2 = FluDataDir ++ "/" ++ FLU_str, + Dst2 = MyPreserveDir ++ "/" ++ FLU_str ++ ".data", + lager:info("Stopped FLU ~w: rename ~s ~s\n", + [FLU, Src2, Dst2]), + %% TODO: If EXDEV, then we should rename to + %% another dir on same device, but ... where? + _ = file:rename(Src2, Dst2), + ok + end || FLU <- LocalRemovedFLUs], + ok + end, + #chain_def_v1{name=Name} = CD, + if ChainConfigAction == move -> + _ = move_to_chain_config(Name, File, S); + ChainConfigAction == delete -> + _ = delete_chain_config(Name, File, S) + end, + S. + +process_bad_files(Files, S) -> + lists:foldl(fun move_to_rejected/2, S, Files). + +move_to_rejected(File, S) -> + lager:error("Pending unknown config file ~s has been rejected\n", [File]), + Dst = get_rejected_dir(), + Suffix = make_ts_suffix(), + ok = file:rename(File, Dst ++ "/" ++ filename:basename(File) ++ Suffix), + S. + +make_ts_suffix() -> + str("~w,~w,~w", tuple_to_list(os:timestamp())). + +move_to_flu_config(FLU, File, S) -> + lager:info("Creating FLU config file ~w\n", [FLU]), + Dst = get_flu_config_dir(), + ok = file:rename(File, Dst ++ "/" ++ atom_to_list(FLU)), + S. + +move_to_chain_config(Name, File, S) -> + lager:info("Creating chain config file ~w\n", [Name]), + Dst = get_chain_config_dir(), + ok = file:rename(File, Dst ++ "/" ++ atom_to_list(Name)), + S. + +delete_chain_config(Name, File, S) -> + lager:info("Deleting chain config file ~s for chain ~w\n", [File, Name]), + Dst = get_chain_config_dir(), + ok = file:delete(Dst ++ "/" ++ atom_to_list(Name)), + S. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +check_ast_tuple_syntax(Ts) -> + lists:partition(fun check_an_ast_tuple/1, Ts). + +check_an_ast_tuple({host, Name, Props}) -> + is_stringy(Name) andalso is_proplisty(Props) andalso + lists:all(fun({admin_interface, X}) -> is_stringy(X); + ({client_interface, X}) -> is_stringy(X); + (_) -> false + end, Props); +check_an_ast_tuple({host, Name, AdminI, ClientI, Props}) -> + is_stringy(Name) andalso + is_stringy(AdminI) andalso is_stringy(ClientI) andalso + is_proplisty(Props) andalso + lists:all(fun({admin_interface, X}) -> is_stringy(X); + ({client_interface, X}) -> is_stringy(X); + (_) -> false + end, Props); +check_an_ast_tuple({flu, Name, HostName, Port, Props}) -> + is_atom(Name) andalso is_stringy(HostName) andalso + is_porty(Port) andalso is_proplisty(Props); +check_an_ast_tuple({chain, Name, FullList, Props}) -> + is_atom(Name) andalso + lists:all(fun erlang:is_atom/1, FullList) andalso + is_proplisty(Props); +check_an_ast_tuple({chain, Name, CMode, FullList, Witnesses, Props}) -> + is_atom(Name) andalso + (CMode == ap_mode orelse CMode == cp_mode) andalso + lists:all(fun erlang:is_atom/1, FullList) andalso + lists:all(fun erlang:is_atom/1, Witnesses) andalso + is_proplisty(Props); +check_an_ast_tuple(switch_old_and_new) -> + true; +check_an_ast_tuple(_) -> + false. + +%% Prerequisite: all tuples are approved by check_ast_tuple_syntax(). + +normalize_ast_tuple_syntax(Ts) -> + lists:map(fun normalize_an_ast_tuple/1, Ts). 
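+
+%% A small, hypothetical AST (all names and the port number are
+%% invented) that passes check_ast_tuple_syntax/1:
+%%
+%%     [{host, "localhost", []},
+%%      {flu, f1, "localhost", 20401, []},
+%%      {chain, c1, [f1], []}]
+%%
+%% Normalization expands the short forms: the host tuple becomes
+%% {host, "localhost", "localhost", "localhost", []}, and the 4-tuple
+%% chain form becomes {chain, c1, ap_mode, [f1], [], []}.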
+ +normalize_an_ast_tuple({host, Name, Props}) -> + AdminI = proplists:get_value(admin_interface, Props, Name), + ClientI = proplists:get_value(client_interface, Props, Name), + Props2 = lists:keydelete(admin_interface, 1, + lists:keydelete(client_interface, 1, Props)), + {host, Name, AdminI, ClientI, n(Props2)}; +normalize_an_ast_tuple({host, Name, AdminI, ClientI, Props}) -> + Props2 = lists:keydelete(admin_interface, 1, + lists:keydelete(client_interface, 1, Props)), + {host, Name, AdminI, ClientI, n(Props2)}; +normalize_an_ast_tuple({flu, Name, HostName, Port, Props}) -> + {flu, Name, HostName, Port, n(Props)}; +normalize_an_ast_tuple({chain, Name, FullList, Props}) -> + {chain, Name, ap_mode, n(FullList), [], n(Props)}; +normalize_an_ast_tuple({chain, Name, CMode, FullList, Witnesses, Props}) -> + {chain, Name, CMode, n(FullList), n(Witnesses), n(Props)}; +normalize_an_ast_tuple(A=switch_old_and_new) -> + A. + +run_ast(Ts) -> + case check_ast_tuple_syntax(Ts) of + {_, []} -> + Ts2 = normalize_ast_tuple_syntax(Ts), + Env1 = make_ast_run_env(), + try + Env2 = lists:foldl(fun run_ast_cmd/2, Env1, Ts2), + {ok, Env2} + catch throw:DbgStuff -> + {error, DbgStuff} + end; + {_, Else} -> + {error, {bad_syntax, Else}} + end. + +%% Legend for env key naming scheme +%% +%% {kv, X} +%% Mutable: no. +%% Reference KV store for X. Variations of X are: +%% {host, Name} | {flu, Name} | {chain, Name} +%% Value is a {host,...} or {flu,...}, or {chain,...} AST tuple. +%% +%% {p_srvr, Name} +%% #p_srvr{} record for FLU Name, for cache/convenience purposes. +%% If a FLU has been defined via {kv,_}, this key must also exist. +%% +%% +%% {tmp, X} +%% Mutable: yes. +%% Tmp scratch for X. Variations of X are: +%% {flu_assigned_to, ChainName} +%% If a FLU is currently assigned to a chain, map to ChainName. +%% If a FLU is not currently assigned to a chain, key does not exist. + +run_ast_cmd({host, Name, _AdminI, _ClientI, _Props}=T, E) -> + Key = {kv,{host,Name}}, + case d_find(Key, E) of + error -> + d_store(Key, T, E); + {ok, _} -> + err("Duplicate host definition ~p", [Name], T) + end; +run_ast_cmd({flu, Name, HostName, Port, Props}=T, E) -> + Key = {kv,{flu,Name}}, + Key_p = {kv,{p_srvr,Name}}, + HostExists_p = env_host_exists(HostName, E), + case d_find(Key, E) of + error when HostExists_p -> + case host_port_is_assigned(HostName, Port, E) of + false -> + {ok, ClientI} = get_host_client_interface(HostName, E), + Mod = proplists:get_value( + proto_mod, Props, 'machi_flu1_client'), + Val_p = #p_srvr{name=Name, proto_mod=Mod, + address=ClientI, port=Port, props=Props}, + d_store(Key, T, + d_store(Key_p, Val_p, E)); + {true, UsedBy} -> + err("Host ~p port ~p already in use by FLU ~p", + [HostName, Port, UsedBy], T) + end; + error -> + err("Unknown host ~p", [HostName], T); + {ok, _} -> + err("Duplicate flu ~p", [Name], T) + end; +run_ast_cmd({chain, Name, CMode, FullList, Witnesses, _Props}=T, E) -> + Key = {kv,{chain,Name}}, + AllFLUs = FullList ++ Witnesses, + + %% All FLUs must exist. + case lists:sort(AllFLUs) == lists:usort(AllFLUs) of + true -> ok; + false -> err("Duplicate FLU(s) specified", [], T) + end, + MissingFLUs = [FLU || FLU <- AllFLUs, not env_flu_exists(FLU, E)], + case MissingFLUs of + [] -> ok; + [_|_] -> err("Undefined FLU(s) ~p", [MissingFLUs], T) + end, + + %% All FLUs must not be assigned to another chain. 
+
+    AssignedFLUs =
+        [case d_find({tmp,{flu_assigned_to,FLU}}, E) of
+             error ->
+                 [];
+             {ok, Ch} when Ch == Name ->
+                 [];                            % It's assigned to me already
+             {ok, Ch} ->
+                 [{flu, FLU, assigned_to_chain, Ch}]
+         end || FLU <- AllFLUs],
+    case lists:append(AssignedFLUs) of
+        [] -> ok;
+        As -> err("Some FLUs are assigned to other chains: ~p\n", [As], T)
+    end,
+
+    %% If chain already exists, then the consistency mode cannot change.
+    case d_find(Key, E) of
+        error ->
+            ok;
+        {ok, C_old} ->
+            {chain, _, OldCMode, _, _, _} = C_old,
+            if CMode == OldCMode ->
+                    ok;
+               true ->
+                    err("Consistency mode change ~w -> ~w is not permitted\n",
+                        [OldCMode, CMode], T)
+            end
+    end,
+
+    E2 = lists:foldl(fun(FLU, Env) ->
+                             d_erase({tmp,{flu_assigned_to,FLU}}, Env)
+                     end, E, AllFLUs),
+    E3 = lists:foldl(fun(FLU, Env) ->
+                             d_store({tmp,{flu_assigned_to,FLU}}, Name, Env)
+                     end, E2, AllFLUs),
+
+    %% It's good, let's roll.
+    d_store(Key, T, E3);
+run_ast_cmd(switch_old_and_new, E) ->
+    switch_env_dict(E);
+run_ast_cmd(Unknown, _E) ->
+    err("Unknown AST thingie", [], Unknown).
+
+make_ast_run_env() ->
+    {_KV_old=gb_trees:empty(), _KV_new=gb_trees:empty(), _IsNew=false}.
+
+env_host_exists(Name, E) ->
+    Key = {kv,{host,Name}},
+    case d_find(Key, E) of
+        error ->
+            false;
+        {ok, _} ->
+            true
+    end.
+
+env_flu_exists(Name, E) ->
+    Key = {kv,{flu,Name}},
+    case d_find(Key, E) of
+        error ->
+            false;
+        {ok, _} ->
+            true
+    end.
+
+get_host_client_interface(HostName, E) ->
+    Key = {kv,{host,HostName}},
+    case d_find(Key, E) of
+        error ->
+            false;
+        {ok, {host, _Name, _AdminI, ClientI, _Props}} ->
+            {ok, ClientI}
+    end.
+
+host_port_is_assigned(HostName, Port, {KV_old, KV_new, _}) ->
+    L = gb_trees:to_list(KV_old) ++ gb_trees:to_list(KV_new),
+    FLU_Ts = [V || {{kv,{flu,_}}, V} <- L],
+    case [V || {flu, _Nm, Host_, Port_, _Ps}=V <- FLU_Ts,
+               Host_ == HostName, Port_ == Port] of
+        [{flu, Name, _Host, _Port, _Ps}] ->
+            {true, Name};
+        [] ->
+            false
+    end.
+
+d_find(Key, {KV_old, KV_new, IsNew}) ->
+    %% Bah, use 'dict' return value convention.
+    case gb_trees:lookup(Key, KV_new) of
+        {value, Val} when IsNew ->
+            {ok, Val};
+        _ ->
+            case gb_trees:lookup(Key, KV_old) of
+                {value, Val} ->
+                    {ok, Val};
+                _ ->
+                    error
+            end
+    end.
+
+d_get(Key, {KV_old, KV_new, IsNew}) ->
+    %% Bah, use 'dict' return value convention.
+    %% Assume key exists, fail if not found.
+    case gb_trees:lookup(Key, KV_new) of
+        {value, Val} when IsNew ->
+            Val;
+        _ ->
+            case gb_trees:lookup(Key, KV_old) of
+                {value, Val} ->
+                    Val
+            end
+    end.
+
+d_store(Key, Val, {KV_old, KV_new, false}) ->
+    {gb_trees:enter(Key, Val, KV_old), KV_new, false};
+d_store(Key, Val, {KV_old, KV_new, true}) ->
+    {KV_old, gb_trees:enter(Key, Val, KV_new), true}.
+
+d_erase({tmp,_}=Key, {KV_old, KV_new, IsNew}) ->
+    {gb_trees:delete_any(Key, KV_old), gb_trees:delete_any(Key, KV_new), IsNew}.
+
+switch_env_dict({KV_old, KV_new, false}) ->
+    {KV_old, KV_new, true};
+switch_env_dict({_, _, true}) ->
+    A = switch_old_and_new,
+    err("Duplicate ~p", [A], A).
+
+n(L) ->
+    lists:sort(L).
+
+err(Fmt, Args, AST) ->
+    throw({str(Fmt, Args), AST}).
+
+str(Fmt, Args) ->
+    lists:flatten(io_lib:format(Fmt, Args)).
+
+%% We won't allow 'atom' style proplist members: too difficult to normalize.
+%% Also, no duplicates, again because normalizing is useful for checksums but
+%% bad for order-based traditional proplists (first key wins).
+
+is_proplisty(Props) ->
+    is_list(Props) andalso
+        lists:all(fun({_,_}) -> true;
+                     %% nope: (X) when is_atom(X) -> true;
+                     (_) -> false
+                  end, Props) andalso
+        begin
+            Ks = [K || {K,_V} <- Props],
+            lists:sort(Ks) == lists:usort(Ks)
+        end.
+
+is_stringy(L) ->
+    is_list(L) andalso
+        lists:all(fun(C) when 33 =< C, C =< 126 -> true;
+                     (_) -> false
+                  end, L).
+
+is_porty(Port) ->
+    is_integer(Port) andalso 1024 =< Port andalso Port =< 65535.
+
+diff_env({KV_old, KV_new, _IsNew}=E, RelativeHost) ->
+    New_list = gb_trees:to_list(KV_new),
+    put(final, []),
+    Add = fun(X) -> put(final, [X|get(final)]) end,
+
+    %% Find all new FLUs and define them.
+    [begin
+         {flu, Name, Host, _Port, _Ps} = V,
+         if Host == RelativeHost orelse RelativeHost == all ->
+                 {ok, P_srvr} = d_find({kv,{p_srvr,Name}}, E),
+                 Add(P_srvr),
+                 ok;
+            true ->
+                 ok
+         end
+     end || {{kv,{flu,Name}}, V} <- New_list],
+
+    %% Find new chains on this host and define them.
+    %% Find modified chains on this host and re-define them.
+    [begin
+         {chain, Name, CMode, FullList, Witnesses, Props} = V,
+         FLUsF = [d_get({kv,{flu,FLU}}, E) || FLU <- FullList],
+         FLUsW = [d_get({kv,{flu,FLU}}, E) || FLU <- Witnesses],
+         TheFLU_Hosts =
+             [{FLU, Host} || {flu, FLU, Host, _Port, _Ps} <- FLUsF ++ FLUsW],
+         case (lists:keymember(RelativeHost, 2, TheFLU_Hosts)
+               orelse RelativeHost == all) of
+             true ->
+                 Ps_F = [d_get({kv,{p_srvr,FLU}}, E) || FLU <- FullList],
+                 Ps_W = [d_get({kv,{p_srvr,FLU}}, E) || FLU <- Witnesses],
+
+                 case gb_trees:lookup({kv,{chain,Name}}, KV_old) of
+                     {value, OldT} ->
+                         {chain, _, _, OldFull_ss, OldWitnesses_ss, _} = OldT,
+                         OldFull = [Str || Str <- OldFull_ss],
+                         OldWitnesses = [Str || Str <- OldWitnesses_ss];
+                     none ->
+                         OldFull = [],
+                         OldWitnesses = []
+                 end,
+                 Run = [FLU || {FLU, Hst} <- TheFLU_Hosts,
+                               Hst == RelativeHost
+                                   orelse RelativeHost == all,
+                               not lists:member(FLU,
+                                                OldFull++OldWitnesses)],
+                 %% Gaaah, need to find the host for FLUs not present
+                 %% in FLUsF ++ FLUsW.
+                 OldFLUsF = [d_get({kv,{flu,FLU}}, E) || FLU <- OldFull],
+                 OldFLUsW = [d_get({kv,{flu,FLU}}, E) || FLU <- OldWitnesses],
+                 OldTheFLU_Hosts =
+                     [{FLU, Host} || {flu, FLU, Host, _Port, _Ps} <- OldFLUsF ++ OldFLUsW],
+                 %% Yay, now we have the info we need for local FLU Stop list.
+                 Stop = [FLU || FLU <- OldFull++OldWitnesses,
+                                not (lists:member(FLU, FullList)
+                                     orelse
+                                     lists:member(FLU, Witnesses)),
+                                lists:member({FLU, RelativeHost}, OldTheFLU_Hosts)
+                                    orelse RelativeHost == all],
+                 PropsExtra = [],
+                 %% PropsExtra = [{auto_gen,true}],
+                 Add(#chain_def_v1{name=Name,
+                                   mode=CMode, full=Ps_F, witnesses=Ps_W,
+                                   old_full=OldFull, old_witnesses=OldWitnesses,
+                                   local_run=Run, local_stop=Stop,
+                                   props=Props ++ PropsExtra}),
+                 ok;
+             false ->
+                 ok
+         end
+     end || {{kv,{chain,Name}}, V} <- New_list],
+
+    {x, lists:reverse(get(final))}.
+
+make_pending_config(Term) ->
+    Dir = get_pending_dir(),
+    Blob = io_lib:format("~p.\n", [Term]),
+    {A,B,C} = os:timestamp(),
+    Path = str("~s/~w.~6..0w",[Dir, A*1000000+B, C]),
+    ok = file:write_file(Path, Blob).
+
+%% @doc Check a "quick admin" directory for sanity
+%%
+%% This is a quick admin tool, though perhaps "tool" is too big of a word.
+%% The meaning of "quick" is closer to "quick & dirty hack".  Any
+%% violation of the assumptions of these quick admin functions will result in
+%% unspecified behavior, bugs, plagues, and perhaps lost data.
+%%
+%% All files in this directory are assumed to have names of the same length
+%% (e.g. 4 bytes), ASCII formatted numbers, greater than 0, left-padded with
+%% "0".
Each file is assumed to have zero or more AST tuples within, parsable +%% using {ok, ListOfAST_tuples} = file:consult(QuickAdminFile). +%% +%% The largest numbered file is assumed to be all of the AST changes that we +%% want to apply in a single batch. The AST tuples of all files with smaller +%% numbers will be concatenated together to create the prior history of +%% cluster-of-clusters. We assume that all transitions inside these earlier +%% files were actually safe & sane, therefore any sanity problem can only +%% be caused by the contents of the largest numbered file. + +quick_admin_sanity_check(File) -> + try + {ok, Env} = quick_admin_run_ast(File), + ok + catch X:Y -> + {error, {X,Y, erlang:get_stacktrace()}} + end. + +quick_admin_apply(File, HostName) -> + try + {ok, Env} = quick_admin_run_ast(File), + {_, Cs} = diff_env(Env, HostName), + [ok = make_pending_config(C) || C <- Cs], + {PassFLUs, PassChains} = machi_lifecycle_mgr:process_pending(), + case length(PassFLUs) + length(PassChains) of + N when N == length(Cs) -> + ok = quick_admin_add_archive_file(File), + ok; + _ -> + {error, {expected, length(Cs), Cs, got, PassFLUs, PassChains}} + end + catch X:Y -> + {error, {X,Y, erlang:get_stacktrace()}} + end. + + +quick_admin_parse_quick(F) -> + {ok, Terms} = file:consult(F), + Terms. + +quick_admin_run_ast(File) -> + Prevs = quick_admin_list_archive_files(), + PrevASTs = lists:append([quick_admin_parse_quick(F) || F <- Prevs]), + LatestASTs = quick_admin_parse_quick(File), + {ok, _Env} = run_ast(PrevASTs ++ [switch_old_and_new] ++ LatestASTs). + +quick_admin_list_archive_files() -> + Prevs0 = filelib:wildcard(get_quick_admin_dir() ++ "/*"), + [Fn || Fn <- Prevs0, base_fn_all_digits(Fn)]. + +base_fn_all_digits(Path) -> + Base = filename:basename(Path), + lists:all(fun is_ascii_digit/1, Base). + +is_ascii_digit(X) when $0 =< X, X =< $9 -> + true; +is_ascii_digit(_) -> + false. + +quick_admin_add_archive_file(File) -> + Prevs = quick_admin_list_archive_files(), + N = case [list_to_integer(filename:basename(Fn)) || Fn <- Prevs] of + [] -> 0; + Ns -> lists:max(Ns) + end, + Dir = get_quick_admin_dir(), + NewName = str("~s/~6..0w", [Dir, N + 1]), + {ok, Contents} = file:read_file(File), + ok = file:write_file(NewName, Contents). diff --git a/src/machi_listener_sup.erl b/src/machi_listener_sup.erl deleted file mode 100644 index f2362ad..0000000 --- a/src/machi_listener_sup.erl +++ /dev/null @@ -1,89 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% @doc A supervisor to hold ranch listener for sigle FLU. -%% It holds at most one child worker. - -%% TODO: This supervisor is maybe useless. First introduced for -%% workaround to start listener dynamically in flu1 initialization -%% time. 
Because psup is being blocked in flu1 initialization time,
-%% adding a child to psup leads to deadlock. If initialization can be
-%% done only by static arguments, then this supervisor should be
-%% removed and added as a direct child of `machi_flu_psup'.
-
--module(machi_listener_sup).
--behaviour(supervisor).
-
-%% public API
--export([start_link/1,
-         start_listener/6,
-         stop_listener/1,
-         make_listener_sup_name/1,
-         make_listener_name/1]).
-
-%% supervisor callback
--export([init/1]).
-
--include("machi_projection.hrl").
-
--define(BACKLOG, 8192).
-
--spec start_link(pv1_server()) -> {ok, pid()}.
-start_link(FluName) ->
-    supervisor:start_link({local, make_listener_sup_name(FluName)}, ?MODULE, []).
-
--spec start_listener(pv1_server(), inet:port_number(), boolean(),
-                     string(), ets:tab(), atom() | pid()) -> {ok, pid()}.
-start_listener(FluName, TcpPort, Witness, DataDir, EpochTab, ProjStore) ->
-    supervisor:start_child(make_listener_sup_name(FluName),
-                           child_spec(FluName, TcpPort, Witness, DataDir,
-                                      EpochTab, ProjStore)).
-
--spec stop_listener(pv1_server()) -> ok.
-stop_listener(FluName) ->
-    SupName = make_listener_sup_name(FluName),
-    ListenerName = make_listener_name(FluName),
-    ok = supervisor:terminate_child(SupName, ListenerName),
-    ok = supervisor:delete_child(SupName, ListenerName).
-
--spec make_listener_name(pv1_server()) -> atom().
-make_listener_sup_name(FluName) when is_atom(FluName) ->
-    list_to_atom(atom_to_list(FluName) ++ "_listener_sup").
-
--spec make_listener_sup_name(pv1_server()) -> atom().
-make_listener_name(FluName) ->
-    list_to_atom(atom_to_list(FluName) ++ "_listener").
-
-%% Supervisor callback
-
-init([]) ->
-    SupFlags = {one_for_one, 1000, 10},
-    {ok, {SupFlags, []}}.
-
--spec child_spec(pv1_server(), inet:port_number(), boolean(),
-                 string(), ets:tab(), atom() | pid()) -> supervisor:child_spec().
-child_spec(FluName, TcpPort, Witness, DataDir, EpochTab, ProjStore) ->
-    ListenerName = make_listener_name(FluName),
-    NbAcceptors = 100,
-    TcpOpts = [{port, TcpPort}, {backlog, ?BACKLOG}],
-    NetServerOpts = [FluName, Witness, DataDir, EpochTab, ProjStore],
-    ranch:child_spec(ListenerName, NbAcceptors,
-                     ranch_tcp, TcpOpts,
-                     machi_flu1_net_server, NetServerOpts).
diff --git a/src/machi_pb_high_client.erl b/src/machi_pb_high_client.erl
index c8149ad..8cccbfd 100644
--- a/src/machi_pb_high_client.erl
+++ b/src/machi_pb_high_client.erl
@@ -38,7 +38,7 @@
          connected_p/1,
          echo/2, echo/3,
          auth/3, auth/4,
-         append_chunk/7, append_chunk/8,
+         append_chunk/6, append_chunk/7,
          write_chunk/5, write_chunk/6,
          read_chunk/5, read_chunk/6,
          trim_chunk/4, trim_chunk/5,
@@ -58,7 +58,7 @@
           count=0 :: non_neg_integer()
          }).
 
-%% @doc official error types that is specific in Machi
+%% Official error types that are specific to Machi
 -type machi_client_error_reason() :: bad_arg | wedged | bad_checksum |
                                      partition | not_written | written |
                                      trimmed | no_such_file | partial_read |
@@ -96,21 +96,21 @@ auth(PidSpec, User, Pass) ->
 auth(PidSpec, User, Pass, Timeout) ->
     send_sync(PidSpec, {auth, User, Pass}, Timeout).
 
--spec append_chunk(pid(), CoC_namespace::binary(), CoC_locator::integer(), Prefix::binary(), Chunk::binary(),
+-spec append_chunk(pid(), PlacementKey::binary(), Prefix::binary(), Chunk::binary(),
                    CSum::binary(), ChunkExtra::non_neg_integer()) ->
                           {ok, Filename::string(), Offset::machi_dt:file_offset()} |
                           {error, machi_client_error_reason()}.
-append_chunk(PidSpec, CoC_namespace, CoC_locator, Prefix, Chunk, CSum, ChunkExtra) -> - append_chunk(PidSpec, CoC_namespace, CoC_locator, Prefix, Chunk, CSum, ChunkExtra, ?DEFAULT_TIMEOUT). +append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra) -> + append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra, ?DEFAULT_TIMEOUT). --spec append_chunk(pid(), CoC_namespace::binary(), CoC_locator::integer(), Prefix::binary(), +-spec append_chunk(pid(), PlacementKey::binary(), Prefix::binary(), Chunk::binary(), CSum::binary(), ChunkExtra::non_neg_integer(), Timeout::non_neg_integer()) -> {ok, Filename::string(), Offset::machi_dt:file_offset()} | {error, machi_client_error_reason()}. -append_chunk(PidSpec, CoC_namespace, CoC_locator, Prefix, Chunk, CSum, ChunkExtra, Timeout) -> - send_sync(PidSpec, {append_chunk, CoC_namespace, CoC_locator, Prefix, Chunk, CSum, ChunkExtra}, Timeout). +append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra, Timeout) -> + send_sync(PidSpec, {append_chunk, PlacementKey, Prefix, Chunk, CSum, ChunkExtra}, Timeout). -spec write_chunk(pid(), File::string(), machi_dt:file_offset(), Chunk::binary(), CSum::binary()) -> @@ -145,7 +145,7 @@ read_chunk(PidSpec, File, Offset, Size, Options, Timeout) -> send_sync(PidSpec, {read_chunk, File, Offset, Size, Options}, Timeout). %% @doc Trims arbitrary binary range of any file. If a specified range -%% has any byte trimmed, it fails and returns `{error, trimmed}`. +%% has any byte trimmed, it fails and returns `{error, trimmed}'. %% Otherwise it trims all bytes in that range. If there are %% overlapping chunks with client-specified checksum, they will cut %% off and checksum are re-calculated in server side. TODO: Add @@ -281,14 +281,15 @@ do_send_sync2({auth, User, Pass}, #state{sock=Sock}=S) -> Res = {bummer, {X, Y, erlang:get_stacktrace()}}, {Res, S} end; -do_send_sync2({append_chunk, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum, ChunkExtra}, +do_send_sync2({append_chunk, PlacementKey, Prefix, Chunk, CSum, ChunkExtra}, #state{sock=Sock, sock_id=Index, count=Count}=S) -> try ReqID = <>, + PK = if PlacementKey == <<>> -> undefined; + true -> PlacementKey + end, CSumT = convert_csum_req(CSum, Chunk), - Req = #mpb_appendchunkreq{coc_namespace=CoC_Namespace, - coc_locator=CoC_Locator, + Req = #mpb_appendchunkreq{placement_key=PK, prefix=Prefix, chunk=Chunk, csum=CSumT, diff --git a/src/machi_pb_translate.erl b/src/machi_pb_translate.erl index 4c5472c..cb8cef2 100644 --- a/src/machi_pb_translate.erl +++ b/src/machi_pb_translate.erl @@ -52,16 +52,14 @@ from_pb_request(#mpb_ll_request{ req_id=ReqID, append_chunk=#mpb_ll_appendchunkreq{ epoch_id=PB_EpochID, - coc_namespace=CoC_Namespace, - coc_locator=CoC_Locator, + placement_key=PKey, prefix=Prefix, chunk=Chunk, csum=#mpb_chunkcsum{type=CSum_type, csum=CSum}, chunk_extra=ChunkExtra}}) -> EpochID = conv_to_epoch_id(PB_EpochID), CSum_tag = conv_to_csum_tag(CSum_type), - {ReqID, {low_append_chunk, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, + {ReqID, {low_append_chunk, EpochID, PKey, Prefix, Chunk, CSum_tag, CSum, ChunkExtra}}; from_pb_request(#mpb_ll_request{ req_id=ReqID, @@ -172,15 +170,14 @@ from_pb_request(#mpb_request{req_id=ReqID, {ReqID, {high_auth, User, Pass}}; from_pb_request(#mpb_request{req_id=ReqID, append_chunk=IR=#mpb_appendchunkreq{}}) -> - #mpb_appendchunkreq{coc_namespace=CoC_namespace, - coc_locator=CoC_locator, + #mpb_appendchunkreq{placement_key=__todoPK, prefix=Prefix, chunk=Chunk, csum=CSum, 
chunk_extra=ChunkExtra} = IR, TaggedCSum = make_tagged_csum(CSum, Chunk), - {ReqID, {high_append_chunk, CoC_namespace, CoC_locator, Prefix, Chunk, - TaggedCSum, ChunkExtra}}; + {ReqID, {high_append_chunk, __todoPK, Prefix, Chunk, TaggedCSum, + ChunkExtra}}; from_pb_request(#mpb_request{req_id=ReqID, write_chunk=IR=#mpb_writechunkreq{}}) -> #mpb_writechunkreq{chunk=#mpb_chunk{file_name=File, @@ -391,16 +388,15 @@ to_pb_request(ReqID, {low_echo, _BogusEpochID, Msg}) -> to_pb_request(ReqID, {low_auth, _BogusEpochID, User, Pass}) -> #mpb_ll_request{req_id=ReqID, do_not_alter=2, auth=#mpb_authreq{user=User, password=Pass}}; -to_pb_request(ReqID, {low_append_chunk, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, CSum_tag, CSum, ChunkExtra}) -> +to_pb_request(ReqID, {low_append_chunk, EpochID, PKey, Prefix, Chunk, + CSum_tag, CSum, ChunkExtra}) -> PB_EpochID = conv_from_epoch_id(EpochID), CSum_type = conv_from_csum_tag(CSum_tag), PB_CSum = #mpb_chunkcsum{type=CSum_type, csum=CSum}, #mpb_ll_request{req_id=ReqID, do_not_alter=2, append_chunk=#mpb_ll_appendchunkreq{ epoch_id=PB_EpochID, - coc_namespace=CoC_Namespace, - coc_locator=CoC_Locator, + placement_key=PKey, prefix=Prefix, chunk=Chunk, csum=PB_CSum, @@ -504,7 +500,7 @@ to_pb_response(ReqID, {low_auth, _, _, _}, __TODO_Resp) -> #mpb_ll_response{req_id=ReqID, generic=#mpb_errorresp{code=1, msg="AUTH not implemented"}}; -to_pb_response(ReqID, {low_append_chunk, _EID, _N, _L, _Pfx, _Ch, _CST, _CS, _CE}, Resp)-> +to_pb_response(ReqID, {low_append_chunk, _EID, _PKey, _Pfx, _Ch, _CST, _CS, _CE}, Resp)-> case Resp of {ok, {Offset, Size, File}} -> Where = #mpb_chunkpos{offset=Offset, @@ -691,7 +687,7 @@ to_pb_response(ReqID, {high_auth, _User, _Pass}, _Resp) -> #mpb_response{req_id=ReqID, generic=#mpb_errorresp{code=1, msg="AUTH not implemented"}}; -to_pb_response(ReqID, {high_append_chunk, _CoC_n, _CoC_l, _Prefix, _Chunk, _TSum, _CE}, Resp)-> +to_pb_response(ReqID, {high_append_chunk, _TODO, _Prefix, _Chunk, _TSum, _CE}, Resp)-> case Resp of {ok, {Offset, Size, File}} -> Where = #mpb_chunkpos{offset=Offset, @@ -815,6 +811,7 @@ conv_to_epoch_id(#mpb_epochid{epoch_number=Epoch, conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch, epoch_csum=CSum, author_server=Author, + chain_name=ChainName, all_members=AllMembers, witnesses=Witnesses, creation_time=CTime, @@ -828,6 +825,7 @@ conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch, #projection_v1{epoch_number=Epoch, epoch_csum=CSum, author_server=to_atom(Author), + chain_name=to_atom(ChainName), all_members=[to_atom(X) || X <- AllMembers], witnesses=[to_atom(X) || X <- Witnesses], creation_time=conv_to_now(CTime), @@ -957,7 +955,7 @@ conv_from_status({error, partial_read}) -> conv_from_status({error, bad_epoch}) -> 'BAD_EPOCH'; conv_from_status(_OOPS) -> - io:format(user, "HEY, ~s:~w got ~w\n", [?MODULE, ?LINE, _OOPS]), + io:format(user, "HEY, ~s:~w got ~p\n", [?MODULE, ?LINE, _OOPS]), 'BAD_JOSS'. 
conv_to_boolean(undefined) -> @@ -975,6 +973,7 @@ conv_from_boolean(true) -> conv_from_projection_v1(#projection_v1{epoch_number=Epoch, epoch_csum=CSum, author_server=Author, + chain_name=ChainName, all_members=AllMembers, witnesses=Witnesses, creation_time=CTime, @@ -988,6 +987,7 @@ conv_from_projection_v1(#projection_v1{epoch_number=Epoch, #mpb_projectionv1{epoch_number=Epoch, epoch_csum=CSum, author_server=to_list(Author), + chain_name=to_list(ChainName), all_members=[to_list(X) || X <- AllMembers], witnesses=[to_list(X) || X <- Witnesses], creation_time=conv_from_now(CTime), diff --git a/src/machi_projection.erl b/src/machi_projection.erl index f6e7cbc..c977fd9 100644 --- a/src/machi_projection.erl +++ b/src/machi_projection.erl @@ -174,6 +174,7 @@ make_summary(#projection_v1{epoch_number=EpochNum, repairing=Repairing_list, dbg=Dbg, dbg2=Dbg2}) -> [{epoch,EpochNum}, {csum,_CSum4}, +{all, _All_list}, {author,Author}, {mode,CMode},{witnesses, Witness_list}, {upi,UPI_list},{repair,Repairing_list},{down,Down_list}] ++ [{d,Dbg}, {d2,Dbg2}]. diff --git a/src/machi_projection_store.erl b/src/machi_projection_store.erl index ba50544..d1d36d9 100644 --- a/src/machi_projection_store.erl +++ b/src/machi_projection_store.erl @@ -321,7 +321,7 @@ do_proj_write3(ProjType, #projection_v1{epoch_number=Epoch, end. do_proj_write4(ProjType, Proj, Path, Epoch, #state{consistency_mode=CMode}=S) -> - {ok, FH} = file:open(Path, [write, raw, binary]), + {{ok, FH}, Epoch, Path} = {file:open(Path, [write, raw, binary]), Epoch, Path}, ok = file:write(FH, term_to_binary(Proj)), ok = file:sync(FH), ok = file:close(FH), @@ -387,7 +387,6 @@ wait_for_liveness(PidSpec, StartTime, WaitTime) -> undefined -> case timer:now_diff(os:timestamp(), StartTime) div 1000 of X when X < WaitTime -> - io:format(user, "\nYOO ~p ~p\n", [PidSpec, lists:sort(registered())]), timer:sleep(1), wait_for_liveness(PidSpec, StartTime, WaitTime) end; diff --git a/src/machi_proxy_flu1_client.erl b/src/machi_proxy_flu1_client.erl index e4bc0d2..2cbaabd 100644 --- a/src/machi_proxy_flu1_client.erl +++ b/src/machi_proxy_flu1_client.erl @@ -58,9 +58,7 @@ -export([ %% File API append_chunk/4, append_chunk/5, - append_chunk/6, append_chunk/7, append_chunk_extra/5, append_chunk_extra/6, - append_chunk_extra/7, append_chunk_extra/8, read_chunk/6, read_chunk/7, checksum_list/3, checksum_list/4, list_files/2, list_files/3, @@ -113,51 +111,22 @@ append_chunk(PidSpec, EpochID, Prefix, Chunk) -> %% with `Prefix'. append_chunk(PidSpec, EpochID, Prefix, Chunk, Timeout) -> - append_chunk_extra(PidSpec, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, 0, Timeout). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - -append_chunk(PidSpec, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk) -> - append_chunk(PidSpec, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk, infinity). - -%% @doc Append a chunk (binary- or iolist-style) of data to a file -%% with `Prefix'. - -append_chunk(PidSpec, EpochID, CoC_Namespace, CoC_Locator, Prefix, Chunk, Timeout) -> - append_chunk_extra(PidSpec, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, 0, Timeout). + gen_server:call(PidSpec, {req, {append_chunk, EpochID, Prefix, Chunk}}, + Timeout). %% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix'. 
append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra) when is_integer(ChunkExtra), ChunkExtra >= 0 -> - append_chunk_extra(PidSpec, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, ChunkExtra, infinity). + append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra, infinity). %% @doc Append a chunk (binary- or iolist-style) of data to a file %% with `Prefix'. append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra, Timeout) -> - append_chunk_extra(PidSpec, EpochID, - ?DEFAULT_COC_NAMESPACE, ?DEFAULT_COC_LOCATOR, - Prefix, Chunk, ChunkExtra, Timeout). - -append_chunk_extra(PidSpec, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra) -> - append_chunk_extra(PidSpec, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, infinity). - -append_chunk_extra(PidSpec, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra, Timeout) -> - gen_server:call(PidSpec, {req, {append_chunk_extra, EpochID, - CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra}}, + gen_server:call(PidSpec, {req, {append_chunk_extra, EpochID, Prefix, + Chunk, ChunkExtra}}, Timeout). %% @doc Read a chunk of data of size `Size' from `File' at `Offset'. @@ -415,12 +384,12 @@ do_req_retry(_Req, 2, Err, S) -> do_req_retry(Req, Depth, _Err, S) -> do_req(Req, Depth + 1, try_connect(disconnect(S))). -make_req_fun({append_chunk_extra, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra}, +make_req_fun({append_chunk, EpochID, Prefix, Chunk}, #state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) -> - fun() -> Mod:append_chunk_extra(Sock, EpochID, CoC_Namespace, CoC_Locator, - Prefix, Chunk, ChunkExtra) - end; + fun() -> Mod:append_chunk(Sock, EpochID, Prefix, Chunk) end; +make_req_fun({append_chunk_extra, EpochID, Prefix, Chunk, ChunkExtra}, + #state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) -> + fun() -> Mod:append_chunk_extra(Sock, EpochID, Prefix, Chunk, ChunkExtra) end; make_req_fun({read_chunk, EpochID, File, Offset, Size, Opts}, #state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) -> fun() -> Mod:read_chunk(Sock, EpochID, File, Offset, Size, Opts) end; diff --git a/src/machi_sup.erl b/src/machi_sup.erl index 729b6a7..3e02578 100644 --- a/src/machi_sup.erl +++ b/src/machi_sup.erl @@ -47,6 +47,8 @@ start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []). init([]) -> + %% {_, Ps} = process_info(self(), links), + %% [unlink(P) || P <- Ps], RestartStrategy = one_for_one, MaxRestarts = 1000, MaxSecondsBetweenRestarts = 3600, @@ -57,8 +59,11 @@ init([]) -> Shutdown = ?SHUTDOWN, Type = supervisor, - FluSup = {machi_flu_sup, {machi_flu_sup, start_link, []}, - Restart, Shutdown, Type, []}, - RanchSup = {ranch_sup, {ranch_sup, start_link, []}, - Restart, Shutdown, supervisor, [ranch_sup]}, - {ok, {SupFlags, [FluSup, RanchSup]}}. + ServerSup = + {machi_flu_sup, {machi_flu_sup, start_link, []}, + Restart, Shutdown, Type, []}, + LifecycleMgr = + {machi_lifecycle_mgr, {machi_lifecycle_mgr, start_link, []}, + Restart, Shutdown, worker, []}, + + {ok, {SupFlags, [ServerSup, LifecycleMgr]}}. 
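The machi_proxy_flu1_client hunks above retire the CoC namespace/locator arguments, leaving append_chunk/4,5 and append_chunk_extra/5,6 as the whole append surface. For orientation, here is a caller-side sketch of the surviving API; the Proxy and EpochID bindings are illustrative (obtained elsewhere via start_link/1 and the FLU's current epoch), and results are matched loosely in the same style as this patch's test suites:

    %% Illustrative sketch only; not part of this patch.
    proxy_append_sketch(Proxy, EpochID) ->
        Prefix = <<"demo_prefix">>,
        Chunk  = <<"hello, machi">>,
        %% Plain append: the server-side sequencer picks file and offset.
        {ok, _} = machi_proxy_flu1_client:append_chunk(
                    Proxy, EpochID, Prefix, Chunk, infinity),
        %% Append, also reserving 1024 extra bytes after this chunk.
        {ok, _} = machi_proxy_flu1_client:append_chunk_extra(
                    Proxy, EpochID, Prefix, Chunk, 1024, infinity),
        ok.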
diff --git a/src/machi_util.erl b/src/machi_util.erl index aa5f070..8aa1972 100644 --- a/src/machi_util.erl +++ b/src/machi_util.erl @@ -30,13 +30,13 @@ hexstr_to_int/1, int_to_hexstr/2, int_to_hexbin/2, make_binary/1, make_string/1, make_regname/1, - make_config_filename/4, make_config_filename/2, + make_config_filename/2, make_checksum_filename/4, make_checksum_filename/2, - make_data_filename/6, make_data_filename/2, + make_data_filename/4, make_data_filename/2, make_projection_filename/2, is_valid_filename/1, parse_filename/1, - read_max_filenum/4, increment_max_filenum/4, + read_max_filenum/2, increment_max_filenum/2, info_msg/2, verb/1, verb/2, mbytes/1, pretty_time/0, pretty_time/2, @@ -68,20 +68,10 @@ make_regname(Prefix) when is_list(Prefix) -> %% @doc Calculate a config file path, by common convention. --spec make_config_filename(string(), machi_dt:coc_namespace(), machi_dt:coc_locator(), string()) -> - string(). -make_config_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix) -> - Locator_str = int_to_hexstr(CoC_Locator, 32), - lists:flatten(io_lib:format("~s/config/~s^~s^~s", - [DataDir, Prefix, CoC_Namespace, Locator_str])). - -%% @doc Calculate a config file path, by common convention. - -spec make_config_filename(string(), string()) -> string(). -make_config_filename(DataDir, Filename) -> - lists:flatten(io_lib:format("~s/config/~s", - [DataDir, Filename])). +make_config_filename(DataDir, Prefix) -> + lists:flatten(io_lib:format("~s/config/~s", [DataDir, Prefix])). %% @doc Calculate a checksum file path, by common convention. @@ -102,19 +92,17 @@ make_checksum_filename(DataDir, FileName) -> %% @doc Calculate a file data file path, by common convention. --spec make_data_filename(string(), machi_dt:coc_namespace(), machi_dt:coc_locator(), string(), atom()|string()|binary(), integer()|string()) -> +-spec make_data_filename(string(), string(), atom()|string()|binary(), integer()|string()) -> {binary(), string()}. -make_data_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix, SequencerName, FileNum) +make_data_filename(DataDir, Prefix, SequencerName, FileNum) when is_integer(FileNum) -> - Locator_str = int_to_hexstr(CoC_Locator, 32), - File = erlang:iolist_to_binary(io_lib:format("~s^~s^~s^~s^~w", - [Prefix, CoC_Namespace, Locator_str, SequencerName, FileNum])), + File = erlang:iolist_to_binary(io_lib:format("~s^~s^~w", + [Prefix, SequencerName, FileNum])), make_data_filename2(DataDir, File); -make_data_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix, SequencerName, String) +make_data_filename(DataDir, Prefix, SequencerName, String) when is_list(String) -> - Locator_str = int_to_hexstr(CoC_Locator, 32), - File = erlang:iolist_to_binary(io_lib:format("~s^~s^~s^~s^~s", - [Prefix, CoC_Namespace, Locator_str, SequencerName, string])), + File = erlang:iolist_to_binary(io_lib:format("~s^~s^~s", + [Prefix, SequencerName, string])), make_data_filename2(DataDir, File). make_data_filename2(DataDir, File) -> @@ -146,45 +134,34 @@ make_projection_filename(DataDir, File) -> -spec is_valid_filename( Filename :: string() ) -> true | false. is_valid_filename(Filename) -> case parse_filename(Filename) of - {} -> false; - {_,_,_,_,_} -> true + [] -> false; + _ -> true end. %% @doc Given a machi filename, return a set of components in a list. %% The components will be: %%
 %% <ul>
 %% <li> Prefix </li>
-%% <li> CoC Namespace </li>
-%% <li> CoC locator </li>
 %% <li> UUID </li>
 %% <li> Sequence number </li>
 %% </ul>
%% %% Invalid filenames will return an empty list. --spec parse_filename( Filename :: string() ) -> {} | {string(), machi_dt:coc_namespace(), machi_dt:coc_locator(), string(), string() }. +-spec parse_filename( Filename :: string() ) -> [ string() ]. parse_filename(Filename) -> case string:tokens(Filename, "^") of - [Prefix, CoC_NS, CoC_Loc, UUID, SeqNo] -> - {Prefix, CoC_NS, list_to_integer(CoC_Loc), UUID, SeqNo}; - [Prefix, CoC_Loc, UUID, SeqNo] -> - %% string:tokens() doesn't consider "foo^^bar" as 3 tokens {sigh} - case re:replace(Filename, "[^^]+", "x", [global,{return,binary}]) of - <<"x^^x^x^x">> -> - {Prefix, <<"">>, list_to_integer(CoC_Loc), UUID, SeqNo}; - _ -> - {} - end; - _ -> {} + [_Prefix, _UUID, _SeqNo] = L -> L; + _ -> [] end. %% @doc Read the file size of a config file, which is used as the %% basis for a minimum sequence number. --spec read_max_filenum(string(), machi_dt:coc_namespace(), machi_dt:coc_locator(), string()) -> +-spec read_max_filenum(string(), string()) -> non_neg_integer(). -read_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix) -> - case file:read_file_info(make_config_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix)) of +read_max_filenum(DataDir, Prefix) -> + case file:read_file_info(make_config_filename(DataDir, Prefix)) of {error, enoent} -> 0; {ok, FI} -> @@ -194,11 +171,11 @@ read_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix) -> %% @doc Increase the file size of a config file, which is used as the %% basis for a minimum sequence number. --spec increment_max_filenum(string(), machi_dt:coc_namespace(), machi_dt:coc_locator(), string()) -> +-spec increment_max_filenum(string(), string()) -> ok | {error, term()}. -increment_max_filenum(DataDir, CoC_Namespace, CoC_Locator, Prefix) -> +increment_max_filenum(DataDir, Prefix) -> try - {ok, FH} = file:open(make_config_filename(DataDir, CoC_Namespace, CoC_Locator, Prefix), [append]), + {ok, FH} = file:open(make_config_filename(DataDir, Prefix), [append]), ok = file:write(FH, "x"), ok = file:sync(FH), ok = file:close(FH) diff --git a/src/machi_yessir_client.erl b/src/machi_yessir_client.erl index 1bdef2a..f37b618 100644 --- a/src/machi_yessir_client.erl +++ b/src/machi_yessir_client.erl @@ -180,9 +180,9 @@ checksum_list(#yessir{name=Name,chunk_size=ChunkSize}, _EpochID, File) -> MaxOffset -> C = machi_util:make_tagged_csum(client_sha, make_csum(Name, ChunkSize)), - Cs = [{Offset, ChunkSize, C} || - Offset <- lists:seq(?MINIMUM_OFFSET, MaxOffset, ChunkSize)], - {ok, term_to_binary(Cs)} + Cs = [machi_csum_table:encode_csum_file_entry_bin(Offset, ChunkSize, C) || + Offset <- lists:seq(?MINIMUM_OFFSET, MaxOffset, ChunkSize)], + {ok, Cs} end. %% @doc Fetch the list of chunk checksums for `File'. diff --git a/test/machi_admin_util_test.erl b/test/machi_admin_util_test.erl index 1ebbbf3..54a75f4 100644 --- a/test/machi_admin_util_test.erl +++ b/test/machi_admin_util_test.erl @@ -33,57 +33,49 @@ -define(FLU_C, machi_flu1_client). verify_file_checksums_test_() -> - {setup, - fun() -> os:cmd("rm -rf ./data") end, - fun(_) -> os:cmd("rm -rf ./data") end, - {timeout, 60, fun() -> verify_file_checksums_test2() end} - }. + {timeout, 60, fun() -> verify_file_checksums_test2() end}. 
verify_file_checksums_test2() ->
    Host = "localhost",
    TcpPort = 32958,
    DataDir = "./data",
    W_props = [{initial_wedged, false}],
-    try
-        machi_test_util:start_flu_package(verify1_flu, TcpPort, DataDir,
+    machi_flu1_test:start_flu_package(verify1_flu, TcpPort, DataDir,
                                       W_props),
-        Sock1 = ?FLU_C:connect(#p_srvr{address=Host, port=TcpPort}),
-        try
-            Prefix = <<"verify_prefix">>,
-            NumChunks = 10,
-            [{ok, _} = ?FLU_C:append_chunk(Sock1, ?DUMMY_PV1_EPOCH,
-                                           Prefix, <<X:(X*8)/big>>) ||
-                X <- lists:seq(1, NumChunks)],
-            {ok, [{_FileSize,File}]} = ?FLU_C:list_files(Sock1, ?DUMMY_PV1_EPOCH),
-            ?assertEqual({ok, []},
-                         machi_admin_util:verify_file_checksums_remote(
-                           Host, TcpPort, ?DUMMY_PV1_EPOCH, File)),
+    Sock1 = ?FLU_C:connect(#p_srvr{address=Host, port=TcpPort}),
+    try
+        Prefix = <<"verify_prefix">>,
+        NumChunks = 10,
+        [{ok, _} = ?FLU_C:append_chunk(Sock1, ?DUMMY_PV1_EPOCH,
+                                       Prefix, <<X:(X*8)/big>>) ||
+            X <- lists:seq(1, NumChunks)],
+        {ok, [{_FileSize,File}]} = ?FLU_C:list_files(Sock1, ?DUMMY_PV1_EPOCH),
+        {ok, []} = machi_admin_util:verify_file_checksums_remote(
+                     Host, TcpPort, ?DUMMY_PV1_EPOCH, File),
 
-            %% Clobber the first 3 chunks, which are sizes 1/2/3.
-            {_, Path} = machi_util:make_data_filename(DataDir,binary_to_list(File)),
-            {ok, FH} = file:open(Path, [read,write]),
-            {ok, _} = file:position(FH, ?MINIMUM_OFFSET),
-            ok = file:write(FH, "y"),
-            ok = file:write(FH, "yo"),
-            ok = file:write(FH, "yo!"),
-            ok = file:close(FH),
+        %% Clobber the first 3 chunks, which are sizes 1/2/3.
+        {_, Path} = machi_util:make_data_filename(DataDir,binary_to_list(File)),
+        {ok, FH} = file:open(Path, [read,write]),
+        {ok, _} = file:position(FH, ?MINIMUM_OFFSET),
+        ok = file:write(FH, "y"),
+        ok = file:write(FH, "yo"),
+        ok = file:write(FH, "yo!"),
+        ok = file:close(FH),
 
-            %% Check the local flavor of the API: should be 3 bad checksums
-            {ok, Res1} = machi_admin_util:verify_file_checksums_local(
-                           Host, TcpPort, ?DUMMY_PV1_EPOCH, Path),
-            3 = length(Res1),
+        %% Check the local flavor of the API: should be 3 bad checksums
+        {ok, Res1} = machi_admin_util:verify_file_checksums_local(
+                       Host, TcpPort, ?DUMMY_PV1_EPOCH, Path),
+        3 = length(Res1),
 
-            %% Check the remote flavor of the API: should be 3 bad checksums
-            {ok, Res2} = machi_admin_util:verify_file_checksums_remote(
-                           Host, TcpPort, ?DUMMY_PV1_EPOCH, File),
-            3 = length(Res2),
+        %% Check the remote flavor of the API: should be 3 bad checksums
+        {ok, Res2} = machi_admin_util:verify_file_checksums_remote(
+                       Host, TcpPort, ?DUMMY_PV1_EPOCH, File),
+        3 = length(Res2),
 
-            ok
-        after
-            catch ?FLU_C:quit(Sock1)
-        end
+        ok
     after
-        catch machi_test_util:stop_flu_package()
+        catch ?FLU_C:quit(Sock1),
+        catch machi_flu1_test:stop_flu_package(verify1_flu)
     end.
 
 -endif.
% !PULSE diff --git a/test/machi_ap_repair_eqc.erl b/test/machi_ap_repair_eqc.erl index 23e0e93..0c9f349 100644 --- a/test/machi_ap_repair_eqc.erl +++ b/test/machi_ap_repair_eqc.erl @@ -342,7 +342,7 @@ setup_target(Num, Seed, Verbose) -> setup_chain(Seed, AllListE, FLUNames, MgrNames, Dict) -> ok = shutdown_hard(), [begin - machi_test_util:clean_up_dir(Dir), + machi_flu1_test:clean_up_data_dir(Dir), filelib:ensure_dir(Dir ++ "/not-used") end || {_P, Dir} <- AllListE], [catch ets:delete(T) || T <- tabs()], @@ -407,8 +407,8 @@ stabilize(0, _T) -> stabilize(_CmdsLen, #target{flu_names=FLUNames, mgr_names=MgrNames, verbose=Verbose}) -> machi_partition_simulator:no_partitions(), - wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames, - 100, Verbose), + true = wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames, + 100, Verbose), ok. chain_state_all_ok(FLUNames) -> diff --git a/test/machi_chain_manager1_converge_demo.erl b/test/machi_chain_manager1_converge_demo.erl index 9303701..cee7a78 100644 --- a/test/machi_chain_manager1_converge_demo.erl +++ b/test/machi_chain_manager1_converge_demo.erl @@ -187,15 +187,18 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> end || #p_srvr{name=Name}=P <- Ps], MembersDict = machi_projection:make_members_dict(Ps), Witnesses = proplists:get_value(witnesses, MgrOpts, []), + CMode = case {Witnesses, proplists:get_value(consistency_mode, MgrOpts, + ap_mode)} of + {[_|_], _} -> cp_mode; + {_, cp_mode} -> cp_mode; + {_, ap_mode} -> ap_mode + end, MgrNamez = [begin MgrName = machi_flu_psup:make_mgr_supname(Name), - ok = ?MGR:set_chain_members(MgrName,MembersDict,Witnesses), + ok = ?MGR:set_chain_members(MgrName, ch_demo, 0, CMode, + MembersDict,Witnesses), {Name, MgrName} end || #p_srvr{name=Name} <- Ps], - CpApMode = case Witnesses /= [] of - true -> cp_mode; - false -> ap_mode - end, try [{_, Ma}|_] = MgrNamez, @@ -303,9 +306,9 @@ convergence_demo_testfun(NumFLUs, MgrOpts0) -> [{FLU, true} = {FLU, ?MGR:projection_transitions_are_sane_retrospective(Psx, FLU)} || {FLU, Psx} <- PrivProjs] catch - _Err:_What when CpApMode == cp_mode -> + _Err:_What when CMode == cp_mode -> io:format(user, "none proj skip detected, TODO? ", []); - _Err:_What when CpApMode == ap_mode -> + _Err:_What when CMode == ap_mode -> io:format(user, "PrivProjs ~p\n", [PrivProjs]), exit({line, ?LINE, _Err, _What}) end, @@ -371,9 +374,9 @@ timer:sleep(1234), {FLU, Psx} <- PrivProjs], io:format(user, "\nAll sanity checks pass, hooray!\n", []) catch - _Err:_What when CpApMode == cp_mode -> + _Err:_What when CMode == cp_mode -> io:format(user, "none proj skip detected, TODO? ", []); - _Err:_What when CpApMode == ap_mode -> + _Err:_What when CMode == ap_mode -> io:format(user, "Report ~p\n", [Report]), io:format(user, "PrivProjs ~p\n", [PrivProjs]), exit({line, ?LINE, _Err, _What}) diff --git a/test/machi_chain_manager1_test.erl b/test/machi_chain_manager1_test.erl index 99ecb6a..094443b 100644 --- a/test/machi_chain_manager1_test.erl +++ b/test/machi_chain_manager1_test.erl @@ -273,32 +273,54 @@ make_prop_ets() -> -endif. % EQC +make_advance_fun(FitList, FLUList, MgrList, Num) -> + fun() -> + [begin + [catch machi_fitness:trigger_early_adjustment(Fit, Tgt) || + Fit <- FitList, + Tgt <- FLUList ], + [catch ?MGR:trigger_react_to_env(Mgr) || Mgr <- MgrList], + ok + end || _ <- lists:seq(1, Num)] + end. 
+ smoke0_test() -> + {ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50), + Host = "localhost", TcpPort = 6623, - {[Pa], [M0], _Dirs} = machi_test_util:start_flu_packages( - 1, TcpPort, "./data.", []), + {ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]), + Pa = #p_srvr{name=a, address=Host, port=TcpPort}, + Members_Dict = machi_projection:make_members_dict([Pa]), + %% Egadz, more racing on startup, yay. TODO fix. + timer:sleep(1), {ok, FLUaP} = ?FLU_PC:start_link(Pa), + {ok, M0} = ?MGR:start_link(a, Members_Dict, [{active_mode, false}]), try pong = ?MGR:ping(M0) after + ok = ?MGR:stop(M0), + ok = machi_flu1:stop(FLUa), ok = ?FLU_PC:quit(FLUaP), - machi_test_util:stop_flu_packages() + ok = machi_partition_simulator:stop() end. smoke1_test_() -> {timeout, 1*60, fun() -> smoke1_test2() end}. smoke1_test2() -> + machi_partition_simulator:start_link({1,2,3}, 100, 0), TcpPort = 62777, - MgrOpts = [{active_mode,false}], - try - {Ps, MgrNames, _Dirs} = machi_test_util:start_flu_packages( - 3, TcpPort, "./data.", MgrOpts), - MembersDict = machi_projection:make_members_dict(Ps), - [machi_chain_manager1:set_chain_members(M, MembersDict) || M <- MgrNames], - Ma = hd(MgrNames), + FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}, {c,TcpPort+2,"./data.c"}], + P_s = [#p_srvr{name=Name, address="localhost", port=Port} || + {Name,Port,_Dir} <- FluInfo], - {ok, P1} = ?MGR:test_calc_projection(Ma, false), + [machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo], + FLUs = [element(2, machi_flu1:start_link([{Name,Port,Dir}])) || + {Name,Port,Dir} <- FluInfo], + MembersDict = machi_projection:make_members_dict(P_s), + {ok, M0} = ?MGR:start_link(a, MembersDict, [{active_mode,false}]), + try + {ok, P1} = ?MGR:test_calc_projection(M0, false), % DERP! Check for race with manager's proxy vs. proj listener ok = lists:foldl( fun(_, {_,{true,[{c,ok},{b,ok},{a,ok}]}}) -> @@ -307,32 +329,54 @@ smoke1_test2() -> ok; % Skip remaining! (_, _Else) -> timer:sleep(10), - ?MGR:test_write_public_projection(Ma, P1) + ?MGR:test_write_public_projection(M0, P1) end, not_ok, lists:seq(1, 1000)), %% Writing the exact same projection multiple times returns ok: %% no change! - {_,{true,[{c,ok},{b,ok},{a,ok}]}} = ?MGR:test_write_public_projection(Ma, P1), - {unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(Ma, false), + {_,{true,[{c,ok},{b,ok},{a,ok}]}} = ?MGR:test_write_public_projection(M0, P1), + {unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(M0, false), ok after - machi_test_util:stop_flu_packages() + ok = ?MGR:stop(M0), + [ok = machi_flu1:stop(X) || X <- FLUs], + ok = machi_partition_simulator:stop() end. -nonunanimous_setup_and_fix_test() -> +nonunanimous_setup_and_fix_test_() -> + os:cmd("rm -f /tmp/moomoo.*"), + {timeout, 1*60, fun() -> nonunanimous_setup_and_fix_test2() end}. 
+ +nonunanimous_setup_and_fix_test2() -> + error_logger:tty(false), + machi_partition_simulator:start_link({1,2,3}, 100, 0), TcpPort = 62877, - MgrOpts = [{active_mode,false}], - {Ps, [Ma,Mb], _Dirs} = machi_test_util:start_flu_packages( - 2, TcpPort, "./data.", MgrOpts), - MembersDict = machi_projection:make_members_dict(Ps), - [machi_chain_manager1:set_chain_members(M, MembersDict) || M <- [Ma, Mb]], - - [Proxy_a, Proxy_b] = Proxies = - [element(2, ?FLU_PC:start_link(P)) || P <- Ps], - + FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}, + {c,TcpPort+2,"./data.c"}], + P_s = [#p_srvr{name=Name, address="localhost", port=Port} || + {Name,Port,_Dir} <- FluInfo], + + [machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo], + {ok, SupPid} = machi_flu_sup:start_link(), + Opts = [{active_mode, false}, {initial_wedged, true}], + ChainName = my_little_chain, + [{ok,_}=machi_flu_psup:start_flu_package(Name, Port, Dir, Opts) || + {Name,Port,Dir} <- FluInfo], + Proxies = [Proxy_a, Proxy_b, Proxy_c] = + [element(2,?FLU_PC:start_link(P)) || P <- P_s], + %% MembersDict = machi_projection:make_members_dict(P_s), + MembersDict = machi_projection:make_members_dict(lists:sublist(P_s, 2)), + Mgrs = [Ma,Mb,Mc] = [a_chmgr, b_chmgr, c_chmgr], + MgrProxies = [{Ma, Proxy_a}, {Mb, Proxy_b}, {Mc, Proxy_c}], + Advance = make_advance_fun([a_fitness,b_fitness,c_fitness], + [a,b,c], + [Mgr || {Mgr,_Proxy} <- MgrProxies], + 3), + ok = machi_chain_manager1:set_chain_members(Ma, ChainName, 0, ap_mode, + MembersDict, []), + ok = machi_chain_manager1:set_chain_members(Mb, ChainName, 0, ap_mode, + MembersDict, []), try - ok = machi_chain_manager1:set_chain_members(Ma, MembersDict, []), - ok = machi_chain_manager1:set_chain_members(Mb, MembersDict, []), {ok, P1} = ?MGR:test_calc_projection(Ma, false), P1a = machi_projection:update_checksum( @@ -368,15 +412,121 @@ nonunanimous_setup_and_fix_test() -> {ok, P2pb} = ?FLU_PC:read_latest_projection(Proxy_b, private), P2 = P2pb#projection_v1{dbg2=[]}, - %% Pspam = machi_projection:update_checksum( - %% P1b#projection_v1{epoch_number=?SPAM_PROJ_EPOCH, - %% dbg=[hello_spam]}), - %% ok = ?FLU_PC:write_projection(Proxy_b, public, Pspam), + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("\nSTEP: Add a 3rd member to the chain.\n", []), + MembersDict3 = machi_projection:make_members_dict(P_s), + ok = machi_chain_manager1:set_chain_members( + Ma, ChainName, EpochNum_a, ap_mode, MembersDict3, []), + + Advance(), + {_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Ma), + {_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Mb), + {_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Mc), + [{ok, #projection_v1{upi=[a,b], repairing=[c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- Proxies], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Remove 'a' from the chain.\n", []), + + MembersDict4 = machi_projection:make_members_dict(tl(P_s)), + ok = machi_chain_manager1:set_chain_members( + Mb, ChainName, TheEpoch_3, ap_mode, MembersDict4, []), + + Advance(), + {ok, {true, _}} = ?FLU_PC:wedge_status(Proxy_a), + {_, _, TheEpoch_4} = ?MGR:trigger_react_to_env(Mb), + {_, _, TheEpoch_4} = ?MGR:trigger_react_to_env(Mc), + [{ok, #projection_v1{upi=[b], repairing=[c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Add a to the chain again (a is running).\n", []), + + MembersDict5 = machi_projection:make_members_dict(P_s), + ok = 
machi_chain_manager1:set_chain_members( + Mb, ChainName, TheEpoch_4, ap_mode, MembersDict5, []), + + Advance(), + {_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Ma), + {_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Mb), + {_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Mc), + [{ok, #projection_v1{upi=[b], repairing=[a,c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- Proxies], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Stop a while a chain member, advance b&c.\n", []), + + ok = machi_flu_psup:stop_flu_package(a), + Advance(), + {_, _, TheEpoch_6} = ?MGR:trigger_react_to_env(Mb), + {_, _, TheEpoch_6} = ?MGR:trigger_react_to_env(Mc), + [{ok, #projection_v1{upi=[b], repairing=[c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Remove 'a' from the chain.\n", []), + + MembersDict7 = machi_projection:make_members_dict(tl(P_s)), + ok = machi_chain_manager1:set_chain_members( + Mb, ChainName, TheEpoch_6, ap_mode, MembersDict7, []), + + Advance(), + {_, _, TheEpoch_7} = ?MGR:trigger_react_to_env(Mb), + {_, _, TheEpoch_7} = ?MGR:trigger_react_to_env(Mc), + [{ok, #projection_v1{upi=[b], repairing=[c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Start a, advance.\n", []), + + [{ok,_}=machi_flu_psup:start_flu_package(Name, Port, Dir, Opts) || + {Name,Port,Dir} <- [hd(FluInfo)]], + Advance(), + {ok, {true, _}} = ?FLU_PC:wedge_status(Proxy_a), + {ok, {false, EpochID_8}} = ?FLU_PC:wedge_status(Proxy_b), + {ok, {false, EpochID_8}} = ?FLU_PC:wedge_status(Proxy_c), + [{ok, #projection_v1{upi=[b], repairing=[c]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Stop a, delete a's data, leave it stopped\n", []), + + ok = machi_flu_psup:stop_flu_package(a), + Advance(), + {_,_,Dir_a} = hd(FluInfo), + [machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- [hd(FluInfo)]], + {ok, {false, _}} = ?FLU_PC:wedge_status(Proxy_b), + {ok, {false, _}} = ?FLU_PC:wedge_status(Proxy_c), + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Add a to the chain again (a is stopped).\n", []), + + MembersDict9 = machi_projection:make_members_dict(P_s), + {_, _, TheEpoch_9} = ?MGR:trigger_react_to_env(Mb), + ok = machi_chain_manager1:set_chain_members( + Mb, ChainName, TheEpoch_9, ap_mode, MembersDict9, []), + Advance(), + {_, _, TheEpoch_9b} = ?MGR:trigger_react_to_env(Mb), + true = (TheEpoch_9b > TheEpoch_9), + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("STEP: Start a, and it joins like it ought to\n", []), + + [{ok,_}=machi_flu_psup:start_flu_package(Name, Port, Dir, Opts) || + {Name,Port,Dir} <- [hd(FluInfo)]], + Advance(), + {ok, {false, {TheEpoch10,_}}} = ?FLU_PC:wedge_status(Proxy_a), + {ok, {false, {TheEpoch10,_}}} = ?FLU_PC:wedge_status(Proxy_b), + {ok, {false, {TheEpoch10,_}}} = ?FLU_PC:wedge_status(Proxy_c), + [{ok, #projection_v1{upi=[b], repairing=[c,a]}} = + ?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- Proxies], ok after + exit(SupPid, normal), [ok = ?FLU_PC:quit(X) || X <- Proxies], - machi_test_util:stop_flu_packages() + ok = machi_partition_simulator:stop(), + error_logger:tty(true) end. 
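The rewritten nonunanimous test above is the main workout for the new six-argument set_chain_members (chain name, committed epoch, consistency mode, members dict, witness list). Reduced to its administrative rhythm, the flow looks roughly like this; a sketch that reuses the test's registered-name conventions (a_chmgr, b_chmgr), with my_chain and the MembersDict arguments as illustrative stand-ins rather than a verbatim excerpt:

    %% Illustrative condensation of the membership flow above.
    membership_sketch(MembersDict2, MembersDict3) ->
        %% Declare the chain: name, base epoch 0, AP mode, no witnesses.
        ok = machi_chain_manager1:set_chain_members(a_chmgr, my_chain, 0,
                                                    ap_mode, MembersDict2, []),
        ok = machi_chain_manager1:set_chain_members(b_chmgr, my_chain, 0,
                                                    ap_mode, MembersDict2, []),
        %% Let the managers react, then grow the chain at the current epoch.
        {_, _, Epoch} = machi_chain_manager1:trigger_react_to_env(a_chmgr),
        ok = machi_chain_manager1:set_chain_members(a_chmgr, my_chain, Epoch,
                                                    ap_mode, MembersDict3, []),
        ok.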
unanimous_report_test() -> diff --git a/test/machi_cinfo_test.erl b/test/machi_cinfo_test.erl index 9699df3..dcb611e 100644 --- a/test/machi_cinfo_test.erl +++ b/test/machi_cinfo_test.erl @@ -48,7 +48,7 @@ setup() -> {c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}} ], [os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps], - {ok, SupPid} = machi_sup:start_link(), + {ok, SupPid} = machi_flu_sup:start_link(), %% Only run a, don't run b & c so we have 100% failures talking to them [begin #p_srvr{name=Name, port=Port, props=Dir} = P, diff --git a/test/machi_cr_client_test.erl b/test/machi_cr_client_test.erl index a370436..299a78a 100644 --- a/test/machi_cr_client_test.erl +++ b/test/machi_cr_client_test.erl @@ -58,9 +58,15 @@ setup_smoke_test(Host, PortBase, Os, Witness_list) -> %% 4. Wait until all others are using epoch id from #3. %% %% Damn, this is a pain to make 100% deterministic, bleh. - ok = machi_chain_manager1:set_chain_members(a_chmgr, D, Witness_list), - ok = machi_chain_manager1:set_chain_members(b_chmgr, D, Witness_list), - ok = machi_chain_manager1:set_chain_members(c_chmgr, D, Witness_list), + CMode = if Witness_list == [] -> ap_mode; + Witness_list /= [] -> cp_mode + end, + ok = machi_chain_manager1:set_chain_members(a_chmgr, ch0, 0, CMode, + D, Witness_list), + ok = machi_chain_manager1:set_chain_members(b_chmgr, ch0, 0, CMode, + D, Witness_list), + ok = machi_chain_manager1:set_chain_members(c_chmgr, ch0, 0, CMode, + D, Witness_list), run_ticks([a_chmgr,b_chmgr,c_chmgr]), %% Everyone is settled on the same damn epoch id. {ok, EpochID} = machi_flu1_client:get_latest_epochid(Host, PortBase+0, @@ -96,7 +102,7 @@ run_ticks(MgrList) -> ok. smoke_test2() -> - {ok, SupPid} = machi_sup:start_link(), + {ok, SupPid} = machi_flu_sup:start_link(), error_logger:tty(false), try Prefix = <<"pre">>, @@ -167,9 +173,7 @@ smoke_test2() -> true = is_binary(KludgeBin), {error, bad_arg} = machi_cr_client:checksum_list(C1, <<"!!!!">>), -io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]), - %% Exactly one file right now, e.g., - %% {ok,[{2098202,<<"pre^b144ef13-db4d-4c9f-96e7-caff02dc754f^1">>}]} + %% Exactly one file right now {ok, [_]} = machi_cr_client:list_files(C1), %% Go back and test append_chunk_extra() and write_chunk() @@ -193,9 +197,8 @@ io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]), end || Seq <- lists:seq(1, Extra10)], {ok, {Off11,Size11,File11}} = machi_cr_client:append_chunk(C1, Prefix, Chunk10), - %% %% Double-check that our reserved extra bytes were really honored! - %% true = (Off11 > (Off10 + (Extra10 * Size10))), -io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]), + %% Double-check that our reserved extra bytes were really honored! + true = (Off11 > (Off10 + (Extra10 * Size10))), ok after @@ -208,7 +211,7 @@ io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]), witness_smoke_test_() -> {timeout, 1*60, fun() -> witness_smoke_test2() end}. witness_smoke_test2() -> - SupPid = case machi_sup:start_link() of + SupPid = case machi_flu_sup:start_link() of {ok, P} -> P; {error, {already_started, P1}} -> P1; Other -> error(Other) diff --git a/test/machi_csum_table_test.erl b/test/machi_csum_table_test.erl index f2b7a4f..c168d45 100644 --- a/test/machi_csum_table_test.erl +++ b/test/machi_csum_table_test.erl @@ -2,31 +2,26 @@ -compile(export_all). -include_lib("eunit/include/eunit.hrl"). --define(HDR, {0, 1024, none}). - -cleanup(Dir) -> - os:cmd("rm -rf " ++ Dir). +-define(HDR, {0, 1024, <<0>>}). 
smoke_test() -> Filename = "./temp-checksum-dumb-file", - _ = cleanup(Filename), + _ = file:delete(Filename), {ok, MC} = machi_csum_table:open(Filename, []), - ?assertEqual([{1024, infinity}], - machi_csum_table:calc_unwritten_bytes(MC)), + [{1024, infinity}] = machi_csum_table:calc_unwritten_bytes(MC), Entry = {Offset, Size, Checksum} = {1064, 34, <<"deadbeef">>}, [] = machi_csum_table:find(MC, Offset, Size), ok = machi_csum_table:write(MC, Offset, Size, Checksum), [{1024, 40}, {1098, infinity}] = machi_csum_table:calc_unwritten_bytes(MC), - ?assertEqual([Entry], machi_csum_table:find(MC, Offset, Size)), - ok = machi_csum_table:trim(MC, Offset, Size, undefined, undefined), - ?assertEqual([{Offset, Size, trimmed}], - machi_csum_table:find(MC, Offset, Size)), + [Entry] = machi_csum_table:find(MC, Offset, Size), + ok = machi_csum_table:trim(MC, Offset, Size), + [{Offset, Size, trimmed}] = machi_csum_table:find(MC, Offset, Size), ok = machi_csum_table:close(MC), ok = machi_csum_table:delete(MC). close_test() -> Filename = "./temp-checksum-dumb-file-2", - _ = cleanup(Filename), + _ = file:delete(Filename), {ok, MC} = machi_csum_table:open(Filename, []), Entry = {Offset, Size, Checksum} = {1064, 34, <<"deadbeef">>}, [] = machi_csum_table:find(MC, Offset, Size), @@ -36,33 +31,32 @@ close_test() -> {ok, MC2} = machi_csum_table:open(Filename, []), [Entry] = machi_csum_table:find(MC2, Offset, Size), - ok = machi_csum_table:trim(MC2, Offset, Size, undefined, undefined), + ok = machi_csum_table:trim(MC2, Offset, Size), [{Offset, Size, trimmed}] = machi_csum_table:find(MC2, Offset, Size), ok = machi_csum_table:delete(MC2). smoke2_test() -> Filename = "./temp-checksum-dumb-file-3", - _ = cleanup(Filename), + _ = file:delete(Filename), {ok, MC} = machi_csum_table:open(Filename, []), Entry = {Offset, Size, Checksum} = {1025, 10, <<"deadbeef">>}, ok = machi_csum_table:write(MC, Offset, Size, Checksum), - ?assertEqual([], machi_csum_table:find(MC, 0, 0)), - ?assertEqual([?HDR], machi_csum_table:find(MC, 0, 1)), + [] = machi_csum_table:find(MC, 0, 0), + [?HDR] = machi_csum_table:find(MC, 0, 1), [Entry] = machi_csum_table:find(MC, Offset, Size), [?HDR] = machi_csum_table:find(MC, 1, 1024), - ?assertEqual([?HDR, Entry], - machi_csum_table:find(MC, 1023, 1024)), + [?HDR, Entry] = machi_csum_table:find(MC, 1023, 1024), [Entry] = machi_csum_table:find(MC, 1024, 1024), [Entry] = machi_csum_table:find(MC, 1025, 1024), - ok = machi_csum_table:trim(MC, Offset, Size, undefined, undefined), + ok = machi_csum_table:trim(MC, Offset, Size), [{Offset, Size, trimmed}] = machi_csum_table:find(MC, Offset, Size), ok = machi_csum_table:close(MC), ok = machi_csum_table:delete(MC). smoke3_test() -> Filename = "./temp-checksum-dumb-file-4", - _ = cleanup(Filename), + _ = file:delete(Filename), {ok, MC} = machi_csum_table:open(Filename, []), Scenario = [%% Command, {Offset, Size, Csum}, LeftNeighbor, RightNeibor @@ -113,19 +107,3 @@ smoke3_test() -> ok = machi_csum_table:delete(MC). %% TODO: add quickcheck test here - -%% Previous implementation --spec all_trimmed2(machi_csum_table:table(), - non_neg_integer(), non_neg_integer()) -> boolean(). -all_trimmed2(CsumT, Left, Right) -> - Chunks = machi_csum_table:find(CsumT, Left, Right), - runthru(Chunks, Left, Right). 
- -%% @doc make sure all trimmed chunks are continously chained -%% TODO: test with EQC -runthru([], Pos, Pos) -> true; -runthru([], Pos0, Pos) when Pos0 < Pos -> false; -runthru([{Offset0, Size0, trimmed}|T], Offset, Pos) when Offset0 =< Offset -> - runthru(T, Offset0+Size0, Pos); -runthru(_L, _O, _P) -> - false. diff --git a/test/machi_file_proxy_eqc.erl b/test/machi_file_proxy_eqc.erl index dd36787..00d470f 100644 --- a/test/machi_file_proxy_eqc.erl +++ b/test/machi_file_proxy_eqc.erl @@ -38,7 +38,7 @@ eqc_test_() -> {timeout, 60, {spawn, [ - ?_assertEqual(true, eqc:quickcheck(eqc:testing_time(30, ?QC_OUT(prop_ok())))) + {timeout, 30, ?_assertEqual(true, eqc:quickcheck(eqc:testing_time(15, ?QC_OUT(prop_ok()))))} ] }}. diff --git a/test/machi_file_proxy_test.erl b/test/machi_file_proxy_test.erl index a04d880..8c4b60b 100644 --- a/test/machi_file_proxy_test.erl +++ b/test/machi_file_proxy_test.erl @@ -76,67 +76,54 @@ random_binary(Start, End) -> binary:part(random_binary_single(), Start, End) end. -setup() -> - {ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR), - Pid. - -teardown(Pid) -> - catch machi_file_proxy:stop(Pid). - machi_file_proxy_test_() -> clean_up_data_dir(?TESTDIR), - {setup, - fun setup/0, - fun teardown/1, - fun(Pid) -> - [ - ?_assertEqual({error, bad_arg}, machi_file_proxy:read(Pid, -1, -1)), - ?_assertEqual({error, bad_arg}, machi_file_proxy:write(Pid, -1, <<"yo">>)), - ?_assertEqual({error, bad_arg}, machi_file_proxy:append(Pid, [], -1, <<"krep">>)), - ?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1)), - ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, 1)), - ?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1024)), - ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, ?HYOOGE)), - ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, ?HYOOGE, 1)), - {timeout, 10, - ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, random_binary(0, ?HYOOGE)))}, - ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))), - ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1024, <<"fail">>)), - ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, <<"fail">>)), - ?_assertMatch({ok, {[{_, _, _, _}], []}}, machi_file_proxy:read(Pid, 1025, 1000)), - ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, [], 1024, <<"mind the gap">>)), - ?_assertEqual(ok, machi_file_proxy:write(Pid, 2060, [], random_binary(0, 1024))) - ] - end}. 
+ {ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR), + [ + ?_assertEqual({error, bad_arg}, machi_file_proxy:read(Pid, -1, -1)), + ?_assertEqual({error, bad_arg}, machi_file_proxy:write(Pid, -1, <<"yo">>)), + ?_assertEqual({error, bad_arg}, machi_file_proxy:append(Pid, [], -1, <<"krep">>)), + ?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1)), + ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, 1)), + ?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1024)), + ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, ?HYOOGE)), + ?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, ?HYOOGE, 1)), + ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, random_binary(0, ?HYOOGE))), + ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))), + ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1024, <<"fail">>)), + ?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, <<"fail">>)), + ?_assertMatch({ok, {[{_, _, _, _}], []}}, machi_file_proxy:read(Pid, 1025, 1000)), + ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, [], 1024, <<"mind the gap">>)), + ?_assertEqual(ok, machi_file_proxy:write(Pid, 2060, [], random_binary(0, 1024))), + ?_assertException(exit, {normal, _}, machi_file_proxy:stop(Pid)) + ]. multiple_chunks_read_test_() -> clean_up_data_dir(?TESTDIR), - {setup, - fun setup/0, - fun teardown/1, - fun(Pid) -> - [ - ?_assertEqual(ok, machi_file_proxy:trim(Pid, 0, 1, false)), - ?_assertMatch({ok, {[], [{"test", 0, 1}]}}, - machi_file_proxy:read(Pid, 0, 1, - [{needs_trimmed, true}])), - ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))), - ?_assertEqual(ok, machi_file_proxy:write(Pid, 10000, <<"fail">>)), - ?_assertEqual(ok, machi_file_proxy:write(Pid, 20000, <<"fail">>)), - ?_assertEqual(ok, machi_file_proxy:write(Pid, 30000, <<"fail">>)), - %% Freeza - ?_assertEqual(ok, machi_file_proxy:write(Pid, 530000, <<"fail">>)), - ?_assertMatch({ok, {[{"test", 1024, _, _}, - {"test", 10000, <<"fail">>, _}, - {"test", 20000, <<"fail">>, _}, - {"test", 30000, <<"fail">>, _}, - {"test", 530000, <<"fail">>, _}], []}}, - machi_file_proxy:read(Pid, 1024, 530000)), - ?_assertMatch({ok, {[{"test", 1, _, _}], [{"test", 0, 1}]}}, - machi_file_proxy:read(Pid, 0, 1024, - [{needs_trimmed, true}])) - ] - end}. + {ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR), + [ + ?_assertEqual(ok, machi_file_proxy:trim(Pid, 0, 1, false)), + ?_assertMatch({ok, {[], [{"test", 0, 1}]}}, + machi_file_proxy:read(Pid, 0, 1, + [{needs_trimmed, true}])), + ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))), + ?_assertEqual(ok, machi_file_proxy:write(Pid, 10000, <<"fail">>)), + ?_assertEqual(ok, machi_file_proxy:write(Pid, 20000, <<"fail">>)), + ?_assertEqual(ok, machi_file_proxy:write(Pid, 30000, <<"fail">>)), + %% Freeza + ?_assertEqual(ok, machi_file_proxy:write(Pid, 530000, <<"fail">>)), + ?_assertMatch({ok, {[{"test", 1024, _, _}, + {"test", 10000, <<"fail">>, _}, + {"test", 20000, <<"fail">>, _}, + {"test", 30000, <<"fail">>, _}, + {"test", 530000, <<"fail">>, _}], []}}, + machi_file_proxy:read(Pid, 1024, 530000)), + ?_assertMatch({ok, {[{"test", 1, _, _}], [{"test", 0, 1}]}}, + machi_file_proxy:read(Pid, 0, 1024, + [{needs_trimmed, true}])), + ?_assertException(exit, {normal, _}, machi_file_proxy:stop(Pid)) + ]. + -endif. % !PULSE -endif. % TEST. 
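For readers skimming the machi_file_proxy test rewrite above, the write-once contract those assertions encode condenses to the following sequence; a sketch whose expected result shapes are copied from the assertions, with made-up argument values:

    %% Illustrative walkthrough of the file proxy's write-once behavior.
    file_proxy_sketch(TestDir) ->
        {ok, Pid} = machi_file_proxy:start_link(fluname, "test", TestDir),
        %% append/2 returns the file name plus the offset it chose.
        {ok, "test", Off} = machi_file_proxy:append(Pid, <<"first chunk">>),
        %% Overwriting bytes that already exist is refused...
        {error, written} = machi_file_proxy:write(Pid, Off, <<"again!">>),
        %% ...while reading far beyond anything written fails differently.
        {error, not_written} = machi_file_proxy:read(Pid, Off + 100000, 1),
        catch machi_file_proxy:stop(Pid),
        ok.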
diff --git a/test/machi_flu1_test.erl b/test/machi_flu1_test.erl index 0577033..ea8702a 100644 --- a/test/machi_flu1_test.erl +++ b/test/machi_flu1_test.erl @@ -30,16 +30,73 @@ -define(FLU, machi_flu1). -define(FLU_C, machi_flu1_client). +get_env_vars(App, Ks) -> + Raw = [application:get_env(App, K) || K <- Ks], + Old = lists:zip(Ks, Raw), + {App, Old}. + +clean_up_env_vars({App, Old}) -> + [case Res of + undefined -> + application:unset_env(App, K); + {ok, V} -> + application:set_env(App, K, V) + end || {K, Res} <- Old]. + +filter_env_var({ok, V}) -> V; +filter_env_var(Else) -> Else. + +clean_up_data_dir(DataDir) -> + [begin + Fs = filelib:wildcard(DataDir ++ Glob), + [file:delete(F) || F <- Fs], + [file:del_dir(F) || F <- Fs] + end || Glob <- ["*/*/*/*", "*/*/*", "*/*", "*"] ], + _ = file:del_dir(DataDir), + ok. + +start_flu_package(RegName, TcpPort, DataDir) -> + start_flu_package(RegName, TcpPort, DataDir, []). + +start_flu_package(RegName, TcpPort, DataDir, Props) -> + case proplists:get_value(save_data_dir, Props) of + true -> + ok; + _ -> + clean_up_data_dir(DataDir) + end, + + maybe_start_sup(), + machi_flu_psup:start_flu_package(RegName, TcpPort, DataDir, Props). + +stop_flu_package(FluName) -> + machi_flu_psup:stop_flu_package(FluName), + Pid = whereis(machi_sup), + exit(Pid, normal), + machi_util:wait_for_death(Pid, 100). + +maybe_start_sup() -> + case whereis(machi_sup) of + undefined -> + machi_sup:start_link(), + %% evil but we have to let stuff start up + timer:sleep(10), + maybe_start_sup(); + Pid -> Pid + end. + + -ifndef(PULSE). flu_smoke_test() -> Host = "localhost", - TcpPort = 12957, + TcpPort = 32957, DataDir = "./data", Prefix = <<"prefix!">>, BadPrefix = BadFile = "no/good", + W_props = [{initial_wedged, false}], - {_, _, _} = machi_test_util:start_flu_package(smoke_flu, TcpPort, DataDir, W_props), + start_flu_package(smoke_flu, TcpPort, DataDir, W_props), try Msg = "Hello, world!", Msg = ?FLU_C:echo(Host, TcpPort, Msg), @@ -105,7 +162,7 @@ flu_smoke_test() -> Chunk2 = <<"yo yo">>, Len2 = byte_size(Chunk2), Off2 = ?MINIMUM_OFFSET + 77, - File2 = "smoke-whole-file^^0^1^1", + File2 = "smoke-whole-file^1^1", ok = ?FLU_C:write_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH, File2, Off2, Chunk2), {error, bad_arg} = ?FLU_C:write_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH, @@ -137,21 +194,22 @@ flu_smoke_test() -> ok = ?FLU_C:quit(?FLU_C:connect(#p_srvr{address=Host, port=TcpPort})) after - machi_test_util:stop_flu_package() + stop_flu_package(smoke_flu) end. flu_projection_smoke_test() -> Host = "localhost", - TcpPort = 12959, + TcpPort = 32959, DataDir = "./data.projst", - {_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir), + + start_flu_package(projection_test_flu, TcpPort, DataDir), try [ok = flu_projection_common(Host, TcpPort, T) || T <- [public, private] ] %% , {ok, {false, EpochID1}} = ?FLU_C:wedge_status(Host, TcpPort), %% io:format(user, "EpochID1 ~p\n", [EpochID1]) after - machi_test_util:stop_flu_package() + stop_flu_package(projection_test_flu) end. 
flu_projection_common(Host, TcpPort, T) -> @@ -179,10 +237,11 @@ flu_projection_common(Host, TcpPort, T) -> bad_checksum_test() -> Host = "localhost", - TcpPort = 12960, + TcpPort = 32960, DataDir = "./data.bct", + Opts = [{initial_wedged, false}], - {_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir, Opts), + start_flu_package(projection_test_flu, TcpPort, DataDir, Opts), try Prefix = <<"some prefix">>, Chunk1 = <<"yo yo yo">>, @@ -192,15 +251,16 @@ bad_checksum_test() -> Prefix, Chunk1_badcs), ok after - machi_test_util:stop_flu_package() + stop_flu_package(projection_test_flu) end. witness_test() -> Host = "localhost", - TcpPort = 12961, + TcpPort = 32961, DataDir = "./data.witness", + Opts = [{initial_wedged, false}, {witness_mode, true}], - {_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir, Opts), + start_flu_package(projection_test_flu, TcpPort, DataDir, Opts), try Prefix = <<"some prefix">>, Chunk1 = <<"yo yo yo">>, @@ -232,7 +292,7 @@ witness_test() -> ok after - machi_test_util:stop_flu_package() + stop_flu_package(projection_test_flu) end. %% The purpose of timing_pb_encoding_test_ and timing_bif_encoding_test_ is diff --git a/test/machi_flu_psup_test.erl b/test/machi_flu_psup_test.erl index 6456965..fd93b42 100644 --- a/test/machi_flu_psup_test.erl +++ b/test/machi_flu_psup_test.erl @@ -38,12 +38,12 @@ smoke_test_() -> {timeout, 5*60, fun() -> smoke_test2() end}. smoke_test2() -> - Ps = [{a,#p_srvr{name=a, address="localhost", port=5550, props="./data.a"}}, - {b,#p_srvr{name=b, address="localhost", port=5551, props="./data.b"}}, - {c,#p_srvr{name=c, address="localhost", port=5552, props="./data.c"}} + Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}}, + {b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}}, + {c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}} ], [os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps], - {ok, SupPid} = machi_sup:start_link(), + {ok, SupPid} = machi_flu_sup:start_link(), try %% Only run a, don't run b & c so we have 100% failures talking to them [begin @@ -66,15 +66,15 @@ partial_stop_restart_test_() -> {timeout, 5*60, fun() -> partial_stop_restart2() end}. partial_stop_restart2() -> - Ps = [{a,#p_srvr{name=a, address="localhost", port=5560, props="./data.a"}}, - {b,#p_srvr{name=b, address="localhost", port=5561, props="./data.b"}}, - {c,#p_srvr{name=c, address="localhost", port=5562, props="./data.c"}} + Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}}, + {b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}}, + {c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}} ], ChMgrs = [machi_flu_psup:make_mgr_supname(P#p_srvr.name) || {_,P} <-Ps], PStores = [machi_flu_psup:make_proj_supname(P#p_srvr.name) || {_,P} <-Ps], Dict = orddict:from_list(Ps), [os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps], - {ok, SupPid} = machi_sup:start_link(), + {ok, SupPid} = machi_flu_sup:start_link(), DbgProps = [{initial_wedged, true}], Start = fun({_,P}) -> #p_srvr{name=Name, port=Port, props=Dir} = P, @@ -173,6 +173,19 @@ partial_stop_restart2() -> ok end. 
+p_srvr_rec_test() -> + P = #p_srvr{name=a, address="localhost", port=1024, props=[yo]}, + [P] = machi_flu_sup:sanitize_p_srvr_records([P]), + [P] = machi_flu_sup:sanitize_p_srvr_records([P,P]), + [] = machi_flu_sup:sanitize_p_srvr_records([nope]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{proto_mod=does_not_exist}]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{proto_mod="lists"}]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{address=7}]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{port=5}]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{port=foo}]), + [] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{props=foo}]), + ok. + -endif. % !PULSE -endif. % TEST diff --git a/test/machi_lifecycle_mgr_test.erl b/test/machi_lifecycle_mgr_test.erl new file mode 100644 index 0000000..8b17861 --- /dev/null +++ b/test/machi_lifecycle_mgr_test.erl @@ -0,0 +1,307 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2007-2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(machi_lifecycle_mgr_test). +-compile(export_all). + +-ifdef(TEST). +-ifndef(PULSE). + +-include_lib("eunit/include/eunit.hrl"). + +-include("machi.hrl"). +-include("machi_projection.hrl"). + +-define(MGR, machi_chain_manager1). + +setup() -> + catch application:stop(machi), + {ok, SupPid} = machi_sup:start_link(), + error_logger:tty(false), + Dir = "./" ++ atom_to_list(?MODULE) ++ ".datadir", + machi_flu1_test:clean_up_data_dir(Dir ++ "/*/*"), + machi_flu1_test:clean_up_data_dir(Dir), + Envs = [{flu_data_dir, Dir ++ "/data/flu"}, + {flu_config_dir, Dir ++ "/etc/flu-config"}, + {chain_config_dir, Dir ++ "/etc/chain-config"}, + {platform_data_dir, Dir ++ "/data"}, + {platform_etc_dir, Dir ++ "/etc"}, + {not_used_pending, Dir ++ "/etc/pending"} + ], + EnvKeys = [K || {K,_V} <- Envs], + undefined = application:get_env(machi, yo), + Cleanup = machi_flu1_test:get_env_vars(machi, EnvKeys ++ [yo]), + [begin + filelib:ensure_dir(V ++ "/unused"), + application:set_env(machi, K, V) + end || {K, V} <- Envs], + {SupPid, Dir, Cleanup}. + +cleanup({SupPid, Dir, Cleanup}) -> + exit(SupPid, normal), + machi_util:wait_for_death(SupPid, 100), + error_logger:tty(true), + catch application:stop(machi), + machi_flu1_test:clean_up_data_dir(Dir ++ "/*/*"), + machi_flu1_test:clean_up_data_dir(Dir), + machi_flu1_test:clean_up_env_vars(Cleanup), + undefined = application:get_env(machi, yo), + ok. + +smoke_test_() -> + {timeout, 60, fun() -> smoke_test2() end}. 
+ +smoke_test2() -> + YoCleanup = setup(), + try + Prefix = <<"pre">>, + Chunk1 = <<"yochunk">>, + Host = "localhost", + PortBase = 60120, + + Pa = #p_srvr{name=a,address="localhost",port=PortBase+0}, + Pb = #p_srvr{name=b,address="localhost",port=PortBase+1}, + Pc = #p_srvr{name=c,address="localhost",port=PortBase+2}, + %% Pstore_a = machi_flu1:make_projection_server_regname(a), + %% Pstore_b = machi_flu1:make_projection_server_regname(b), + %% Pstore_c = machi_flu1:make_projection_server_regname(c), + Pstores = [Pstore_a, Pstore_b, Pstore_c] = + [machi_flu1:make_projection_server_regname(a), + machi_flu1:make_projection_server_regname(b), + machi_flu1:make_projection_server_regname(c)], + ChMgrs = [ChMgr_a, ChMgr_b, ChMgr_c] = + [machi_chain_manager1:make_chmgr_regname(a), + machi_chain_manager1:make_chmgr_regname(b), + machi_chain_manager1:make_chmgr_regname(c)], + Fits = [Fit_a, Fit_b, Fit_c] = + [machi_flu_psup:make_fitness_regname(a), + machi_flu_psup:make_fitness_regname(b), + machi_flu_psup:make_fitness_regname(c)], + Advance = machi_chain_manager1_test:make_advance_fun( + Fits, [a,b,c], ChMgrs, 3), + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("\nSTEP: Start 3 FLUs, no chain.\n", []), + + [machi_lifecycle_mgr:make_pending_config(P) || P <- [Pa,Pb,Pc] ], + {[_,_,_],[]} = machi_lifecycle_mgr:process_pending(), + [{ok, #projection_v1{epoch_number=0}} = + machi_projection_store:read_latest_projection(PSTORE, private) + || PSTORE <- Pstores], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("\nSTEP: Start chain = [a,b,c]\n", []), + + C1 = #chain_def_v1{name=cx, mode=ap_mode, full=[Pa,Pb,Pc], + local_run=[a,b,c]}, + machi_lifecycle_mgr:make_pending_config(C1), + {[],[_]} = machi_lifecycle_mgr:process_pending(), + Advance(), + [{ok, #projection_v1{all_members=[a,b,c]}} = + machi_projection_store:read_latest_projection(PSTORE, private) + || PSTORE <- Pstores], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("\nSTEP: Reset chain = [b,c]\n", []), + + C2 = #chain_def_v1{name=cx, mode=ap_mode, full=[Pb,Pc], + old_full=[a,b,c], old_witnesses=[], + local_stop=[a], local_run=[b,c]}, + machi_lifecycle_mgr:make_pending_config(C2), + {[],[_]} = machi_lifecycle_mgr:process_pending(), + Advance(), + %% a should be down + {'EXIT', _} = (catch machi_projection_store:read_latest_projection( + hd(Pstores), private)), + [{ok, #projection_v1{all_members=[b,c]}} = + machi_projection_store:read_latest_projection(PSTORE, private) + || PSTORE <- tl(Pstores)], + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + io:format("\nSTEP: Reset chain = []\n", []), + + C3 = #chain_def_v1{name=cx, mode=ap_mode, full=[], + old_full=[b,c], old_witnesses=[], + local_stop=[b,c], local_run=[]}, + machi_lifecycle_mgr:make_pending_config(C3), + {[],[_]} = machi_lifecycle_mgr:process_pending(), + Advance(), + %% a,b,c should be down + [{'EXIT', _} = (catch machi_projection_store:read_latest_projection( + PSTORE, private)) + || PSTORE <- Pstores], + + ok + after + cleanup(YoCleanup) + end. 
+ +ast_tuple_syntax_test() -> + T = fun(L) -> machi_lifecycle_mgr:check_ast_tuple_syntax(L) end, + Canon1 = [ {host, "localhost", []}, + {host, "localhost", [{client_interface, "1.2.3.4"}, + {admin_interface, "5.6.7.8"}]}, + {flu, 'fx', "foohost", 4000, []}, + switch_old_and_new, + {chain, 'cy', ['fx', 'fy'], [{foo,"yay"},{bar,baz}]} ], + + {_Good,[]=_Bad} = T(Canon1), + Canon1_norm = machi_lifecycle_mgr:normalize_ast_tuple_syntax(Canon1), + true = (length(Canon1) == length(Canon1_norm)), + {Canon1_norm_b, []} = T(Canon1_norm), + true = (length(Canon1_norm) == length(Canon1_norm_b)), + + {[],[_,_,_,_]} = + T([ {host, 'localhost', []}, + {host, 'localhost', yo}, + {host, "localhost", [{client_interface, 77.88293829832}]}, + {host, "localhost", [{client_interface, "1.2.3.4"}, + {bummer, "5.6.7.8"}]} ]), + {[],[_,_,_,_,_,_]} = + T([ {flu, 'fx', 'foohost', 4000, []}, + {flu, 'fx', <<"foohost">>, 4000, []}, + {flu, 'fx', "foohost", -4000, []}, + {flu, 'fx', "foohost", 40009999, []}, + {flu, 'fx', "foohost", 4000, gack}, + {flu, 'fx', "foohost", 4000, [22]} ]), + {[],[_,_,_]} = + T([ {chain, 'cy', ["fx", "fy"], [foo,{bar,baz}]}, + yoloyolo, + {chain, "cy", ["fx", 27], oops,arity,way,way,way,too,big,x} + ]). + +ast_run_test() -> + PortBase = 20300, + R1 = [ + {host, "localhost", "localhost", "localhost", []}, + {flu, 'f0', "localhost", PortBase+0, []}, + {flu, 'f1', "localhost", PortBase+1, []}, + {chain, 'ca', ['f0'], []}, + {chain, 'cb', ['f1'], []}, + switch_old_and_new, + {flu, 'f2', "localhost", PortBase+2, []}, + {flu, 'f3', "localhost", PortBase+3, []}, + {flu, 'f4', "localhost", PortBase+4, []}, + {chain, 'ca', ['f0', 'f2'], []}, + {chain, 'cc', ['f3', 'f4'], []} + ], + + {ok, Env1} = machi_lifecycle_mgr:run_ast(R1), + %% Uncomment to examine the Env trees. + %% Y1 = {lists:sort(gb_trees:to_list(element(1, Env1))), + %% lists:sort(gb_trees:to_list(element(2, Env1))), + %% element(3, Env1)}, + %% io:format(user, "\nY1 ~p\n", [Y1]), + + Negative_after_R1 = + [ + {host, "localhost", "foo", "foo", []}, % dupe host + {flu, 'f1', "other", PortBase+9999999, []}, % bogus port # (syntax) + {flu, 'f1', "other", PortBase+888, []}, % dupe flu name + {flu, 'f7', "localhost", PortBase+1, []}, % dupe host+port + {chain, 'ca', ['f7'], []}, % unknown flu + {chain, 'cc', ['f0'], []}, % flu previously assigned + {chain, 'ca', cp_mode, ['f0', 'f1', 'f2'], [], []} % mode change + ], + [begin + %% io:format(user, "dbg: Neg ~p\n", [Neg]), + {error, _} = machi_lifecycle_mgr:run_ast(R1 ++ [Neg]) + end || Neg <- Negative_after_R1], + + %% The 'run' phase doesn't blow smoke. What about 'diff'? + {X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, "localhost"), + %% There's only one host, "localhost", so 'all' should be exactly equal. + {X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, all), + %% io:format(user, "X1b: ~p\n", [X1b]), + + %% Append to the R1 scenario: for chain cc: add f5, remove f4 + %% Expect: see pattern matching below on X2b. + R2 = (R1 -- [switch_old_and_new]) ++ + [switch_old_and_new, + {flu, 'f5', "localhost", PortBase+5, []}, + {chain, 'cc', ['f3','f5'], []}], + {ok, Env2} = machi_lifecycle_mgr:run_ast(R2), + {_X2a, X2b} = machi_lifecycle_mgr:diff_env(Env2, "localhost"), + %% io:format(user, "X2b: ~p\n", [X2b]), + F5_port = PortBase+5, + [#p_srvr{name='f5',address="localhost",port=F5_port}, + #chain_def_v1{name='cc', + full=[#p_srvr{name='f3'},#p_srvr{name='f5'}], witnesses=[], + old_full=[f3,f4], old_witnesses=[], + local_run=[f5], local_stop=[f4]}] = X2b, + + ok. 
+
+ast_then_apply_test_() ->
+    {timeout, 60, fun() -> ast_then_apply_test2() end}.
+
+ast_then_apply_test2() ->
+    YoCleanup = setup(),
+    try
+        PortBase = 20400,
+        NumChains = 4,
+        ChainLen = 3,
+        FLU_num = NumChains * ChainLen,
+        %% Generate FLUs f1..f12 and chains c1,c4,c7,c10, each chain
+        %% taking ChainLen consecutive FLUs.
+        FLU_defs = [{flu, list_to_atom("f"++integer_to_list(X)),
+                     "localhost", PortBase+X, []} || X <- lists:seq(1,FLU_num)],
+        FLU_names = [FLU || {flu,FLU,_,_,_} <- FLU_defs],
+        Ch_defs = [{chain, list_to_atom("c"++integer_to_list(X)),
+                    lists:sublist(FLU_names, X, ChainLen),
+                    []} || X <- lists:seq(1, FLU_num, ChainLen)],
+
+        R1 = [switch_old_and_new,
+              {host, "localhost", "localhost", "localhost", []}]
+             ++ FLU_defs ++ Ch_defs,
+        {ok, Env1} = machi_lifecycle_mgr:run_ast(R1),
+        {_X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, "localhost"),
+        %% io:format(user, "X1b ~p\n", [X1b]),
+        [machi_lifecycle_mgr:make_pending_config(X) || X <- X1b],
+        {PassFLUs, PassChains} = machi_lifecycle_mgr:process_pending(),
+        true = (length(PassFLUs) == length(FLU_defs)),
+        true = (length(PassChains) == length(Ch_defs)),
+
+        %% Kick the chain managers into doing something useful right now.
+        Pstores = [list_to_atom(atom_to_list(X) ++ "_pstore") || X <- FLU_names],
+        Fits = [list_to_atom(atom_to_list(X) ++ "_fitness") || X <- FLU_names],
+        ChMgrs = [list_to_atom(atom_to_list(X) ++ "_chmgr") || X <- FLU_names],
+        Advance = machi_chain_manager1_test:make_advance_fun(
+                    Fits, FLU_names, ChMgrs, 3),
+        Advance(),
+
+        %% Sanity check: everyone is configured properly.
+        [begin
+             {ok, #projection_v1{epoch_number=Epoch, all_members=All,
+                                 chain_name=ChainName, upi=UPI}} =
+                 machi_projection_store:read_latest_projection(PStore, private),
+             %% io:format(user, "~p: epoch ~p all ~p\n", [PStore, Epoch, All]),
+             true = Epoch > 0,
+             ChainLen = length(All),
+             true = (length(UPI) > 0),
+             {chain, _, Full, []} = lists:keyfind(ChainName, 2, Ch_defs),
+             true = lists:sort(Full) == lists:sort(All)
+         end || PStore <- Pstores],
+
+        ok
+    after
+        cleanup(YoCleanup)
+    end.
+
+-endif. % !PULSE
+-endif. % TEST
diff --git a/test/machi_merkle_tree_test.erl b/test/machi_merkle_tree_test.erl
index 96d4933..922f0e2 100644
--- a/test/machi_merkle_tree_test.erl
+++ b/test/machi_merkle_tree_test.erl
@@ -42,9 +42,7 @@ basic_test() ->
     T2 = machi_merkle_tree:build_tree(D1),
 
     ?assertNotEqual(T1#naive.root, T2#naive.root),
-    ?assertEqual(true, length(machi_merkle_tree:naive_diff(T1, T2)) == 1
-                 orelse
-                 Filesize > ChunkSize).
+    ?assertEqual(1, length(machi_merkle_tree:naive_diff(T1, T2))).
 
 make_leaf_nodes(Filesize) ->
diff --git a/test/machi_pb_high_client_test.erl b/test/machi_pb_high_client_test.erl
index 16b125c..1283d00 100644
--- a/test/machi_pb_high_client_test.erl
+++ b/test/machi_pb_high_client_test.erl
@@ -34,15 +34,23 @@ smoke_test_() ->
     {timeout, 5*60, fun() -> smoke_test2() end}.
 
smoke_test2() -> - PortBase = 5720, + Port = 5720, + Ps = [#p_srvr{name=a, address="localhost", port=Port, props="./data.a"} + ], + D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]), ok = application:set_env(machi, max_file_size, 1024*1024), + + [os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps], + {ok, SupPid} = machi_flu_sup:start_link(), try - {Ps, MgrNames, Dirs} = machi_test_util:start_flu_packages( - 1, PortBase, "./data.", []), - D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]), - M0 = hd(MgrNames), - ok = machi_chain_manager1:set_chain_members(M0, D), - [machi_chain_manager1:trigger_react_to_env(M0) || _ <-lists:seq(1,5)], + [begin + #p_srvr{name=Name, port=Port, props=Dir} = P, + {ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, []) + end || P <- Ps], + ok = machi_chain_manager1:set_chain_members(a_chmgr, D), + [machi_chain_manager1:trigger_react_to_env(a_chmgr) || _ <-lists:seq(1,5)], + {ok, PQQ} = machi_projection_store:read_latest_projection(a_pstore, public), + io:format(user, "a's proj: ~w\n", [machi_projection:make_summary(PQQ)]), {ok, Clnt} = ?C:start_link(Ps), try @@ -55,17 +63,16 @@ smoke_test2() -> %% a separate test module? Or separate test func? {error, _} = ?C:auth(Clnt, "foo", "bar"), - CoC_n = "", % CoC_namespace (not implemented) - CoC_l = 0, % CoC_locator (not implemented) + PK = <<>>, Prefix = <<"prefix">>, Chunk1 = <<"Hello, chunk!">>, {ok, {Off1, Size1, File1}} = - ?C:append_chunk(Clnt, CoC_n, CoC_l, Prefix, Chunk1, none, 0), + ?C:append_chunk(Clnt, PK, Prefix, Chunk1, none, 0), true = is_binary(File1), Chunk2 = "It's another chunk", CSum2 = {client_sha, machi_util:checksum_chunk(Chunk2)}, {ok, {Off2, Size2, File2}} = - ?C:append_chunk(Clnt, CoC_n, CoC_l, Prefix, Chunk2, CSum2, 1024), + ?C:append_chunk(Clnt, PK, Prefix, Chunk2, CSum2, 1024), Chunk3 = ["This is a ", <<"test,">>, 32, [["Hello, world!"]]], File3 = File2, Off3 = Off2 + iolist_size(Chunk2), @@ -88,8 +95,7 @@ smoke_test2() -> File1Bin = binary_to_list(File1), [begin - #p_srvr{name=Name, props=Props} = P, - Dir = proplists:get_value(data_dir, Props), + #p_srvr{name=Name, port=Port, props=Dir} = P, ?assertEqual({ok, [File1Bin]}, file:list_dir(filename:join([Dir, "data"]))), FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]), @@ -110,15 +116,13 @@ smoke_test2() -> LargeBytes = binary:copy(<<"x">>, 1024*1024), LBCsum = {client_sha, machi_util:checksum_chunk(LargeBytes)}, {ok, {Offx, Sizex, Filex}} = - ?C:append_chunk(Clnt, CoC_n, CoC_l, - Prefix, LargeBytes, LBCsum, 0), + ?C:append_chunk(Clnt, PK, Prefix, LargeBytes, LBCsum, 0), ok = ?C:trim_chunk(Clnt, Filex, Offx, Sizex), %% Make sure everything was trimmed File = binary_to_list(Filex), [begin - #p_srvr{name=Name, props=Props} = P, - Dir = proplists:get_value(data_dir, Props), + #p_srvr{name=Name, port=_Port, props=Dir} = P, ?assertEqual({ok, []}, file:list_dir(filename:join([Dir, "data"]))), FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]), @@ -135,7 +139,10 @@ smoke_test2() -> (catch ?C:quit(Clnt)) end after - machi_test_util:stop_flu_packages() + exit(SupPid, normal), + [os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps], + machi_util:wait_for_death(SupPid, 100), + ok end. -endif. 
% !PULSE diff --git a/test/machi_projection_store_test.erl b/test/machi_projection_store_test.erl index eab42a9..665553b 100644 --- a/test/machi_projection_store_test.erl +++ b/test/machi_projection_store_test.erl @@ -33,7 +33,7 @@ smoke_test() -> Dir = "./data.a", Os = [{ignore_stability_time, true}, {active_mode, false}], os:cmd("rm -rf " ++ Dir), - machi_test_util:start_flu_package(a, PortBase, "./data.a", Os), + machi_flu1_test:start_flu_package(a, PortBase, "./data.a", Os), try P1 = machi_projection:new(1, a, [], [], [], [], []), @@ -58,7 +58,7 @@ smoke_test() -> ok after - machi_test_util:stop_flu_package() + machi_flu1_test:stop_flu_package(a) end. -endif. % !PULSE diff --git a/test/machi_proxy_flu1_client_test.erl b/test/machi_proxy_flu1_client_test.erl index 439b1a7..3adfad5 100644 --- a/test/machi_proxy_flu1_client_test.erl +++ b/test/machi_proxy_flu1_client_test.erl @@ -32,14 +32,16 @@ api_smoke_test() -> RegName = api_smoke_flu, + Host = "localhost", TcpPort = 57124, DataDir = "./data.api_smoke_flu", W_props = [{active_mode, false},{initial_wedged, false}], Prefix = <<"prefix">>, + machi_flu1_test:start_flu_package(RegName, TcpPort, DataDir, W_props), + try - {[I], _, _} = machi_test_util:start_flu_package( - RegName, TcpPort, DataDir, W_props), + I = #p_srvr{name=RegName, address=Host, port=TcpPort}, {ok, Prox1} = ?MUT:start_link(I), try FakeEpoch = ?DUMMY_PV1_EPOCH, @@ -47,13 +49,13 @@ api_smoke_test() -> FakeEpoch, Prefix, <<"data">>, infinity) || _ <- lists:seq(1,5)], %% Stop the FLU, what happens? - machi_test_util:stop_flu_package(), + machi_flu1_test:stop_flu_package(RegName), [{error,partition} = ?MUT:append_chunk(Prox1, FakeEpoch, Prefix, <<"data-stopped1">>, infinity) || _ <- lists:seq(1,3)], %% Start the FLU again, we should be able to do stuff immediately - machi_test_util:start_flu_package(RegName, TcpPort, DataDir, - [no_cleanup|W_props]), + machi_flu1_test:start_flu_package(RegName, TcpPort, DataDir, + [save_data_dir|W_props]), MyChunk = <<"my chunk data">>, {ok, {MyOff,MySize,MyFile}} = ?MUT:append_chunk(Prox1, FakeEpoch, Prefix, MyChunk, @@ -70,7 +72,7 @@ api_smoke_test() -> {error, bad_checksum} = ?MUT:append_chunk(Prox1, FakeEpoch, Prefix, MyChunk_badcs), {error, bad_checksum} = ?MUT:write_chunk(Prox1, FakeEpoch, - <<"foo-file^^0^1^1">>, 99832, + <<"foo-file^1^1">>, 99832, MyChunk_badcs), %% Put kick_projection_reaction() in the middle of the test so @@ -100,7 +102,7 @@ api_smoke_test() -> _ = (catch ?MUT:quit(Prox1)) end after - (catch machi_test_util:stop_flu_package()) + (catch machi_flu1_test:stop_flu_package(RegName)) end. 
flu_restart_test_() -> @@ -108,13 +110,14 @@ flu_restart_test_() -> flu_restart_test2() -> RegName = a, + Host = "localhost", TcpPort = 57125, DataDir = "./data.api_smoke_flu2", W_props = [{initial_wedged, false}, {active_mode, false}], + machi_flu1_test:start_flu_package(RegName, TcpPort, DataDir, W_props), try - {[I], _, _} = machi_test_util:start_flu_package( - RegName, TcpPort, DataDir, W_props), + I = #p_srvr{name=RegName, address=Host, port=TcpPort}, {ok, Prox1} = ?MUT:start_link(I), try FakeEpoch = ?DUMMY_PV1_EPOCH, @@ -134,7 +137,7 @@ flu_restart_test2() -> {ok, EpochID} = ?MUT:get_epoch_id(Prox1), {ok, EpochID} = ?MUT:get_latest_epochid(Prox1, public), {ok, EpochID} = ?MUT:get_latest_epochid(Prox1, private), - ok = machi_test_util:stop_flu_package(), timer:sleep(50), + ok = machi_flu1_test:stop_flu_package(RegName), timer:sleep(50), %% Now that the last proxy op was successful and only %% after did we stop the FLU, let's check that both the @@ -148,7 +151,7 @@ flu_restart_test2() -> ExpectedOps = [ - fun(run) -> ?assertEqual({ok, EpochID}, ?MUT:get_epoch_id(Prox1)), + fun(run) -> {ok, EpochID} = ?MUT:get_epoch_id(Prox1), ok; (line) -> io:format("line ~p, ", [?LINE]); (stop) -> ?MUT:get_epoch_id(Prox1) end, @@ -290,13 +293,13 @@ flu_restart_test2() -> ], [begin - machi_test_util:start_flu_package( + machi_flu1_test:start_flu_package( RegName, TcpPort, DataDir, - [no_cleanup|W_props]), + [save_data_dir|W_props]), _ = Fun(line), ok = Fun(run), ok = Fun(run), - ok = machi_test_util:stop_flu_package(), + ok = machi_flu1_test:stop_flu_package(RegName), {error, partition} = Fun(stop), {error, partition} = Fun(stop), ok @@ -306,8 +309,8 @@ flu_restart_test2() -> _ = (catch ?MUT:quit(Prox1)) end after - (catch machi_test_util:stop_flu_package()) + (catch machi_flu1_test:stop_flu_package(RegName)) end. - + -endif. % !PULSE -endif. % TEST diff --git a/test/machi_test_util.erl b/test/machi_test_util.erl deleted file mode 100644 index ff908b7..0000000 --- a/test/machi_test_util.erl +++ /dev/null @@ -1,111 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(machi_test_util). --compile(export_all). - --ifdef(TEST). --ifndef(PULSE). - --include_lib("eunit/include/eunit.hrl"). - --include("machi.hrl"). --include("machi_projection.hrl"). - --define(FLU, machi_flu1). --define(FLU_C, machi_flu1_client). - --spec start_flu_package(atom(), inet:port_number(), string()) -> - {Ps::[#p_srvr{}], MgrNames::[atom()], Dirs::[string()]}. -start_flu_package(FluName, TcpPort, DataDir) -> - start_flu_package(FluName, TcpPort, DataDir, []). - --spec start_flu_package(atom(), inet:port_number(), string(), list()) -> - {Ps::[#p_srvr{}], MgrNames::[atom()], Dirs::[string()]}. 
-start_flu_package(FluName, TcpPort, DataDir, Props) -> - MgrName = machi_flu_psup:make_mgr_supname(FluName), - FluInfo = [{#p_srvr{name=FluName, address="localhost", port=TcpPort, - props=[{chmgr, MgrName}, {data_dir, DataDir} | Props]}, - DataDir, MgrName}], - start_flu_packages(FluInfo). - --spec start_flu_packages(pos_integer(), inet:port_number(), string(), list()) -> - {Ps::[#p_srvr{}], MgrNames::[atom()], Dirs::[string()]}. -start_flu_packages(FluCount, BaseTcpPort, DirPrefix, Props) -> - FluInfo = flu_info(FluCount, BaseTcpPort, DirPrefix, Props), - start_flu_packages(FluInfo). - -start_flu_packages(FluInfo) -> - _ = stop_machi_sup(), - clean_up(FluInfo), - {ok, _SupPid} = machi_sup:start_link(), - [{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, Props) || - {#p_srvr{name=Name, port=Port, props=Props}, Dir, _} <- FluInfo], - {Ps, Dirs, MgrNames} = lists:unzip3(FluInfo), - {Ps, MgrNames, Dirs}. - -stop_flu_package() -> - stop_flu_packages(). - -stop_flu_packages() -> - stop_machi_sup(). - -flu_info(FluCount, BaseTcpPort, DirPrefix, Props) -> - [begin - FLUNameStr = [$a + I - 1], - FLUName = list_to_atom(FLUNameStr), - MgrName = machi_flu_psup:make_mgr_supname(FLUName), - DataDir = DirPrefix ++ "/data.eqc." ++ FLUNameStr, - {#p_srvr{name=FLUName, address="localhost", port=BaseTcpPort + I, - props=[{chmgr, MgrName}, {data_dir, DataDir} | Props]}, - DataDir, MgrName} - end || I <- lists:seq(1, FluCount)]. - -stop_machi_sup() -> - case whereis(machi_sup) of - undefined -> ok; - Pid -> - catch exit(whereis(machi_sup), normal), - machi_util:wait_for_death(Pid, 30) - end. - -clean_up(FluInfo) -> - _ = [begin - case proplists:get_value(no_cleanup, Props) of - true -> ok; - _ -> - _ = machi_flu1:stop(FLUName), - clean_up_dir(Dir) - end - end || {#p_srvr{name=FLUName, props=Props}, Dir, _} <- FluInfo], - ok. - -clean_up_dir(Dir) -> - [begin - Fs = filelib:wildcard(Dir ++ Glob), - [file:delete(F) || F <- Fs], - [file:del_dir(F) || F <- Fs] - end || Glob <- ["*/*/*/*", "*/*/*", "*/*", "*"] ], - _ = file:del_dir(Dir), - ok. - --endif. % !PULSE --endif. % TEST -