Compare commits
No commits in common. "master" and "slf/chain-manager/cp-mode3" have entirely different histories.
master
...
slf/chain-
103 changed files with 5514 additions and 15035 deletions
23
.gitignore
vendored
23
.gitignore
vendored
|
@ -2,32 +2,11 @@ prototype/chain-manager/patch.*
|
||||||
.eqc-info
|
.eqc-info
|
||||||
.eunit
|
.eunit
|
||||||
deps
|
deps
|
||||||
dev
|
*.plt
|
||||||
erl_crash.dump
|
erl_crash.dump
|
||||||
eqc
|
|
||||||
.concrete/DEV_MODE
|
.concrete/DEV_MODE
|
||||||
.rebar
|
.rebar
|
||||||
edoc
|
edoc
|
||||||
|
|
||||||
# Dialyzer stuff
|
|
||||||
.dialyzer-last-run.txt
|
|
||||||
.ebin.native
|
|
||||||
.local_dialyzer_plt
|
|
||||||
dialyzer_unhandled_warnings
|
|
||||||
dialyzer_warnings
|
|
||||||
*.plt
|
|
||||||
|
|
||||||
# PB artifacts for Erlang
|
# PB artifacts for Erlang
|
||||||
include/machi_pb.hrl
|
include/machi_pb.hrl
|
||||||
|
|
||||||
# Release packaging
|
|
||||||
rel/machi
|
|
||||||
rel/vars/dev*vars.config
|
|
||||||
|
|
||||||
# Misc Scott cruft
|
|
||||||
*.patch
|
|
||||||
current_counterexample.eqc
|
|
||||||
foo*
|
|
||||||
RUNLOG*
|
|
||||||
typescript*
|
|
||||||
*.swp
|
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
language: erlang
|
|
||||||
notifications:
|
|
||||||
email: scott@basho.com
|
|
||||||
script: "priv/test-for-gh-pr.sh"
|
|
||||||
otp_release:
|
|
||||||
- 17.5
|
|
||||||
## No, Dialyzer is too different between 17 & 18: - 18.1
|
|
230
FAQ.md
230
FAQ.md
|
@ -11,14 +11,14 @@
|
||||||
|
|
||||||
+ [1 Questions about Machi in general](#n1)
|
+ [1 Questions about Machi in general](#n1)
|
||||||
+ [1.1 What is Machi?](#n1.1)
|
+ [1.1 What is Machi?](#n1.1)
|
||||||
+ [1.2 What is a Machi chain?](#n1.2)
|
+ [1.2 What is a Machi "cluster of clusters"?](#n1.2)
|
||||||
+ [1.3 What is a Machi cluster?](#n1.3)
|
+ [1.2.1 This "cluster of clusters" idea needs a better name, don't you agree?](#n1.2.1)
|
||||||
+ [1.4 What is Machi like when operating in "eventually consistent" mode?](#n1.4)
|
+ [1.3 What is Machi like when operating in "eventually consistent"/"AP mode"?](#n1.3)
|
||||||
+ [1.5 What is Machi like when operating in "strongly consistent" mode?](#n1.5)
|
+ [1.4 What is Machi like when operating in "strongly consistent"/"CP mode"?](#n1.4)
|
||||||
+ [1.6 What does Machi's API look like?](#n1.6)
|
+ [1.5 What does Machi's API look like?](#n1.5)
|
||||||
+ [1.7 What licensing terms are used by Machi?](#n1.7)
|
+ [1.6 What licensing terms are used by Machi?](#n1.6)
|
||||||
+ [1.8 Where can I find the Machi source code and documentation? Can I contribute?](#n1.8)
|
+ [1.7 Where can I find the Machi source code and documentation? Can I contribute?](#n1.7)
|
||||||
+ [1.9 What is Machi's expected release schedule, packaging, and operating system/OS distribution support?](#n1.9)
|
+ [1.8 What is Machi's expected release schedule, packaging, and operating system/OS distribution support?](#n1.8)
|
||||||
+ [2 Questions about Machi relative to {{something else}}](#n2)
|
+ [2 Questions about Machi relative to {{something else}}](#n2)
|
||||||
+ [2.1 How is Machi better than Hadoop?](#n2.1)
|
+ [2.1 How is Machi better than Hadoop?](#n2.1)
|
||||||
+ [2.2 How does Machi differ from HadoopFS/HDFS?](#n2.2)
|
+ [2.2 How does Machi differ from HadoopFS/HDFS?](#n2.2)
|
||||||
|
@ -28,15 +28,13 @@
|
||||||
+ [3 Machi's specifics](#n3)
|
+ [3 Machi's specifics](#n3)
|
||||||
+ [3.1 What technique is used to replicate Machi's files? Can other techniques be used?](#n3.1)
|
+ [3.1 What technique is used to replicate Machi's files? Can other techniques be used?](#n3.1)
|
||||||
+ [3.2 Does Machi have a reliance on a coordination service such as ZooKeeper or etcd?](#n3.2)
|
+ [3.2 Does Machi have a reliance on a coordination service such as ZooKeeper or etcd?](#n3.2)
|
||||||
+ [3.3 Are there any presentations available about Humming Consensus?](#n3.3)
|
+ [3.3 Is it true that there's an allegory written to describe humming consensus?](#n3.3)
|
||||||
+ [3.4 Is it true that there's an allegory written to describe Humming Consensus?](#n3.4)
|
+ [3.4 How is Machi tested?](#n3.4)
|
||||||
+ [3.5 How is Machi tested?](#n3.5)
|
+ [3.5 Does Machi require shared disk storage? e.g. iSCSI, NBD (Network Block Device), Fibre Channel disks](#n3.5)
|
||||||
+ [3.6 Does Machi require shared disk storage? e.g. iSCSI, NBD (Network Block Device), Fibre Channel disks](#n3.6)
|
+ [3.6 Does Machi require or assume that servers with large numbers of disks must use RAID-0/1/5/6/10/50/60 to create a single block device?](#n3.6)
|
||||||
+ [3.7 Does Machi require or assume that servers with large numbers of disks must use RAID-0/1/5/6/10/50/60 to create a single block device?](#n3.7)
|
+ [3.7 What language(s) is Machi written in?](#n3.7)
|
||||||
+ [3.8 What language(s) is Machi written in?](#n3.8)
|
+ [3.8 Does Machi use the Erlang/OTP network distribution system (aka "disterl")?](#n3.8)
|
||||||
+ [3.9 Can Machi run on Windows? Can Machi run on 32-bit platforms?](#n3.9)
|
+ [3.9 Can I use HTTP to write/read stuff into/from Machi?](#n3.9)
|
||||||
+ [3.10 Does Machi use the Erlang/OTP network distribution system (aka "disterl")?](#n3.10)
|
|
||||||
+ [3.11 Can I use HTTP to write/read stuff into/from Machi?](#n3.11)
|
|
||||||
|
|
||||||
<!-- ENDOUTLINE -->
|
<!-- ENDOUTLINE -->
|
||||||
|
|
||||||
|
@ -46,13 +44,13 @@
|
||||||
<a name="n1.1">
|
<a name="n1.1">
|
||||||
### 1.1. What is Machi?
|
### 1.1. What is Machi?
|
||||||
|
|
||||||
Very briefly, Machi is a very simple append-only blob/file store.
|
Very briefly, Machi is a very simple append-only file store.
|
||||||
|
|
||||||
Machi is
|
Machi is
|
||||||
"dumber" than many other file stores (i.e., lacking many features
|
"dumber" than many other file stores (i.e., lacking many features
|
||||||
found in other file stores) such as HadoopFS or a simple NFS or CIFS file
|
found in other file stores) such as HadoopFS or a simple NFS or CIFS file
|
||||||
server.
|
server.
|
||||||
However, Machi is a distributed blob/file store, which makes it different
|
However, Machi is a distributed file store, which makes it different
|
||||||
(and, in some ways, more complicated) than a simple NFS or CIFS file
|
(and, in some ways, more complicated) than a simple NFS or CIFS file
|
||||||
server.
|
server.
|
||||||
|
|
||||||
|
@ -84,39 +82,45 @@ For a much longer answer, please see the
|
||||||
[Machi high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-machi.pdf).
|
[Machi high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-machi.pdf).
|
||||||
|
|
||||||
<a name="n1.2">
|
<a name="n1.2">
|
||||||
### 1.2. What is a Machi chain?
|
### 1.2. What is a Machi "cluster of clusters"?
|
||||||
|
|
||||||
A Machi chain is a small number of machines that maintain a common set
|
Machi's design is based on using small, well-understood and provable
|
||||||
of replicated files. A typical chain is of length 2 or 3. For
|
(mathematically) techniques to maintain multiple file copies without
|
||||||
critical data that must be available despite several simultaneous
|
data loss or data corruption. At its lowest level, Machi contains no
|
||||||
server failures, a chain length of 6 or 7 might be used.
|
support for distribution/partitioning/sharding of files across many
|
||||||
|
servers. A typical, fully-functional Machi cluster will likely be two
|
||||||
|
or three machines.
|
||||||
|
|
||||||
<a name="n1.3">
|
However, Machi is designed to be an excellent building block for
|
||||||
### 1.3. What is a Machi cluster?
|
building larger systems. A deployment of Machi "cluster of clusters"
|
||||||
|
will use the "random slicing" technique for partitioning files across
|
||||||
|
multiple Machi clusters that, as individuals, are unaware of the
|
||||||
|
larger cluster-of-clusters scheme.
|
||||||
|
|
||||||
A Machi cluster is a collection of Machi chains that
|
The cluster-of-clusters management service will be fully decentralized
|
||||||
partitions/shards/distributes files (based on file name) across the
|
|
||||||
collection of chains. Machi uses the "random slicing" algorithm (a
|
|
||||||
variation of consistent hashing) to define the mapping of file name to
|
|
||||||
chain name.
|
|
||||||
|
|
||||||
The cluster management service will be fully decentralized
|
|
||||||
and run as a separate software service installed on each Machi
|
and run as a separate software service installed on each Machi
|
||||||
cluster. This manager will appear to the local Machi server as simply
|
cluster. This manager will appear to the local Machi server as simply
|
||||||
another Machi file client. The cluster managers will take
|
another Machi file client. The cluster-of-clusters managers will take
|
||||||
care of file migration as the cluster grows and shrinks in capacity
|
care of file migration as the cluster grows and shrinks in capacity
|
||||||
and in response to day-to-day changes in workload.
|
and in response to day-to-day changes in workload.
|
||||||
|
|
||||||
Though the cluster manager has not yet been implemented,
|
Though the cluster-of-clusters manager has not yet been implemented,
|
||||||
its design is fully decentralized and capable of operating despite
|
its design is fully decentralized and capable of operating despite
|
||||||
multiple partial failures of its member chains. We expect this
|
multiple partial failures of its member clusters. We expect this
|
||||||
design to scale easily to at least one thousand servers.
|
design to scale easily to at least one thousand servers.
|
||||||
|
|
||||||
Please see the
|
Please see the
|
||||||
[Machi source repository's 'doc' directory for more details](https://github.com/basho/machi/tree/master/doc/).
|
[Machi source repository's 'doc' directory for more details](https://github.com/basho/machi/tree/master/doc/).
|
||||||
|
|
||||||
<a name="n1.4">
|
<a name="n1.2.1">
|
||||||
### 1.4. What is Machi like when operating in "eventually consistent" mode?
|
#### 1.2.1. This "cluster of clusters" idea needs a better name, don't you agree?
|
||||||
|
|
||||||
|
Yes. Please help us: we are bad at naming things.
|
||||||
|
For proof that naming things is hard, see
|
||||||
|
[http://martinfowler.com/bliki/TwoHardThings.html](http://martinfowler.com/bliki/TwoHardThings.html)
|
||||||
|
|
||||||
|
<a name="n1.3">
|
||||||
|
### 1.3. What is Machi like when operating in "eventually consistent"/"AP mode"?
|
||||||
|
|
||||||
Machi's operating mode dictates how a Machi cluster will react to
|
Machi's operating mode dictates how a Machi cluster will react to
|
||||||
network partitions. A network partition may be caused by:
|
network partitions. A network partition may be caused by:
|
||||||
|
@ -126,30 +130,37 @@ network partitions. A network partition may be caused by:
|
||||||
* An extreme server software "hang" or "pause", e.g. caused by OS
|
* An extreme server software "hang" or "pause", e.g. caused by OS
|
||||||
scheduling problems such as a failing/stuttering disk device.
|
scheduling problems such as a failing/stuttering disk device.
|
||||||
|
|
||||||
The consistency semantics of file operations while in eventual
|
"AP mode" refers to the "A" and "P" properties of the "CAP
|
||||||
consistency mode during and after network partitions are:
|
conjecture", meaning that the cluster will be "Available" and
|
||||||
|
"Partition tolerant".
|
||||||
|
|
||||||
|
The consistency semantics of file operations while in "AP mode" are
|
||||||
|
eventually consistent during and after network partitions:
|
||||||
|
|
||||||
* File write operations are permitted by any client on the "same side"
|
* File write operations are permitted by any client on the "same side"
|
||||||
of the network partition.
|
of the network partition.
|
||||||
* File read operations are successful for any file contents where the
|
* File read operations are successful for any file contents where the
|
||||||
client & server are on the "same side" of the network partition.
|
client & server are on the "same side" of the network partition.
|
||||||
* File read operations will probably fail for any file contents where the
|
|
||||||
client & server are on "different sides" of the network partition.
|
|
||||||
* After the network partition(s) is resolved, files are merged
|
* After the network partition(s) is resolved, files are merged
|
||||||
together from "all sides" of the partition(s).
|
together from "all sides" of the partition(s).
|
||||||
* Unique files are copied in their entirety.
|
* Unique files are copied in their entirety.
|
||||||
* Byte ranges within the same file are merged. This is possible
|
* Byte ranges within the same file are merged. This is possible
|
||||||
due to Machi's restrictions on file naming and file offset
|
due to Machi's restrictions on file naming (file names are
|
||||||
assignment. Both file names and file offsets are always chosen
|
always assigned by Machi servers) and file offset assignments
|
||||||
by Machi servers according to rules which guarantee safe
|
(byte offsets are also always chosen by Machi servers according
|
||||||
mergeability. Server-assigned names are a characteristic of a
|
to rules which guarantee safe mergeability).
|
||||||
"blob store".
|
|
||||||
|
|
||||||
<a name="n1.5">
|
<a name="n1.4">
|
||||||
### 1.5. What is Machi like when operating in "strongly consistent" mode?
|
### 1.4. What is Machi like when operating in "strongly consistent"/"CP mode"?
|
||||||
|
|
||||||
The consistency semantics of file operations while in strongly
|
Machi's operating mode dictates how a Machi cluster will react to
|
||||||
consistency mode during and after network partitions are:
|
network partitions.
|
||||||
|
"CP mode" refers to the "C" and "P" properties of the "CAP
|
||||||
|
conjecture", meaning that the cluster will be "Consistent" and
|
||||||
|
"Partition tolerant".
|
||||||
|
|
||||||
|
The consistency semantics of file operations while in "CP mode" are
|
||||||
|
strongly consistent during and after network partitions:
|
||||||
|
|
||||||
* File write operations are permitted by any client on the "same side"
|
* File write operations are permitted by any client on the "same side"
|
||||||
of the network partition if and only if a quorum majority of Machi servers
|
of the network partition if and only if a quorum majority of Machi servers
|
||||||
|
@ -164,19 +175,19 @@ consistency mode during and after network partitions are:
|
||||||
|
|
||||||
Machi's design can provide the illusion of quorum minority write
|
Machi's design can provide the illusion of quorum minority write
|
||||||
availability if the cluster is configured to operate with "witness
|
availability if the cluster is configured to operate with "witness
|
||||||
servers". (This feature is partially implemented, as of December 2015.)
|
servers". (This feature is not implemented yet, as of June 2015.)
|
||||||
See Section 11 of
|
See Section 11 of
|
||||||
[Machi chain manager high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-chain-mgr.pdf)
|
[Machi chain manager high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-chain-mgr.pdf)
|
||||||
for more details.
|
for more details.
|
||||||
|
|
||||||
<a name="n1.6">
|
<a name="n1.5">
|
||||||
### 1.6. What does Machi's API look like?
|
### 1.5. What does Machi's API look like?
|
||||||
|
|
||||||
The Machi API only contains a handful of API operations. The function
|
The Machi API only contains a handful of API operations. The function
|
||||||
arguments shown below (in simplified form) use Erlang-style type annotations.
|
arguments shown below use Erlang-style type annotations.
|
||||||
|
|
||||||
append_chunk(Prefix:binary(), Chunk:binary(), CheckSum:binary()).
|
append_chunk(Prefix:binary(), Chunk:binary()).
|
||||||
append_chunk_extra(Prefix:binary(), Chunk:binary(), CheckSum:binary(), ExtraSpace:non_neg_integer()).
|
append_chunk_extra(Prefix:binary(), Chunk:binary(), ExtraSpace:non_neg_integer()).
|
||||||
read_chunk(File:binary(), Offset:non_neg_integer(), Size:non_neg_integer()).
|
read_chunk(File:binary(), Offset:non_neg_integer(), Size:non_neg_integer()).
|
||||||
|
|
||||||
checksum_list(File:binary()).
|
checksum_list(File:binary()).
|
||||||
|
@ -194,22 +205,17 @@ Internally, there is a more complex protocol used by individual
|
||||||
cluster members to manage file contents and to repair damaged/missing
|
cluster members to manage file contents and to repair damaged/missing
|
||||||
files. See Figure 3 in
|
files. See Figure 3 in
|
||||||
[Machi high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-machi.pdf)
|
[Machi high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-machi.pdf)
|
||||||
for more description.
|
for more details.
|
||||||
|
|
||||||
The definitions of both the "high level" external protocol and "low
|
<a name="n1.6">
|
||||||
level" internal protocol are in a
|
### 1.6. What licensing terms are used by Machi?
|
||||||
[Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview)
|
|
||||||
definition at [./src/machi.proto](./src/machi.proto).
|
|
||||||
|
|
||||||
<a name="n1.7">
|
|
||||||
### 1.7. What licensing terms are used by Machi?
|
|
||||||
|
|
||||||
All Machi source code and documentation is licensed by
|
All Machi source code and documentation is licensed by
|
||||||
[Basho Technologies, Inc.](http://www.basho.com/)
|
[Basho Technologies, Inc.](http://www.basho.com/)
|
||||||
under the [Apache Public License version 2](https://github.com/basho/machi/tree/master/LICENSE).
|
under the [Apache Public License version 2](https://github.com/basho/machi/tree/master/LICENSE).
|
||||||
|
|
||||||
<a name="n1.8">
|
<a name="n1.7">
|
||||||
### 1.8. Where can I find the Machi source code and documentation? Can I contribute?
|
### 1.7. Where can I find the Machi source code and documentation? Can I contribute?
|
||||||
|
|
||||||
All Machi source code and documentation can be found at GitHub:
|
All Machi source code and documentation can be found at GitHub:
|
||||||
[https://github.com/basho/machi](https://github.com/basho/machi).
|
[https://github.com/basho/machi](https://github.com/basho/machi).
|
||||||
|
@ -223,11 +229,11 @@ ideas for improvement, please see our contributing & collaboration
|
||||||
guidelines at
|
guidelines at
|
||||||
[https://github.com/basho/machi/blob/master/CONTRIBUTING.md](https://github.com/basho/machi/blob/master/CONTRIBUTING.md).
|
[https://github.com/basho/machi/blob/master/CONTRIBUTING.md](https://github.com/basho/machi/blob/master/CONTRIBUTING.md).
|
||||||
|
|
||||||
<a name="n1.9">
|
<a name="n1.8">
|
||||||
### 1.9. What is Machi's expected release schedule, packaging, and operating system/OS distribution support?
|
### 1.8. What is Machi's expected release schedule, packaging, and operating system/OS distribution support?
|
||||||
|
|
||||||
Basho expects that Machi's first major product release will take place
|
Basho expects that Machi's first release will take place near the end
|
||||||
during the 2nd quarter of 2016.
|
of calendar year 2015.
|
||||||
|
|
||||||
Basho's official support for operating systems (e.g. Linux, FreeBSD),
|
Basho's official support for operating systems (e.g. Linux, FreeBSD),
|
||||||
operating system packaging (e.g. CentOS rpm/yum package management,
|
operating system packaging (e.g. CentOS rpm/yum package management,
|
||||||
|
@ -302,15 +308,15 @@ file's writable phase).
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td> Does not have any file distribution/partitioning/sharding across
|
<td> Does not have any file distribution/partitioning/sharding across
|
||||||
Machi chains: in a single Machi chain, all files are replicated by
|
Machi clusters: in a single Machi cluster, all files are replicated by
|
||||||
all servers in the chain. The "random slicing" technique is used
|
all servers in the cluster. The "cluster of clusters" concept is used
|
||||||
to distribute/partition/shard files across multiple Machi clusters.
|
to distribute/partition/shard files across multiple Machi clusters.
|
||||||
<td> File distribution/partitioning/sharding is performed
|
<td> File distribution/partitioning/sharding is performed
|
||||||
automatically by the HDFS "name node".
|
automatically by the HDFS "name node".
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td> Machi requires no central "name node" for single chain use or
|
<td> Machi requires no central "name node" for single cluster use.
|
||||||
for multi-chain cluster use.
|
Machi requires no central "name node" for "cluster of clusters" use.
|
||||||
<td> Requires a single "namenode" server to maintain file system contents
|
<td> Requires a single "namenode" server to maintain file system contents
|
||||||
and file content mapping. (May be deployed with a "secondary
|
and file content mapping. (May be deployed with a "secondary
|
||||||
namenode" to reduce unavailability when the primary namenode fails.)
|
namenode" to reduce unavailability when the primary namenode fails.)
|
||||||
|
@ -476,8 +482,8 @@ difficult to adapt to Machi's design goals:
|
||||||
* Both protocols use quorum majority consensus, which requires a
|
* Both protocols use quorum majority consensus, which requires a
|
||||||
minimum of *2F + 1* working servers to tolerate *F* failures. For
|
minimum of *2F + 1* working servers to tolerate *F* failures. For
|
||||||
example, to tolerate 2 server failures, quorum majority protocols
|
example, to tolerate 2 server failures, quorum majority protocols
|
||||||
require a minimum of 5 servers. To tolerate the same number of
|
require a minimum of 5 servers. To tolerate the same number of
|
||||||
failures, Chain Replication requires a minimum of only 3 servers.
|
failures, Chain replication requires only 3 servers.
|
||||||
* Machi's use of "humming consensus" to manage internal server
|
* Machi's use of "humming consensus" to manage internal server
|
||||||
metadata state would also (probably) require conversion to Paxos or
|
metadata state would also (probably) require conversion to Paxos or
|
||||||
Raft. (Or "outsourced" to a service such as ZooKeeper.)
|
Raft. (Or "outsourced" to a service such as ZooKeeper.)
|
||||||
|
@ -494,17 +500,7 @@ Humming consensus is described in the
|
||||||
[Machi chain manager high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-chain-mgr.pdf).
|
[Machi chain manager high level design doc](https://github.com/basho/machi/tree/master/doc/high-level-chain-mgr.pdf).
|
||||||
|
|
||||||
<a name="n3.3">
|
<a name="n3.3">
|
||||||
### 3.3. Are there any presentations available about Humming Consensus?
|
### 3.3. Is it true that there's an allegory written to describe humming consensus?
|
||||||
|
|
||||||
Scott recently (November 2015) gave a presentation at the
|
|
||||||
[RICON 2015 conference](http://ricon.io) about one of the techniques
|
|
||||||
used by Machi; "Managing Chain Replication Metadata with
|
|
||||||
Humming Consensus" is available online now.
|
|
||||||
* [slides (PDF format)](http://ricon.io/speakers/slides/Scott_Fritchie_Ricon_2015.pdf)
|
|
||||||
* [video](https://www.youtube.com/watch?v=yR5kHL1bu1Q)
|
|
||||||
|
|
||||||
<a name="n3.4">
|
|
||||||
### 3.4. Is it true that there's an allegory written to describe Humming Consensus?
|
|
||||||
|
|
||||||
Yes. In homage to Leslie Lamport's original paper about the Paxos
|
Yes. In homage to Leslie Lamport's original paper about the Paxos
|
||||||
protocol, "The Part-Time Parliament", there is an allegorical story
|
protocol, "The Part-Time Parliament", there is an allegorical story
|
||||||
|
@ -515,8 +511,8 @@ The full story, full of wonder and mystery, is called
|
||||||
There is also a
|
There is also a
|
||||||
[short followup blog posting](http://www.snookles.com/slf-blog/2015/03/20/on-humming-consensus-an-allegory-part-2/).
|
[short followup blog posting](http://www.snookles.com/slf-blog/2015/03/20/on-humming-consensus-an-allegory-part-2/).
|
||||||
|
|
||||||
<a name="n3.5">
|
<a name="n3.4">
|
||||||
### 3.5. How is Machi tested?
|
### 3.4. How is Machi tested?
|
||||||
|
|
||||||
While not formally proven yet, Machi's implementation of Chain
|
While not formally proven yet, Machi's implementation of Chain
|
||||||
Replication and of humming consensus have been extensively tested with
|
Replication and of humming consensus have been extensively tested with
|
||||||
|
@ -541,20 +537,16 @@ change several times during any single test case) and a random series
|
||||||
of cluster operations, an event trace of all cluster activity is used
|
of cluster operations, an event trace of all cluster activity is used
|
||||||
to verify that no safety-critical rules have been violated.
|
to verify that no safety-critical rules have been violated.
|
||||||
|
|
||||||
All test code is available in the [./test](./test) subdirectory.
|
<a name="n3.5">
|
||||||
Modules that use QuickCheck will use a file suffix of `_eqc`, for
|
### 3.5. Does Machi require shared disk storage? e.g. iSCSI, NBD (Network Block Device), Fibre Channel disks
|
||||||
example, [./test/machi_ap_repair_eqc.erl](./test/machi_ap_repair_eqc.erl).
|
|
||||||
|
|
||||||
<a name="n3.6">
|
|
||||||
### 3.6. Does Machi require shared disk storage? e.g. iSCSI, NBD (Network Block Device), Fibre Channel disks
|
|
||||||
|
|
||||||
No, Machi's design assumes that each Machi server is a fully
|
No, Machi's design assumes that each Machi server is a fully
|
||||||
independent hardware and assumes only standard local disks (Winchester
|
independent hardware and assumes only standard local disks (Winchester
|
||||||
and/or SSD style) with local-only interfaces (e.g. SATA, SCSI, PCI) in
|
and/or SSD style) with local-only interfaces (e.g. SATA, SCSI, PCI) in
|
||||||
each machine.
|
each machine.
|
||||||
|
|
||||||
<a name="n3.7">
|
<a name="n3.6">
|
||||||
### 3.7. Does Machi require or assume that servers with large numbers of disks must use RAID-0/1/5/6/10/50/60 to create a single block device?
|
### 3.6. Does Machi require or assume that servers with large numbers of disks must use RAID-0/1/5/6/10/50/60 to create a single block device?
|
||||||
|
|
||||||
No. When used with servers with multiple disks, the intent is to
|
No. When used with servers with multiple disks, the intent is to
|
||||||
deploy multiple Machi servers per machine: one Machi server per disk.
|
deploy multiple Machi servers per machine: one Machi server per disk.
|
||||||
|
@ -572,13 +564,10 @@ deploy multiple Machi servers per machine: one Machi server per disk.
|
||||||
placement relative to 12 servers is smaller than a placement problem
|
placement relative to 12 servers is smaller than a placement problem
|
||||||
of managing 264 separate disks (if each of 12 servers has 22 disks).
|
of managing 264 separate disks (if each of 12 servers has 22 disks).
|
||||||
|
|
||||||
<a name="n3.8">
|
<a name="n3.7">
|
||||||
### 3.8. What language(s) is Machi written in?
|
### 3.7. What language(s) is Machi written in?
|
||||||
|
|
||||||
So far, Machi is written in Erlang, mostly. Machi uses at least one
|
So far, Machi is written in 100% Erlang.
|
||||||
library, [ELevelDB](https://github.com/basho/eleveldb), that is
|
|
||||||
implemented both in C++ and in Erlang, using Erlang NIFs (Native
|
|
||||||
Interface Functions) to allow Erlang code to call C++ functions.
|
|
||||||
|
|
||||||
In the event that we encounter a performance problem that cannot be
|
In the event that we encounter a performance problem that cannot be
|
||||||
solved within the Erlang/OTP runtime environment, all of Machi's
|
solved within the Erlang/OTP runtime environment, all of Machi's
|
||||||
|
@ -587,16 +576,8 @@ in C, Java, or other "gotta go fast fast FAST!!" programming
|
||||||
language. We expect that the Chain Replication manager and other
|
language. We expect that the Chain Replication manager and other
|
||||||
critical "control plane" software will remain in Erlang.
|
critical "control plane" software will remain in Erlang.
|
||||||
|
|
||||||
<a name="n3.9">
|
<a name="n3.8">
|
||||||
### 3.9. Can Machi run on Windows? Can Machi run on 32-bit platforms?
|
### 3.8. Does Machi use the Erlang/OTP network distribution system (aka "disterl")?
|
||||||
|
|
||||||
The ELevelDB NIF does not compile or run correctly on Erlang/OTP
|
|
||||||
Windows platforms, nor does it compile correctly on 32-bit platforms.
|
|
||||||
Machi should support all 64-bit UNIX-like platforms that are supported
|
|
||||||
by Erlang/OTP and ELevelDB.
|
|
||||||
|
|
||||||
<a name="n3.10">
|
|
||||||
### 3.10. Does Machi use the Erlang/OTP network distribution system (aka "disterl")?
|
|
||||||
|
|
||||||
No, Machi doesn't use Erlang/OTP's built-in distributed message
|
No, Machi doesn't use Erlang/OTP's built-in distributed message
|
||||||
passing system. The code would be *much* simpler if we did use
|
passing system. The code would be *much* simpler if we did use
|
||||||
|
@ -607,16 +588,19 @@ bit-twiddling magicSPEED ... without also having to find a replacement
|
||||||
for disterl. (Or without having to re-invent disterl's features in
|
for disterl. (Or without having to re-invent disterl's features in
|
||||||
another language.)
|
another language.)
|
||||||
|
|
||||||
All wire protocols used by Machi are defined & implemented using
|
<a name="artisanal-protocol">
|
||||||
[Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview).
|
In the first drafts of the Machi code, the inter-node communication
|
||||||
The definition file can be found at [./src/machi.proto](./src/machi.proto).
|
uses a hand-crafted, artisanal, mostly ASCII protocol as part of a
|
||||||
|
"demo day" quick & dirty prototype. Work is underway (summer of 2015)
|
||||||
|
to replace that protocol gradually with a well-structured,
|
||||||
|
well-documented protocol based on Protocol Buffers data serialization.
|
||||||
|
|
||||||
<a name="n3.11">
|
<a name="n3.9">
|
||||||
### 3.11. Can I use HTTP to write/read stuff into/from Machi?
|
### 3.9. Can I use HTTP to write/read stuff into/from Machi?
|
||||||
|
|
||||||
Short answer: No, not yet.
|
Yes, sort of. For as long as the legacy of
|
||||||
|
Machi's first internal protocol & code still
|
||||||
Longer answer: No, but it was possible as a hack, many months ago, see
|
survives, it's possible to use a
|
||||||
[primitive/hack'y HTTP interface that is described in this source code commit log](https://github.com/basho/machi/commit/6cebf397232cba8e63c5c9a0a8c02ba391b20fef).
|
[primitive/hack'y HTTP interface that is described in this source code commit log](https://github.com/basho/machi/commit/6cebf397232cba8e63c5c9a0a8c02ba391b20fef).
|
||||||
Please note that commit `6cebf397232cba8e63c5c9a0a8c02ba391b20fef` is
|
Please note that commit `6cebf397232cba8e63c5c9a0a8c02ba391b20fef` is
|
||||||
required to try using this feature: the code has since bit-rotted and
|
required to try using this feature: the code has since bit-rotted and
|
||||||
|
|
108
Makefile
108
Makefile
|
@ -1,94 +1,54 @@
|
||||||
REPO ?= machi
|
REBAR_BIN := $(shell which rebar)
|
||||||
PKG_REVISION ?= $(shell git describe --tags)
|
ifeq ($(REBAR_BIN),)
|
||||||
PKG_BUILD = 1
|
REBAR_BIN = ./rebar
|
||||||
BASE_DIR = $(shell pwd)
|
|
||||||
ERLANG_BIN = $(shell dirname $(shell which erl))
|
|
||||||
REBAR := $(shell which rebar)
|
|
||||||
ifeq ($(REBAR),)
|
|
||||||
REBAR = $(BASE_DIR)/rebar
|
|
||||||
endif
|
endif
|
||||||
OVERLAY_VARS ?=
|
|
||||||
EUNIT_OPTS = -v
|
|
||||||
|
|
||||||
.PHONY: rel stagedevrel deps package pkgclean edoc
|
.PHONY: rel deps package pkgclean edoc
|
||||||
|
|
||||||
all: deps compile
|
all: deps compile
|
||||||
|
|
||||||
compile:
|
compile:
|
||||||
$(REBAR) compile
|
$(REBAR_BIN) compile
|
||||||
|
|
||||||
## Make reltool happy by creating a fake entry in the deps dir for
|
|
||||||
## machi, because reltool really wants to have a path with
|
|
||||||
## "machi/ebin" at the end, but we also don't want infinite recursion
|
|
||||||
## if we just symlink "deps/machi" -> ".."
|
|
||||||
|
|
||||||
generate:
|
|
||||||
rm -rf deps/machi
|
|
||||||
mkdir deps/machi
|
|
||||||
ln -s ../../ebin deps/machi
|
|
||||||
ln -s ../../src deps/machi
|
|
||||||
$(REBAR) generate $(OVERLAY_VARS) 2>&1 | grep -v 'command does not apply to directory'
|
|
||||||
|
|
||||||
deps:
|
deps:
|
||||||
$(REBAR) get-deps
|
$(REBAR_BIN) get-deps
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(REBAR) -r clean
|
$(REBAR_BIN) -r clean
|
||||||
|
|
||||||
|
test: deps compile eunit
|
||||||
|
|
||||||
|
eunit:
|
||||||
|
$(REBAR_BIN) -v skip_deps=true eunit
|
||||||
|
|
||||||
edoc: edoc-clean
|
edoc: edoc-clean
|
||||||
$(REBAR) skip_deps=true doc
|
$(REBAR_BIN) skip_deps=true doc
|
||||||
|
|
||||||
edoc-clean:
|
edoc-clean:
|
||||||
rm -f edoc/*.png edoc/*.html edoc/*.css edoc/edoc-info
|
rm -f edoc/*.png edoc/*.html edoc/*.css edoc/edoc-info
|
||||||
|
|
||||||
pulse: compile
|
pulse: compile
|
||||||
@echo Sorry, PULSE test needs maintenance. -SLF
|
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
|
||||||
#env USE_PULSE=1 $(REBAR) skip_deps=true clean compile
|
env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit -v
|
||||||
#env USE_PULSE=1 $(REBAR) skip_deps=true -D PULSE eunit -v
|
|
||||||
|
|
||||||
##
|
APPS = kernel stdlib sasl erts ssl compiler eunit crypto
|
||||||
## Release targets
|
|
||||||
##
|
|
||||||
rel: deps compile generate
|
|
||||||
|
|
||||||
relclean:
|
|
||||||
rm -rf rel/$(REPO)
|
|
||||||
|
|
||||||
stage : rel
|
|
||||||
$(foreach dep,$(wildcard deps/*), rm -rf rel/$(REPO)/lib/$(shell basename $(dep))* && ln -sf $(abspath $(dep)) rel/$(REPO)/lib;)
|
|
||||||
|
|
||||||
##
|
|
||||||
## Developer targets
|
|
||||||
##
|
|
||||||
## devN - Make a dev build for node N
|
|
||||||
## stagedevN - Make a stage dev build for node N (symlink libraries)
|
|
||||||
## devrel - Make a dev build for 1..$DEVNODES
|
|
||||||
## stagedevrel Make a stagedev build for 1..$DEVNODES
|
|
||||||
##
|
|
||||||
## Example, make a 68 node devrel cluster
|
|
||||||
## make stagedevrel DEVNODES=68
|
|
||||||
|
|
||||||
.PHONY : stagedevrel devrel
|
|
||||||
DEVNODES ?= 3
|
|
||||||
|
|
||||||
# 'seq' is not available on all *BSD, so using an alternate in awk
|
|
||||||
SEQ = $(shell awk 'BEGIN { for (i = 1; i < '$(DEVNODES)'; i++) printf("%i ", i); print i ;exit(0);}')
|
|
||||||
|
|
||||||
$(eval stagedevrel : $(foreach n,$(SEQ),stagedev$(n)))
|
|
||||||
$(eval devrel : $(foreach n,$(SEQ),dev$(n)))
|
|
||||||
|
|
||||||
dev% : all
|
|
||||||
mkdir -p dev
|
|
||||||
rel/gen_dev $@ rel/vars/dev_vars.config.src rel/vars/$@_vars.config
|
|
||||||
(cd rel && ../rebar generate target_dir=../dev/$@ overlay_vars=vars/$@_vars.config)
|
|
||||||
|
|
||||||
stagedev% : dev%
|
|
||||||
$(foreach dep,$(wildcard deps/*), rm -rf dev/$^/lib/$(shell basename $(dep))* && ln -sf $(abspath $(dep)) dev/$^/lib;)
|
|
||||||
|
|
||||||
devclean: clean
|
|
||||||
rm -rf dev
|
|
||||||
|
|
||||||
DIALYZER_APPS = kernel stdlib sasl erts ssl compiler eunit crypto public_key syntax_tools
|
|
||||||
PLT = $(HOME)/.machi_dialyzer_plt
|
PLT = $(HOME)/.machi_dialyzer_plt
|
||||||
|
|
||||||
include tools.mk
|
build_plt: deps compile
|
||||||
|
dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
|
||||||
|
|
||||||
|
DIALYZER_DEP_APPS = ebin/machi_pb.beam deps/protobuffs/ebin
|
||||||
|
DIALYZER_FLAGS = -Wno_return -Wrace_conditions -Wunderspecs
|
||||||
|
|
||||||
|
dialyzer: deps compile
|
||||||
|
dialyzer $(DIALYZER_FLAGS) --plt $(PLT) ebin $(DIALYZER_DEP_APPS) | \
|
||||||
|
egrep -v -f ./filter-dialyzer-dep-warnings
|
||||||
|
|
||||||
|
dialyzer-test: deps compile
|
||||||
|
echo Force rebar to recompile .eunit dir w/o running tests > /dev/null
|
||||||
|
rebar skip_deps=true eunit suite=lamport_clock
|
||||||
|
dialyzer $(DIALYZER_FLAGS) --plt $(PLT) .eunit $(DIALYZER_DEP_APPS) | \
|
||||||
|
egrep -v -f ./filter-dialyzer-dep-warnings
|
||||||
|
|
||||||
|
clean_plt:
|
||||||
|
rm $(PLT)
|
||||||
|
|
227
README.md
227
README.md
|
@ -1,136 +1,59 @@
|
||||||
# Machi: a distributed, decentralized blob/large file store
|
# Machi
|
||||||
|
|
||||||
[Travis-CI](http://travis-ci.org/basho/machi) :: ![Travis-CI](https://secure.travis-ci.org/basho/machi.png)
|
Our goal is a robust & reliable, distributed, highly available(*),
|
||||||
|
large file store based upon write-once registers, append-only files,
|
||||||
|
Chain Replication, and client-server style architecture. All members
|
||||||
|
of the cluster store all of the files. Distributed load
|
||||||
|
balancing/sharding of files is __outside__ of the scope of this
|
||||||
|
system. However, it is a high priority that this system be able to
|
||||||
|
integrate easily into systems that do provide distributed load
|
||||||
|
balancing, e.g., Riak Core. Although strong consistency is a major
|
||||||
|
feature of Chain Replication, first use cases will focus mainly on
|
||||||
|
eventual consistency features --- strong consistency design will be
|
||||||
|
discussed in a separate design document (read more below).
|
||||||
|
|
||||||
Outline
|
The ability for Machi to maintain strong consistency will make it
|
||||||
|
attractive as a toolkit for building things like CORFU and Tango as
|
||||||
|
well as better-known open source software such as Kafka's file
|
||||||
|
replication. (See the bibliography of the [Machi high level design
|
||||||
|
doc](./doc/high-level-machi.pdf) for further references.)
|
||||||
|
|
||||||
1. [Why another blob/file store?](#sec1)
|
(*) Capable of operating in "AP mode" or "CP mode" relative to the
|
||||||
2. [Where to learn more about Machi](#sec2)
|
CAP Theorem.
|
||||||
3. [Development status summary](#sec3)
|
|
||||||
4. [Contributing to Machi's development](#sec4)
|
|
||||||
|
|
||||||
<a name="sec1">
|
## Status: mid-June 2015: work is underway
|
||||||
## 1. Why another blob/file store?
|
|
||||||
|
|
||||||
Our goal is a robust & reliable, distributed, highly available, large
|
The two major design documents for Machi are now ready or nearly ready
|
||||||
file and blob store. Such stores already exist, both in the open source world
|
for internal Basho and external party review. Please see the
|
||||||
and in the commercial world. Why reinvent the wheel? We believe
|
[doc](./doc) directory's [README](./doc) for details
|
||||||
there are three reasons, ordered by decreasing rarity.
|
|
||||||
|
|
||||||
1. We want end-to-end checksums for all file data, from the initial
|
* Machi high level design
|
||||||
file writer to every file reader, anywhere, all the time.
|
* Machi chain self-management design
|
||||||
2. We need flexibility to trade consistency for availability:
|
|
||||||
e.g. weak consistency in exchange for being available in cases
|
|
||||||
of partial system failure.
|
|
||||||
3. We want to manage file replicas in a way that's provably correct
|
|
||||||
and also easy to test.
|
|
||||||
|
|
||||||
Criterion #3 is difficult to find in the open source world but perhaps
|
The work of implementing the first draft of Machi is now underway. The
|
||||||
not impossible.
|
code from the [prototype/demo-day-hack](prototype/demo-day-hack/) directory is
|
||||||
|
being used as the initial scaffolding.
|
||||||
|
|
||||||
If we have app use cases where availability is more important than
|
* The chain manager is ready for "AP mode" use in eventual
|
||||||
consistency, then systems that meet criteria #2 are also rare.
|
consistency use cases.
|
||||||
Most file stores provide only strong consistency and therefore
|
|
||||||
have unavoidable, unavailable behavior when parts of the system
|
|
||||||
fail.
|
|
||||||
What if we want a file store that is always available to write new
|
|
||||||
file data and attempts best-effort file reads?
|
|
||||||
|
|
||||||
If we really do care about data loss and/or data corruption, then we
|
* All Machi client/server protocols are based on
|
||||||
really want both #3 and #1. Unfortunately, systems that meet
|
[Protocol Buffers](https://developers.google.com/protocol-buffers/docs/overview).
|
||||||
criteria #1 are _very rare_. (Nonexistent?)
|
* The current specification for Machi's protocols can be found at
|
||||||
Why? This is 2015. We have decades of research that shows
|
[https://github.com/basho/machi/blob/master/src/machi.proto](https://github.com/basho/machi/blob/master/src/machi.proto).
|
||||||
that computer hardware can (and
|
* The Machi PB protocol is not yet stable. Expect change!
|
||||||
indeed does) corrupt data at nearly every level of the modern
|
* The Erlang language client implementation of the high-level
|
||||||
client/server application stack. Systems with end-to-end data
|
protocol flavor is very brittle (e.g., very little error
|
||||||
corruption detection should be ubiquitous today. Alas, they are not.
|
handling yet).
|
||||||
|
* The Erlang language client implementation of the low-level
|
||||||
|
protocol flavor is still a work-in-progress ... but it is
|
||||||
|
more robust than the high-level library's implementation.
|
||||||
|
|
||||||
Machi is an effort to change the deplorable state of the world, one
|
If you'd like to work on a protocol such as Thrift, UBF,
|
||||||
Erlang function at a time.
|
msgpack over UDP, or some other protocol, let us know by
|
||||||
|
[opening an issue](./issues/new) to discuss it.
|
||||||
|
|
||||||
<a name="sec2">
|
## Contributing to Machi: source code, documentation, etc.
|
||||||
## 2. Where to learn more about Machi
|
|
||||||
|
|
||||||
The two major design documents for Machi are now mostly stable.
|
|
||||||
Please see the [doc](./doc) directory's [README](./doc) for details.
|
|
||||||
|
|
||||||
We also have a
|
|
||||||
[Frequently Asked Questions (FAQ) list](./FAQ.md).
|
|
||||||
|
|
||||||
Scott recently (November 2015) gave a presentation at the
|
|
||||||
[RICON 2015 conference](http://ricon.io) about one of the techniques
|
|
||||||
used by Machi; "Managing Chain Replication Metadata with
|
|
||||||
Humming Consensus" is available online now.
|
|
||||||
* [slides (PDF format)](http://ricon.io/speakers/slides/Scott_Fritchie_Ricon_2015.pdf)
|
|
||||||
* [video](https://www.youtube.com/watch?v=yR5kHL1bu1Q)
|
|
||||||
|
|
||||||
See later in this document for how to run the Humming Consensus demos,
|
|
||||||
including the network partition simulator.
|
|
||||||
|
|
||||||
<a name="sec3">
|
|
||||||
## 3. Development status summary
|
|
||||||
|
|
||||||
Mid-March 2016: The Machi development team has been downsized in
|
|
||||||
recent months, and the pace of development has slowed. Here is a
|
|
||||||
summary of the status of Machi's major components.
|
|
||||||
|
|
||||||
* Humming Consensus and the chain manager
|
|
||||||
* No new safety bugs have been found by model-checking tests.
|
|
||||||
* A new document,
|
|
||||||
[Hands-on experiments with Machi and Humming Consensus](doc/humming-consensus-demo.md)
|
|
||||||
is now available. It is a tutorial for setting up a 3 virtual
|
|
||||||
machine Machi cluster and how to demonstrate the chain manager's
|
|
||||||
reactions to server stops & starts, crashes & restarts, and pauses
|
|
||||||
(simulated by `SIGSTOP` and `SIGCONT`).
|
|
||||||
* The chain manager can still make suboptimal-but-safe choices for
|
|
||||||
chain transitions when a server hangs/pauses temporarily.
|
|
||||||
* Recent chain manager changes have made the instability window
|
|
||||||
much shorter when the slow/paused server resumes execution.
|
|
||||||
* Scott believes that a modest change to the chain manager's
|
|
||||||
calculation of a new projection can make flapping in this (and
|
|
||||||
many other cases) less likely. Currently, the new local
|
|
||||||
projection is calculated using only local state (i.e., the chain
|
|
||||||
manager's internal state + the fitness server's state).
|
|
||||||
However, if the "latest" projection read from the public
|
|
||||||
projection stores were also input to the new projection
|
|
||||||
calculation function, then many obviously bad projections can be
|
|
||||||
avoided without needing rounds of Humming Consensus to
|
|
||||||
demonstrate that a bad projection is bad.
|
|
||||||
|
|
||||||
* FLU/data server process
|
|
||||||
* All known correctness bugs have been fixed.
|
|
||||||
* Performance has not yet been measured. Performance measurement
|
|
||||||
and enhancements are scheduled to start in the middle of March 2016.
|
|
||||||
(This will include a much-needed update to the `basho_bench` driver.)
|
|
||||||
|
|
||||||
* Access protocols and client libraries
|
|
||||||
* The protocol used by both external clients and internally (instead
|
|
||||||
of using Erlang's native message passing mechanisms) is based on
|
|
||||||
Protocol Buffers.
|
|
||||||
[Machi PB protocol specification: ./src/machi.proto](./src/machi.proto)
|
|
||||||
* At the moment, the PB specification contains two protocols.
|
|
||||||
Sometime in the near future, the spec will be split to separate
|
|
||||||
the external client API (the "high" protocol) from the internal
|
|
||||||
communication API (the "low" protocol).
|
|
||||||
|
|
||||||
* Recent conference talks about Machi
|
|
||||||
* Erlang Factory San Francisco 2016
|
|
||||||
[the slides and video recording](http://www.erlang-factory.com/sfbay2016/scott-lystig-fritchie)
|
|
||||||
will be available a few weeks after the conference ends on March
|
|
||||||
11, 2016.
|
|
||||||
* Ricon 2015
|
|
||||||
* [The slides](http://ricon.io/archive/2015/slides/Scott_Fritchie_Ricon_2015.pdf)
|
|
||||||
* and the [video recording](https://www.youtube.com/watch?v=yR5kHL1bu1Q&index=13&list=PL9Jh2HsAWHxIc7Tt2M6xez_TOP21GBH6M)
|
|
||||||
are now available.
|
|
||||||
* If you would like to run the Humming Consensus code (with or without
|
|
||||||
the network partition simulator) as described in the RICON 2015
|
|
||||||
presentation, please see the
|
|
||||||
[Humming Consensus demo doc](./doc/humming-consensus-demo.md).
|
|
||||||
|
|
||||||
<a name="sec4">
|
|
||||||
## 4. Contributing to Machi's development
|
|
||||||
|
|
||||||
### 4.1 License
|
|
||||||
|
|
||||||
Basho Technologies, Inc. has committed to licensing all work for Machi
|
Basho Technologies, Inc. has committed to licensing all work for Machi
|
||||||
under the
|
under the
|
||||||
|
@ -146,29 +69,57 @@ We invite all contributors to review the
|
||||||
[CONTRIBUTING.md](./CONTRIBUTING.md) document for guidelines for
|
[CONTRIBUTING.md](./CONTRIBUTING.md) document for guidelines for
|
||||||
working with the Basho development team.
|
working with the Basho development team.
|
||||||
|
|
||||||
### 4.2 Development environment requirements
|
## A brief survey of the directories in this repository
|
||||||
|
|
||||||
|
* A list of Frequently Asked Questions, a.k.a.
|
||||||
|
[the Machi FAQ](./FAQ.md).
|
||||||
|
|
||||||
|
* The [doc](./doc/) directory: home for major documents about Machi:
|
||||||
|
high level design documents as well as exploration of features still
|
||||||
|
under design & review within Basho.
|
||||||
|
|
||||||
|
* The `ebin` directory: used for compiled application code
|
||||||
|
|
||||||
|
* The `include`, `src`, and `test` directories: contain the header
|
||||||
|
files, source files, and test code for Machi, respectively.
|
||||||
|
|
||||||
|
* The [prototype](./prototype/) directory: contains proof of concept
|
||||||
|
code, scaffolding libraries, and other exploratory code. Curious
|
||||||
|
readers should see the [prototype/README.md](./prototype/README.md)
|
||||||
|
file for more explanation of the small sub-projects found here.
|
||||||
|
|
||||||
|
## Development environment requirements
|
||||||
|
|
||||||
All development to date has been done with Erlang/OTP version 17 on OS
|
All development to date has been done with Erlang/OTP version 17 on OS
|
||||||
X. The only known limitations for using R16 are minor type
|
X. The only known limitations for using R16 are minor type
|
||||||
specification differences between R16 and 17, but we strongly suggest
|
specification differences between R16 and 17, but we strongly suggest
|
||||||
continuing development using version 17.
|
continuing development using version 17.
|
||||||
|
|
||||||
We also assume that you have the standard UNIX/Linux developer
|
We also assume that you have the standard UNIX/Linux developer
|
||||||
tool chain for C and C++ applications. Also, we assume
|
tool chain for C and C++ applications. Specifically, we assume `make`
|
||||||
that Git and GNU Make are available.
|
is available. The utility used to compile the Machi source code,
|
||||||
The utility used to compile the Machi source code,
|
|
||||||
`rebar`, is pre-compiled and included in the repo.
|
`rebar`, is pre-compiled and included in the repo.
|
||||||
For more details, please see the
|
|
||||||
[Machi development environment prerequisites doc](./doc/dev-prerequisites.md).
|
|
||||||
|
|
||||||
Machi has a dependency on the
|
There are no known OS limits at this time: any platform that supports
|
||||||
[ELevelDB](https://github.com/basho/eleveldb) library. ELevelDB only
|
Erlang/OTP should be sufficient for Machi. This may change over time
|
||||||
supports UNIX/Linux OSes and 64-bit versions of Erlang/OTP only; we
|
(e.g., adding NIFs which can make full portability to Windows OTP
|
||||||
apologize to Windows-based and 32-bit-based Erlang developers for this
|
environments difficult), but it hasn't happened yet.
|
||||||
restriction.
|
|
||||||
|
|
||||||
### 4.3 New protocols and features
|
## Contributions
|
||||||
|
|
||||||
|
Basho encourages contributions to Riak from the community. Here’s how
|
||||||
|
to get started.
|
||||||
|
|
||||||
|
* Fork the appropriate sub-projects that are affected by your change.
|
||||||
|
* Create a topic branch for your change and checkout that branch.
|
||||||
|
git checkout -b some-topic-branch
|
||||||
|
* Make your changes and run the test suite if one is provided. (see below)
|
||||||
|
* Commit your changes and push them to your fork.
|
||||||
|
* Open pull-requests for the appropriate projects.
|
||||||
|
* Contributors will review your pull request, suggest changes, and merge it when it’s ready and/or offer feedback.
|
||||||
|
* To report a bug or issue, please open a new issue against this repository.
|
||||||
|
|
||||||
|
-The Machi team at Basho,
|
||||||
|
[Scott Lystig Fritchie](mailto:scott@basho.com), technical lead, and
|
||||||
|
[Matt Brender](mailto:mbrender@basho.com), your developer advocate.
|
||||||
|
|
||||||
If you'd like to work on a protocol such as Thrift, UBF,
|
|
||||||
msgpack over UDP, or some other protocol, let us know by
|
|
||||||
[opening an issue to discuss it](./issues/new).
|
|
||||||
|
|
|
@ -55,60 +55,3 @@ func, and pattern match Erlang style in that func.
|
||||||
|
|
||||||
** TODO Move the FLU server to gen_server behavior?
|
** TODO Move the FLU server to gen_server behavior?
|
||||||
|
|
||||||
|
|
||||||
* DONE Chain manager CP mode, Plan B
|
|
||||||
** SKIP Maybe? Change ch_mgr to use middleworker
|
|
||||||
**** DONE Is it worthwhile? Is the parallelism so important? No, probably.
|
|
||||||
**** SKIP Move middleworker func to utility module?
|
|
||||||
** DONE Add new proc to psup group
|
|
||||||
*** DONE Name: machi_fitness
|
|
||||||
** DONE ch_mgr keeps its current proc struct: i.e. same 1 proc as today
|
|
||||||
** NO chmgr asks hosed mgr for hosed list @ start of react_to_env
|
|
||||||
** DONE For all hosed, do *async*: try to read latest proj.
|
|
||||||
*** NO If OK, inform hosed mgr: status change will be used by next HC iter.
|
|
||||||
*** NO If fail, no change, because that server is already known to be hosed
|
|
||||||
*** DONE For all non-hosed, continue as the chain manager code does today
|
|
||||||
*** DONE Any new errors are added to UpNodes/DownNodes tracking as used today
|
|
||||||
*** DONE At end of react loop, if UpNodes list differs, inform hosed mgr.
|
|
||||||
|
|
||||||
* DONE fitness_mon, the fitness monitor
|
|
||||||
** DONE Map key & val sketch
|
|
||||||
|
|
||||||
Logical sketch:
|
|
||||||
|
|
||||||
Map key: ObservingServerName::atom()
|
|
||||||
|
|
||||||
Map val: { ObservingServerLastModTime::now(),
|
|
||||||
UnfitList::list(ServerName::atom()),
|
|
||||||
AdminDownList::list(ServerName::atom()),
|
|
||||||
Props::proplist() }
|
|
||||||
|
|
||||||
Implementation sketch:
|
|
||||||
|
|
||||||
1. Use CRDT map.
|
|
||||||
2. If map key is not atom, then atom->string or atom->binary is fine.
|
|
||||||
3. For map value, is it possible CRDT LWW type?
|
|
||||||
|
|
||||||
** DONE Investigate riak_dt data structure definition, manipulating, etc.
|
|
||||||
** DONE Add dependency on riak_dt
|
|
||||||
** DONE Update is an entire dict from Observer O
|
|
||||||
*** DONE Merge my pending map + update map + my last mod time + my unfit list
|
|
||||||
*** DONE if merged /= pending:
|
|
||||||
**** DONE Schedule async tick (more)
|
|
||||||
|
|
||||||
Tick message contains list of servers with differing state as of this
|
|
||||||
instant in time... we want to avoid triggering decisions about
|
|
||||||
fitness/unfitness for other servers where we might have received less
|
|
||||||
than a full time period's worth of waiting.
|
|
||||||
|
|
||||||
**** DONE Spam merged map to All_list -- [Me]
|
|
||||||
**** DONE Set pending <- merged
|
|
||||||
|
|
||||||
*** DONE When we receive an async tick
|
|
||||||
**** DONE set active map <- pending map for all servers in ticks list
|
|
||||||
**** DONE Send ch_mgr a react_to_env tick trigger
|
|
||||||
*** DONE react_to_env tick trigger actions
|
|
||||||
**** DONE Filter active map to remove stale entries (i.e. no update in 1 hour)
|
|
||||||
**** DONE If time since last map spam is too long, spam our *pending* map
|
|
||||||
**** DONE Proceed with normal react processing, using *active* map for AllHosed!
|
|
||||||
|
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
### The auto-generated code of machi_pb.beam has some complaints, not fixed yet.
|
|
||||||
machi_pb.erl:0:
|
|
||||||
##################################################
|
|
||||||
######## Specific types #####################
|
|
||||||
##################################################
|
|
||||||
Unknown types:
|
|
||||||
basho_bench_config:get/2
|
|
||||||
machi_partition_simulator:get/1
|
|
||||||
hamcrest:matchspec/0
|
|
||||||
##################################################
|
|
||||||
######## Specific messages #####################
|
|
||||||
##################################################
|
|
||||||
machi_chain_manager1.erl:2473: The created fun has no local return
|
|
||||||
machi_chain_manager1.erl:2184: The pattern <_P1, P2, Else = {'expected_author2', UPI1_tail, _}> can never match the type <#projection_v1{epoch_number::'undefined' | non_neg_integer(),epoch_csum::'undefined' | binary(),author_server::atom(),chain_name::atom(),all_members::'undefined' | [atom()],witnesses::[atom()],creation_time::'undefined' | {non_neg_integer(),non_neg_integer(),non_neg_integer()},mode::'ap_mode' | 'cp_mode',upi::'undefined' | [atom()],repairing::'undefined' | [atom()],down::'undefined' | [atom()],dbg::'undefined' | [any()],dbg2::'undefined' | [any()],members_dict::'undefined' | [{_,_}]},#projection_v1{epoch_number::'undefined' | non_neg_integer(),epoch_csum::binary(),author_server::atom(),chain_name::atom(),all_members::'undefined' | [atom()],witnesses::[atom()],creation_time::'undefined' | {non_neg_integer(),non_neg_integer(),non_neg_integer()},mode::'ap_mode' | 'cp_mode',upi::'undefined' | [atom()],repairing::'undefined' | [atom()],down::'undefined' | [atom()],dbg::'undefined' | [any()],dbg2::'undefined' | [any()],members_dict::'undefined' | [{_,_}]},'true'>
|
|
||||||
machi_chain_manager1.erl:2233: The pattern <_P1 = {'projection_v1', _, _, _, _, _, _, _, 'cp_mode', UPI1, Repairing1, _, _, _, _}, _P2 = {'projection_v1', _, _, _, _, _, _, _, 'cp_mode', UPI2, Repairing2, _, _, _, _}, Else = {'epoch_not_si', EpochX, 'not_gt', EpochY}> can never match the type <#projection_v1{epoch_number::'undefined' | non_neg_integer(),epoch_csum::'undefined' | binary(),author_server::atom(),chain_name::atom(),all_members::'undefined' | [atom()],witnesses::[atom()],creation_time::'undefined' | {non_neg_integer(),non_neg_integer(),non_neg_integer()},mode::'ap_mode' | 'cp_mode',upi::'undefined' | [atom()],repairing::'undefined' | [atom()],down::'undefined' | [atom()],dbg::'undefined' | [any()],dbg2::'undefined' | [any()],members_dict::'undefined' | [{_,_}]},#projection_v1{epoch_number::'undefined' | non_neg_integer(),epoch_csum::binary(),author_server::atom(),chain_name::atom(),all_members::'undefined' | [atom()],witnesses::[atom()],creation_time::'undefined' | {non_neg_integer(),non_neg_integer(),non_neg_integer()},mode::'ap_mode' | 'cp_mode',upi::'undefined' | [atom()],repairing::'undefined' | [atom()],down::'undefined' | [atom()],dbg::'undefined' | [any()],dbg2::'undefined' | [any()],members_dict::'undefined' | [{_,_}]},'true'>
|
|
|
@ -6,6 +6,20 @@ Erlang documentation, please use this link:
|
||||||
|
|
||||||
## Documents in this directory
|
## Documents in this directory
|
||||||
|
|
||||||
|
### chain-self-management-sketch.org
|
||||||
|
|
||||||
|
[chain-self-management-sketch.org](chain-self-management-sketch.org)
|
||||||
|
is a mostly-deprecated draft of
|
||||||
|
an introduction to the
|
||||||
|
self-management algorithm proposed for Machi. Most material has been
|
||||||
|
moved to the [high-level-chain-mgr.pdf](high-level-chain-mgr.pdf) document.
|
||||||
|
|
||||||
|
### cluster-of-clusters (directory)
|
||||||
|
|
||||||
|
This directory contains the sketch of the "cluster of clusters" design
|
||||||
|
strawman for partitioning/distributing/sharding files across a large
|
||||||
|
number of independent Machi clusters.
|
||||||
|
|
||||||
### high-level-machi.pdf
|
### high-level-machi.pdf
|
||||||
|
|
||||||
[high-level-machi.pdf](high-level-machi.pdf)
|
[high-level-machi.pdf](high-level-machi.pdf)
|
||||||
|
@ -36,9 +50,9 @@ introduction to the Humming Consensus algorithm. Its abstract:
|
||||||
> of file updates to all replica servers in a Machi cluster. Chain
|
> of file updates to all replica servers in a Machi cluster. Chain
|
||||||
> Replication is a variation of primary/backup replication where the
|
> Replication is a variation of primary/backup replication where the
|
||||||
> order of updates between the primary server and each of the backup
|
> order of updates between the primary server and each of the backup
|
||||||
> servers is strictly ordered into a single "chain". Management of
|
> servers is strictly ordered into a single ``chain''. Management of
|
||||||
> Chain Replication's metadata, e.g., "What is the current order of
|
> Chain Replication's metadata, e.g., ``What is the current order of
|
||||||
> servers in the chain?", remains an open research problem. The
|
> servers in the chain?'', remains an open research problem. The
|
||||||
> current state of the art for Chain Replication metadata management
|
> current state of the art for Chain Replication metadata management
|
||||||
> relies on an external oracle (e.g., ZooKeeper) or the Elastic
|
> relies on an external oracle (e.g., ZooKeeper) or the Elastic
|
||||||
> Replication algorithm.
|
> Replication algorithm.
|
||||||
|
@ -46,7 +60,7 @@ introduction to the Humming Consensus algorithm. Its abstract:
|
||||||
> This document describes the Machi chain manager, the component
|
> This document describes the Machi chain manager, the component
|
||||||
> responsible for managing Chain Replication metadata state. The chain
|
> responsible for managing Chain Replication metadata state. The chain
|
||||||
> manager uses a new technique, based on a variation of CORFU, called
|
> manager uses a new technique, based on a variation of CORFU, called
|
||||||
> "humming consensus".
|
> ``humming consensus''.
|
||||||
> Humming consensus does not require active participation by all or even
|
> Humming consensus does not require active participation by all or even
|
||||||
> a majority of participants to make decisions. Machi's chain manager
|
> a majority of participants to make decisions. Machi's chain manager
|
||||||
> bases its logic on humming consensus to make decisions about how to
|
> bases its logic on humming consensus to make decisions about how to
|
||||||
|
@ -57,18 +71,3 @@ introduction to the Humming Consensus algorithm. Its abstract:
|
||||||
> decision during that epoch. When a differing decision is discovered,
|
> decision during that epoch. When a differing decision is discovered,
|
||||||
> new time epochs are proposed in which a new consensus is reached and
|
> new time epochs are proposed in which a new consensus is reached and
|
||||||
> disseminated to all available participants.
|
> disseminated to all available participants.
|
||||||
|
|
||||||
### chain-self-management-sketch.org
|
|
||||||
|
|
||||||
[chain-self-management-sketch.org](chain-self-management-sketch.org)
|
|
||||||
is a mostly-deprecated draft of
|
|
||||||
an introduction to the
|
|
||||||
self-management algorithm proposed for Machi. Most material has been
|
|
||||||
moved to the [high-level-chain-mgr.pdf](high-level-chain-mgr.pdf) document.
|
|
||||||
|
|
||||||
### cluster (directory)
|
|
||||||
|
|
||||||
This directory contains the sketch of the cluster design
|
|
||||||
strawman for partitioning/distributing/sharding files across a large
|
|
||||||
number of independent Machi chains.
|
|
||||||
|
|
||||||
|
|
BIN
doc/cluster-of-clusters/migration-3to4.png
Normal file
BIN
doc/cluster-of-clusters/migration-3to4.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.7 KiB |
BIN
doc/cluster-of-clusters/migration-4.png
Normal file
BIN
doc/cluster-of-clusters/migration-4.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.8 KiB |
435
doc/cluster-of-clusters/name-game-sketch.org
Normal file
435
doc/cluster-of-clusters/name-game-sketch.org
Normal file
|
@ -0,0 +1,435 @@
|
||||||
|
-*- mode: org; -*-
|
||||||
|
#+TITLE: Machi cluster-of-clusters "name game" sketch
|
||||||
|
#+AUTHOR: Scott
|
||||||
|
#+STARTUP: lognotedone hidestars indent showall inlineimages
|
||||||
|
#+SEQ_TODO: TODO WORKING WAITING DONE
|
||||||
|
#+COMMENT: M-x visual-line-mode
|
||||||
|
#+COMMENT: Also, disable auto-fill-mode
|
||||||
|
|
||||||
|
* 1. "Name Games" with random-slicing style consistent hashing
|
||||||
|
|
||||||
|
Our goal: to distribute lots of files very evenly across a cluster of
|
||||||
|
Machi clusters (hereafter called a "cluster of clusters" or "CoC").
|
||||||
|
|
||||||
|
* 2. Assumptions
|
||||||
|
|
||||||
|
** Basic familiarity with Machi high level design and Machi's "projection"
|
||||||
|
|
||||||
|
The [[https://github.com/basho/machi/blob/master/doc/high-level-machi.pdf][Machi high level design document]] contains all of the basic
|
||||||
|
background assumed by the rest of this document.
|
||||||
|
|
||||||
|
** Familiarity with the Machi cluster-of-clusters/CoC concept
|
||||||
|
|
||||||
|
This isn't yet well-defined (April 2015). However, it's clear from
|
||||||
|
the [[https://github.com/basho/machi/blob/master/doc/high-level-machi.pdf][Machi high level design document]] that Machi alone does not support
|
||||||
|
any kind of file partitioning/distribution/sharding across multiple
|
||||||
|
small Machi clusters. There must be another layer above a Machi cluster to
|
||||||
|
provide such partitioning services.
|
||||||
|
|
||||||
|
The name "cluster of clusters" originated within Basho to avoid
|
||||||
|
conflicting use of the word "cluster". A Machi cluster is usually
|
||||||
|
synonymous with a single Chain Replication chain and a single set of
|
||||||
|
machines (e.g. 2-5 machines). However, in the not-so-far future, we
|
||||||
|
expect much more complicated patterns of Chain Replication to be used
|
||||||
|
in real-world deployments.
|
||||||
|
|
||||||
|
"Cluster of clusters" is clunky and long, but we haven't found a good
|
||||||
|
substitute yet. If you have a good suggestion, please contact us!
|
||||||
|
~^_^~
|
||||||
|
|
||||||
|
Using the [[https://github.com/basho/machi/tree/master/prototype/demo-day-hack][cluster-of-clusters quick-and-dirty prototype]] as an
|
||||||
|
architecture sketch, let's now assume that we have ~N~ independent Machi
|
||||||
|
clusters. We wish to provide partitioned/distributed file storage
|
||||||
|
across all ~N~ clusters. We call the entire collection of ~N~ Machi
|
||||||
|
clusters a "cluster of clusters", or abbreviated "CoC".
|
||||||
|
|
||||||
|
** Continue CoC prototype's assumption: a Machi cluster is unaware of CoC
|
||||||
|
|
||||||
|
Let's continue with an assumption that an individual Machi cluster
|
||||||
|
inside of the cluster-of-clusters is completely unaware of the
|
||||||
|
cluster-of-clusters layer.
|
||||||
|
|
||||||
|
We may need to break this assumption sometime in the future? It isn't
|
||||||
|
quite clear yet, sorry.
|
||||||
|
|
||||||
|
** Analogy: "neighborhood : city :: Machi : cluster-of-clusters"
|
||||||
|
|
||||||
|
Analogy: The word "machi" in Japanese means small town or
|
||||||
|
neighborhood. As the Tokyo Metropolitan Area is built from many
|
||||||
|
machis and smaller cities, therefore a big, partitioned file store can
|
||||||
|
be built out of many small Machi clusters.
|
||||||
|
|
||||||
|
** The reader is familiar with the random slicing technique
|
||||||
|
|
||||||
|
I'd done something very-very-nearly-identical for the Hibari database
|
||||||
|
6 years ago. But the Hibari technique was based on stuff I did at
|
||||||
|
Sendmail, Inc, so it felt like old news to me. {shrug}
|
||||||
|
|
||||||
|
The Hibari documentation has a brief photo illustration of how random
|
||||||
|
slicing works, see [[http://hibari.github.io/hibari-doc/hibari-sysadmin-guide.en.html#chain-migration][Hibari Sysadmin Guide, chain migration]]
|
||||||
|
|
||||||
|
For a comprehensive description, please see these two papers:
|
||||||
|
|
||||||
|
#+BEGIN_QUOTE
|
||||||
|
Reliable and Randomized Data Distribution Strategies for Large Scale Storage Systems
|
||||||
|
Alberto Miranda et al.
|
||||||
|
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.226.5609
|
||||||
|
(short version, HIPC'11)
|
||||||
|
|
||||||
|
Random Slicing: Efficient and Scalable Data Placement for Large-Scale
|
||||||
|
Storage Systems
|
||||||
|
Alberto Miranda et al.
|
||||||
|
DOI: http://dx.doi.org/10.1145/2632230 (long version, ACM Transactions
|
||||||
|
on Storage, Vol. 10, No. 3, Article 9, 2014)
|
||||||
|
#+END_QUOTE
|
||||||
|
|
||||||
|
** We use random slicing to map CoC file names -> Machi cluster ID/name
|
||||||
|
|
||||||
|
We will use a single random slicing map. This map (called ~Map~ in
|
||||||
|
the descriptions below), together with the random slicing hash
|
||||||
|
function (called ~rs_hash()~ below), will be used to map:
|
||||||
|
|
||||||
|
#+BEGIN_QUOTE
|
||||||
|
CoC client-visible file name -> Machi cluster ID/name/thingie
|
||||||
|
#+END_QUOTE
|
||||||
|
|
||||||
|
** Machi cluster ID/name management: TBD, but, really, should be simple
|
||||||
|
|
||||||
|
The mapping from:
|
||||||
|
|
||||||
|
#+BEGIN_QUOTE
|
||||||
|
Machi CoC member ID/name/thingie -> ???
|
||||||
|
#+END_QUOTE
|
||||||
|
|
||||||
|
... remains To Be Determined. But, really, this is going to be pretty
|
||||||
|
simple. The ID/name/thingie will probably be a human-friendly,
|
||||||
|
printable ASCII string, and the "???" will probably be a single Machi
|
||||||
|
cluster projection data structure.
|
||||||
|
|
||||||
|
The Machi projection is enough information to contact any member of
|
||||||
|
that cluster and, if necessary, request the most up-to-date projection
|
||||||
|
information required to use that cluster.
|
||||||
|
|
||||||
|
It's likely that the projection given by this map will be out-of-date,
|
||||||
|
so the client must be ready to use the standard Machi procedure to
|
||||||
|
request the cluster's current projection, in any case.
|
||||||
|
|
||||||
|
* 3. A simple illustration
|
||||||
|
|
||||||
|
I'm borrowing an illustration from the HibariDB documentation here,
|
||||||
|
but it fits my purposes quite well. (And I originally created that
|
||||||
|
image, and the use license is OK.)
|
||||||
|
|
||||||
|
#+CAPTION: Illustration of 'Map', using four Machi clusters
|
||||||
|
|
||||||
|
[[./migration-4.png]]
|
||||||
|
|
||||||
|
Assume that we have a random slicing map called ~Map~. This particular
|
||||||
|
~Map~ maps the unit interval onto 4 Machi clusters:
|
||||||
|
|
||||||
|
| Hash range | Cluster ID |
|
||||||
|
|-------------+------------|
|
||||||
|
| 0.00 - 0.25 | Cluster1 |
|
||||||
|
| 0.25 - 0.33 | Cluster4 |
|
||||||
|
| 0.33 - 0.58 | Cluster2 |
|
||||||
|
| 0.58 - 0.66 | Cluster4 |
|
||||||
|
| 0.66 - 0.91 | Cluster3 |
|
||||||
|
| 0.91 - 1.00 | Cluster4 |
|
||||||
|
|
||||||
|
Then, if we had CoC file name "~foo~", the hash ~SHA("foo")~ maps to about
|
||||||
|
0.05 on the unit interval. So, according to ~Map~, the value of
|
||||||
|
~rs_hash("foo",Map) = Cluster1~. Similarly, ~SHA("hello")~ is about
|
||||||
|
0.67 on the unit interval, so ~rs_hash("hello",Map) = Cluster3~.
|
||||||
|
|
||||||
|
* 4. An additional assumption: clients will want some control over file placement
|
||||||
|
|
||||||
|
We will continue to use the 4-cluster diagram from the previous
|
||||||
|
section.
|
||||||
|
|
||||||
|
When a client wishes to append data to a Machi file, the Machi server
|
||||||
|
chooses the file name & byte offset for storing that data. This
|
||||||
|
feature is why Machi's eventual consistency operating mode is so
|
||||||
|
nifty: it allows us to merge together files safely at any time because
|
||||||
|
any two client append operations will always write to different files
|
||||||
|
& different offsets.
|
||||||
|
|
||||||
|
** Our new assumption: client control over initial file placement
|
||||||
|
|
||||||
|
The CoC management scheme may decide that files need to migrate to
|
||||||
|
other clusters. The reason could be for storage load or I/O load
|
||||||
|
balancing reasons. It could be because a cluster is being
|
||||||
|
decommissioned by its owners. There are many legitimate reasons why a
|
||||||
|
file that is initially created on cluster ID X has been moved to
|
||||||
|
cluster ID Y.
|
||||||
|
|
||||||
|
However, there are also legitimate reasons for why the client would want
|
||||||
|
control over the choice of Machi cluster when the data is first
|
||||||
|
written. The single biggest reason is load balancing. Assuming that
|
||||||
|
the client (or the CoC management layer acting on behalf of the CoC
|
||||||
|
client) knows the current utilization across the participating Machi
|
||||||
|
clusters, then it may be very helpful to send new append() requests to
|
||||||
|
under-utilized clusters.
|
||||||
|
|
||||||
|
** Cool! Except for a couple of problems...
|
||||||
|
|
||||||
|
If the client wants to store some data
|
||||||
|
on Cluster2 and therefore sends an ~append("foo",CoolData)~ request to
|
||||||
|
the head of Cluster2 (which the client magically knows how to
|
||||||
|
contact), then the result will look something like
|
||||||
|
~{ok,"foo.s923.z47",ByteOffset}~.
|
||||||
|
|
||||||
|
Therefore, the file name "~foo.s923.z47~" must be used by any Machi
|
||||||
|
CoC client in order to retrieve the CoolData bytes.
|
||||||
|
|
||||||
|
*** Problem #1: "foo.s923.z47" doesn't always map via random slicing to Cluster2
|
||||||
|
|
||||||
|
... if we ignore the problem of "CoC files may be redistributed in the
|
||||||
|
future", then we still have a problem.
|
||||||
|
|
||||||
|
In fact, the value of ~rs_hash("foo.s923.z47",Map)~ is Cluster1.
|
||||||
|
|
||||||
|
*** Problem #2: We want CoC files to move around automatically
|
||||||
|
|
||||||
|
If the CoC client stores two pieces of information, the file name
|
||||||
|
"~foo.s923.z47~" and the Cluster ID Cluster2, then what happens when the
|
||||||
|
cluster-of-clusters system decides to rebalance files across all
|
||||||
|
machines? The CoC manager may decide to move our file to Cluster66.
|
||||||
|
|
||||||
|
How will a future CoC client retrieve CoolData when Cluster2
|
||||||
|
no longer stores the required file?
|
||||||
|
|
||||||
|
**** When migrating the file, we could put a "pointer" on Cluster2 that points to the new location, Cluster66.
|
||||||
|
|
||||||
|
This scheme is a bit brittle, even if all of the pointers are always
|
||||||
|
created 100% correctly. Also, if Cluster2 is ever unavailable, then
|
||||||
|
we cannot fetch our CoolData, even though the file moved away from
|
||||||
|
Cluster2 several years ago.
|
||||||
|
|
||||||
|
The scheme would also introduce extra round-trips to the servers
|
||||||
|
whenever we try to read a file for which we do not know the most
|
||||||
|
up-to-date cluster ID.
|
||||||
|
|
||||||
|
**** We could store a pointer to file "foo.s923.z47"'s location in an LDAP database!
|
||||||
|
|
||||||
|
Or we could store it in Riak. Or in another, external database. We'd
|
||||||
|
rather not create such an external dependency, however. Furthermore,
|
||||||
|
we would also have the same problem of updating this external database
|
||||||
|
each time that a file is moved/rebalanced across the CoC.
|
||||||
|
|
||||||
|
* 5. Proposal: Break the opacity of Machi file names, slightly
|
||||||
|
|
||||||
|
Assuming that Machi keeps the scheme of creating file names (in
|
||||||
|
response to ~append()~ and ~sequencer_new_range()~ calls) based on a
|
||||||
|
predictable client-supplied prefix and an opaque suffix, e.g.,
|
||||||
|
|
||||||
|
~append("foo",CoolData) -> {ok,"foo.s923.z47",ByteOffset}.~
|
||||||
|
|
||||||
|
... then we propose that all CoC and Machi parties be aware of this
|
||||||
|
naming scheme, i.e. that Machi assigns file names based on:
|
||||||
|
|
||||||
|
~ClientSuppliedPrefix ++ "." ++ SomeOpaqueFileNameSuffix~
|
||||||
|
|
||||||
|
The Machi system doesn't care about the file name -- a Machi server
|
||||||
|
will treat the entire file name as an opaque thing. But this document
|
||||||
|
is called the "Name Game" for a reason!
|
||||||
|
|
||||||
|
What if the CoC client could peek inside of the opaque file name
|
||||||
|
suffix in order to remove (or add) the CoC location information that
|
||||||
|
we need?
|
||||||
|
|
||||||
|
** The details: legend
|
||||||
|
|
||||||
|
- ~T~ = the target CoC member/Cluster ID chosen at the time of ~append()~
|
||||||
|
- ~p~ = file prefix, chosen by the CoC client (This is exactly the Machi client-chosen file prefix).
|
||||||
|
- ~s.z~ = the Machi file server opaque file name suffix (Which we
|
||||||
|
happen to know is a combination of sequencer ID plus file serial
|
||||||
|
number. This implementation may change, for example, to use a
|
||||||
|
standard GUID string (rendered into ASCII hexadecimal digits) instead.)
|
||||||
|
- ~K~ = the CoC placement key
|
||||||
|
|
||||||
|
We use a variation of ~rs_hash()~, called ~rs_hash_with_float()~. The
|
||||||
|
former uses a string as its 1st argument; the latter uses a floating
|
||||||
|
point number as its 1st argument. Both return a cluster ID name
|
||||||
|
thingie.
|
||||||
|
|
||||||
|
#+BEGIN_SRC erlang
|
||||||
|
%% type specs, Erlang style
|
||||||
|
-spec rs_hash(string(), rs_hash:map()) -> rs_hash:cluster_id().
|
||||||
|
-spec rs_hash_with_float(float(), rs_hash:map()) -> rs_hash:cluster_id().
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
NOTE: Use of floating point terms is not required. For example,
|
||||||
|
integer arithmetic could be used, if using a sufficiently large
|
||||||
|
interval to create an even & smooth distribution of hashes across the
|
||||||
|
expected maximum number of clusters.
|
||||||
|
|
||||||
|
For example, if the maximum CoC cluster size would be 4,000 individual
|
||||||
|
Machi clusters, then a minimum of 12 bits of integer space is required
|
||||||
|
to assign one integer per Machi cluster. However, for load balancing
|
||||||
|
purposes, a finer grain of (for example) 100 integers per Machi
|
||||||
|
cluster would permit file migration to move increments of
|
||||||
|
approximately 1% of single Machi cluster's storage capacity. A
|
||||||
|
minimum of 19 bits of hash space would be necessary to accommodate
|
||||||
|
these constraints.
|
||||||
|
|
||||||
|
** The details: CoC file write
|
||||||
|
|
||||||
|
1. CoC client chooses ~p~ and ~T~ (i.e., the file prefix & target cluster)
|
||||||
|
2. CoC client knows the CoC ~Map~
|
||||||
|
3. CoC client calculates a value ~K~ such that ~rs_hash_with_float(K,Map) = T~, using the method described below.
|
||||||
|
4. CoC client requests @ cluster ~T~: ~append_chunk(p,...) -> {ok,p.K.s.z,ByteOffset}~
|
||||||
|
5. CoC stores/uses the file name ~p.K.s.z~.
|
||||||
|
|
||||||
|
** The details: CoC file read
|
||||||
|
|
||||||
|
1. CoC client knows the file name ~p.K.s.z~ and parses it to find
|
||||||
|
~K~'s value.
|
||||||
|
2. CoC client knows the CoC ~Map~
|
||||||
|
3. CoC client calculates ~rs_hash_with_float(K,Map) = T~
|
||||||
|
4. CoC client requests @ cluster ~T~: ~read_chunk(p.K.s.z,...) ->~ ... success!
|
||||||
|
|
||||||
|
** The details: calculating 'K', the CoC placement key
|
||||||
|
|
||||||
|
1. We know ~Map~, the current CoC mapping.
|
||||||
|
2. We look inside of ~Map~, and we find all of the unit interval ranges
|
||||||
|
that map to our desired target cluster ~T~. Let's call this list
|
||||||
|
~MapList = [Range1=(start,end],Range2=(start,end],...]~.
|
||||||
|
3. In our example, ~T=Cluster2~. The example ~Map~ contains a single
|
||||||
|
unit interval range for ~Cluster2~, ~[(0.33,0.58]]~.
|
||||||
|
4. Choose a uniformly random number ~r~ on the unit interval.
|
||||||
|
5. Calculate placement key ~K~ by mapping ~r~ onto the concatenation
|
||||||
|
of the CoC hash space range intervals in ~MapList~. For example,
|
||||||
|
if ~r=0.5~, then ~K = 0.33 + 0.5*(0.58-0.33) = 0.455~, which is
|
||||||
|
exactly in the middle of the ~(0.33,0.58]~ interval.
|
||||||
|
6. If necessary, encode ~K~ in a file name-friendly manner, e.g., convert it to hexadecimal ASCII digits to create file name ~p.K.s.z~.
|
||||||
|
|
||||||
|
** The details: calculating 'K', an alternative method
|
||||||
|
|
||||||
|
If the Law of Large Numbers and our random number generator do not create the kind of smooth & even distribution of files across the CoC as we wish, an alternative method of calculating ~K~ follows.
|
||||||
|
|
||||||
|
If each server in each Machi cluster keeps track of the CoC ~Map~ and also of all values of ~K~ for all files that it stores, then we can simply ask a cluster member to recommend a value of ~K~ that is least represented by existing files.
|
||||||
|
|
||||||
|
* 6. File migration (aka rebalancing/repartitioning/redistribution)
|
||||||
|
|
||||||
|
** What is "file migration"?
|
||||||
|
|
||||||
|
As discussed in section 4, the client can have good reason for wanting
|
||||||
|
to have some control of the initial location of the file within the
|
||||||
|
cluster. However, the cluster manager has an ongoing interest in
|
||||||
|
balancing resources throughout the lifetime of the file. Disks will
|
||||||
|
get full, hardware will change, read workload will fluctuate,
|
||||||
|
etc etc.
|
||||||
|
|
||||||
|
This document uses the word "migration" to describe moving data from
|
||||||
|
one CoC cluster to another. In other systems, this process is
|
||||||
|
described with words such as rebalancing, repartitioning, and
|
||||||
|
resharding. For Riak Core applications, the mechanisms are "handoff"
|
||||||
|
and "ring resizing". See the [[http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html#Balancer][Hadoop file balancer]] for another example.
|
||||||
|
|
||||||
|
A simple variation of the Random Slicing hash algorithm can easily
|
||||||
|
accommodate Machi's need to migrate files without interfering with
|
||||||
|
availability. Machi's migration task is much simpler due to the
|
||||||
|
immutable nature of Machi file data.
|
||||||
|
|
||||||
|
** Change to Random Slicing
|
||||||
|
|
||||||
|
The map used by the Random Slicing hash algorithm needs a few simple
|
||||||
|
changes to make file migration straightforward.
|
||||||
|
|
||||||
|
- Add a "generation number", a strictly increasing number (similar to
|
||||||
|
a Machi cluster's "epoch number") that reflects the history of
|
||||||
|
changes made to the Random Slicing map
|
||||||
|
- Use a list of Random Slicing maps instead of a single map, one map
|
||||||
|
per possibility that files may not have been migrated yet out of
|
||||||
|
that map.
|
||||||
|
|
||||||
|
As an example:
|
||||||
|
|
||||||
|
#+CAPTION: Illustration of 'Map', using four Machi clusters
|
||||||
|
|
||||||
|
[[./migration-3to4.png]]
|
||||||
|
|
||||||
|
And the new Random Slicing map might look like this:
|
||||||
|
|
||||||
|
| Generation number | 7 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| SubMap | 1 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| Hash range | Cluster ID |
|
||||||
|
|-------------------+------------|
|
||||||
|
| 0.00 - 0.33 | Cluster1 |
|
||||||
|
| 0.33 - 0.66 | Cluster2 |
|
||||||
|
| 0.66 - 1.00 | Cluster3 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| SubMap | 2 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| Hash range | Cluster ID |
|
||||||
|
|-------------------+------------|
|
||||||
|
| 0.00 - 0.25 | Cluster1 |
|
||||||
|
| 0.25 - 0.33 | Cluster4 |
|
||||||
|
| 0.33 - 0.58 | Cluster2 |
|
||||||
|
| 0.58 - 0.66 | Cluster4 |
|
||||||
|
| 0.66 - 0.91 | Cluster3 |
|
||||||
|
| 0.91 - 1.00 | Cluster4 |
|
||||||
|
|
||||||
|
When a new Random Slicing map contains a single submap, then its use
|
||||||
|
is identical to the original Random Slicing algorithm. If the map
|
||||||
|
contains multiple submaps, then the access rules change a bit:
|
||||||
|
|
||||||
|
- Write operations always go to the latest/largest submap.
|
||||||
|
- Read operations attempt to read from all unique submaps.
|
||||||
|
- Skip searching submaps that refer to the same cluster ID.
|
||||||
|
- In this example, unit interval value 0.10 is mapped to Cluster1
|
||||||
|
by both submaps.
|
||||||
|
- Read from latest/largest submap to oldest/smallest submap.
|
||||||
|
- If not found in any submap, search a second time (to handle races
|
||||||
|
with file copying between submaps).
|
||||||
|
- If the requested data is found, optionally copy it directly to the
|
||||||
|
latest submap (as a variation of read repair which really simply
|
||||||
|
accelerates the migration process and can reduce the number of
|
||||||
|
operations required to query servers in multiple submaps).
|
||||||
|
|
||||||
|
The cluster-of-clusters manager is responsible for:
|
||||||
|
|
||||||
|
- Managing the various generations of the CoC Random Slicing maps,
|
||||||
|
including distributing them to CoC clients.
|
||||||
|
- Managing the processes that are responsible for copying "cold" data,
|
||||||
|
i.e., file data that is not regularly accessed, to its new submap
|
||||||
|
location.
|
||||||
|
- When migration of a file to its new cluster is confirmed successful,
|
||||||
|
delete it from the old cluster.
|
||||||
|
|
||||||
|
In example map #7, the CoC manager will copy files with unit interval
|
||||||
|
assignments in ~(0.25,0.33]~, ~(0.58,0.66]~, and ~(0.91,1.00]~ from their
|
||||||
|
old locations in cluster IDs Cluster1/2/3 to their new cluster,
|
||||||
|
Cluster4. When the CoC manager is satisfied that all such files have
|
||||||
|
been copied to Cluster4, then the CoC manager can create and
|
||||||
|
distribute a new map, such as:
|
||||||
|
|
||||||
|
| Generation number | 8 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| SubMap | 1 |
|
||||||
|
|-------------------+------------|
|
||||||
|
| Hash range | Cluster ID |
|
||||||
|
|-------------------+------------|
|
||||||
|
| 0.00 - 0.25 | Cluster1 |
|
||||||
|
| 0.25 - 0.33 | Cluster4 |
|
||||||
|
| 0.33 - 0.58 | Cluster2 |
|
||||||
|
| 0.58 - 0.66 | Cluster4 |
|
||||||
|
| 0.66 - 0.91 | Cluster3 |
|
||||||
|
| 0.91 - 1.00 | Cluster4 |
|
||||||
|
|
||||||
|
One limitation of HibariDB that I haven't fixed is not being able to
|
||||||
|
perform more than one migration at a time. The trade-off is that such
|
||||||
|
migration is difficult enough across two submaps; three or more
|
||||||
|
submaps becomes even more complicated.
|
||||||
|
|
||||||
|
Fortunately for Machi, its file data is immutable and therefore can
|
||||||
|
easily manage many migrations in parallel, i.e., its submap list may
|
||||||
|
be several maps long, each one for an in-progress file migration.
|
||||||
|
|
||||||
|
* Acknowledgements
|
||||||
|
|
||||||
|
The source for the "migration-4.png" and "migration-3to4.png" images
|
||||||
|
comes from the [[http://hibari.github.io/hibari-doc/images/migration-3to4.png][HibariDB documentation]].
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
#FIG 3.2 Produced by xfig version 3.2.5b
|
|
||||||
Landscape
|
|
||||||
Center
|
|
||||||
Inches
|
|
||||||
Letter
|
|
||||||
94.00
|
|
||||||
Single
|
|
||||||
-2
|
|
||||||
1200 2
|
|
||||||
6 7425 2700 8700 3300
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 645 7425 2895 After\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 255 1215 7425 3210 Migration\001
|
|
||||||
-6
|
|
||||||
6 7425 450 8700 1050
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 780 7425 675 Before\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 255 1215 7425 990 Migration\001
|
|
||||||
-6
|
|
||||||
6 75 1425 6900 2325
|
|
||||||
6 4875 1425 6900 2325
|
|
||||||
6 5400 1575 6375 2175
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 165 390 5400 1800 Not\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 225 945 5400 2100 migrated\001
|
|
||||||
-6
|
|
||||||
2 2 1 2 0 7 50 -1 -1 6.000 0 0 -1 0 0 5
|
|
||||||
4950 1500 6825 1500 6825 2250 4950 2250 4950 1500
|
|
||||||
-6
|
|
||||||
6 2475 1425 4500 2325
|
|
||||||
6 3000 1575 3975 2175
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 165 390 3000 1800 Not\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 225 945 3000 2100 migrated\001
|
|
||||||
-6
|
|
||||||
2 2 1 2 0 7 50 -1 -1 6.000 0 0 -1 0 0 5
|
|
||||||
2550 1500 4425 1500 4425 2250 2550 2250 2550 1500
|
|
||||||
-6
|
|
||||||
6 75 1425 2100 2325
|
|
||||||
6 600 1575 1575 2175
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 165 390 600 1800 Not\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 225 945 600 2100 migrated\001
|
|
||||||
-6
|
|
||||||
2 2 1 2 0 7 50 -1 -1 6.000 0 0 -1 0 0 5
|
|
||||||
150 1500 2025 1500 2025 2250 150 2250 150 1500
|
|
||||||
-6
|
|
||||||
-6
|
|
||||||
2 1 0 2 0 7 50 -1 -1 6.000 0 0 -1 1 0 2
|
|
||||||
1 1 3.00 60.00 120.00
|
|
||||||
150 4200 150 3750
|
|
||||||
2 1 0 2 0 7 50 -1 -1 6.000 0 0 -1 1 0 2
|
|
||||||
1 1 3.00 60.00 120.00
|
|
||||||
3750 4200 3750 3750
|
|
||||||
2 1 0 2 0 7 50 -1 -1 6.000 0 0 -1 1 0 2
|
|
||||||
1 1 3.00 60.00 120.00
|
|
||||||
2025 4200 2025 3750
|
|
||||||
2 1 0 2 0 7 50 -1 -1 6.000 0 0 -1 1 0 2
|
|
||||||
1 1 3.00 60.00 120.00
|
|
||||||
7350 4200 7350 3750
|
|
||||||
2 1 0 2 0 7 50 -1 -1 6.000 0 0 -1 1 0 2
|
|
||||||
1 1 3.00 60.00 120.00
|
|
||||||
5550 4200 5550 3750
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
2550 0 2550 1500 150 1500 150 0 2550 0
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
4950 0 4950 1500 2550 1500 2550 0 4950 0
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
7350 0 7350 1500 4950 1500 4950 0 7350 0
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
150 2250 2025 2250 2025 3750 150 3750 150 2250
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
4425 2250 4950 2250 4950 3750 4425 3750 4425 2250
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
4950 2250 6825 2250 6825 3750 4950 3750 4950 2250
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
6825 2250 7350 2250 7350 3750 6825 3750 6825 2250
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
2025 2250 2550 2250 2550 3750 2025 3750 2025 2250
|
|
||||||
2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
|
|
||||||
2550 2250 4425 2250 4425 3750 2550 3750 2550 2250
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 480 75 4500 0.00\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 480 6825 4500 1.00\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 480 1725 4500 0.25\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 480 3525 4500 0.50\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 195 480 5250 4500 0.75\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 450 1275 ~33% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 2925 1275 ~33% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 5250 1275 ~33% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 180 495 2025 3525 ~8%\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 300 3525 ~25% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 2625 3525 ~25% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 180 495 4425 3525 ~8%\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 240 1710 5025 3525 ~25% total keys\001
|
|
||||||
4 0 0 50 -1 2 14 0.0000 4 180 495 6825 3525 ~8%\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 195 2175 3075 4\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 195 4575 3075 4\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 195 6975 3075 4\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 600 600 Chain1\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 3000 600 Chain2\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 5400 600 Chain3\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 285 2100 2625 C\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 285 4500 2625 C\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 285 6900 2625 C\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 525 2850 Chain1\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 2925 2850 Chain2\001
|
|
||||||
4 0 0 50 -1 2 24 0.0000 4 270 1245 5325 2850 Chain3\001
|
|
||||||
4 0 0 50 -1 2 18 0.0000 4 240 4350 1350 4875 Cluster locator, on the unit interval\001
|
|
Binary file not shown.
Before Width: | Height: | Size: 7.6 KiB |
Binary file not shown.
Before Width: | Height: | Size: 7.4 KiB |
|
@ -1,481 +0,0 @@
|
||||||
-*- mode: org; -*-
|
|
||||||
#+TITLE: Machi cluster "name game" sketch
|
|
||||||
#+AUTHOR: Scott
|
|
||||||
#+STARTUP: lognotedone hidestars indent showall inlineimages
|
|
||||||
#+SEQ_TODO: TODO WORKING WAITING DONE
|
|
||||||
#+COMMENT: M-x visual-line-mode
|
|
||||||
#+COMMENT: Also, disable auto-fill-mode
|
|
||||||
|
|
||||||
* 1. "Name Games" with random-slicing style consistent hashing
|
|
||||||
|
|
||||||
Our goal: to distribute lots of files very evenly across a large
|
|
||||||
collection of individual, small Machi chains.
|
|
||||||
|
|
||||||
* 2. Assumptions
|
|
||||||
|
|
||||||
** Basic familiarity with Machi high level design and Machi's "projection"
|
|
||||||
|
|
||||||
The [[https://github.com/basho/machi/blob/master/doc/high-level-machi.pdf][Machi high level design document]] contains all of the basic
|
|
||||||
background assumed by the rest of this document.
|
|
||||||
|
|
||||||
** Analogy: "neighborhood : city :: Machi chain : Machi cluster"
|
|
||||||
|
|
||||||
Analogy: The word "machi" in Japanese means small town or
|
|
||||||
neighborhood. As the Tokyo Metropolitan Area is built from many
|
|
||||||
machis and smaller cities, so too a big, partitioned file store can
|
|
||||||
be built out of many small Machi chains.
|
|
||||||
|
|
||||||
** Familiarity with the Machi chain concept
|
|
||||||
|
|
||||||
It's clear (I hope!) from
|
|
||||||
the [[https://github.com/basho/machi/blob/master/doc/high-level-machi.pdf][Machi high level design document]] that Machi alone does not support
|
|
||||||
any kind of file partitioning/distribution/sharding across multiple
|
|
||||||
small Machi chains. There must be another layer above a Machi chain to
|
|
||||||
provide such partitioning services.
|
|
||||||
|
|
||||||
Using the [[https://github.com/basho/machi/tree/master/prototype/demo-day-hack][cluster quick-and-dirty prototype]] as an
|
|
||||||
architecture sketch, let's now assume that we have ~n~ independent Machi
|
|
||||||
chains. We assume that each of these chains has the same
|
|
||||||
chain length in the nominal case, e.g. chain length of 3.
|
|
||||||
We wish to provide partitioned/distributed file storage
|
|
||||||
across all ~n~ chains. We call the entire collection of ~n~ Machi
|
|
||||||
chains a "cluster".
|
|
||||||
|
|
||||||
We may wish to have several types of Machi clusters. For example:
|
|
||||||
|
|
||||||
+ Chain length of 1 for "don't care if it gets lost,
|
|
||||||
store stuff very very cheaply" data.
|
|
||||||
+ Chain length of 2 for normal data.
|
|
||||||
+ Equivalent to quorum replication's reliability with 3 copies.
|
|
||||||
+ Chain length of 7 for critical, unreplaceable data.
|
|
||||||
+ Equivalent to quorum replication's reliability with 13 copies.
|
|
||||||
|
|
||||||
Each of these types of chains will have a name ~N~ in the
|
|
||||||
namespace. The role of the cluster namespace will be demonstrated in
|
|
||||||
Section 4 below.
|
|
||||||
|
|
||||||
** Continue an early assumption: a Machi chain is unaware of clustering
|
|
||||||
|
|
||||||
Let's continue with an assumption that an individual Machi chain
|
|
||||||
inside of a cluster is completely unaware of the cluster layer.
|
|
||||||
|
|
||||||
** The reader is familiar with the random slicing technique
|
|
||||||
|
|
||||||
I'd done something very-very-nearly-like-this for the Hibari database
|
|
||||||
6 years ago. But the Hibari technique was based on stuff I did at
|
|
||||||
Sendmail, Inc, in 2000, so this technique feels like old news to me.
|
|
||||||
{shrug}
|
|
||||||
|
|
||||||
The following section provides an illustrated example.
|
|
||||||
Very quickly, the random slicing algorithm is:
|
|
||||||
|
|
||||||
- Hash a string onto the unit interval [0.0, 1.0)
|
|
||||||
- Calculate h(unit interval point, Map) -> bin, where ~Map~ divides
|
|
||||||
the unit interval into bins (or partitions or shards).
|
|
||||||
|
|
||||||
Machi's adaptation is in step 1: we do not hash any strings. Instead, we
|
|
||||||
simply choose a number on the unit interval. This number is called
|
|
||||||
the "cluster locator number".
|
|
||||||
|
|
||||||
As described later in this doc, Machi file names are structured into
|
|
||||||
several components. One component of the file name contains the cluster
|
|
||||||
locator number; we use the number as-is for step 2 above.
|
|
||||||
|
|
||||||
*** For more information about Random Slicing
|
|
||||||
|
|
||||||
For a comprehensive description of random slicing, please see the
|
|
||||||
first two papers. For a quicker summary, please see the third
|
|
||||||
reference.
|
|
||||||
|
|
||||||
#+BEGIN_QUOTE
|
|
||||||
Reliable and Randomized Data Distribution Strategies for Large Scale Storage Systems
|
|
||||||
Alberto Miranda et al.
|
|
||||||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.226.5609
|
|
||||||
(short version, HIPC'11)
|
|
||||||
|
|
||||||
Random Slicing: Efficient and Scalable Data Placement for Large-Scale
|
|
||||||
Storage Systems
|
|
||||||
Alberto Miranda et al.
|
|
||||||
DOI: http://dx.doi.org/10.1145/2632230 (long version, ACM Transactions
|
|
||||||
on Storage, Vol. 10, No. 3, Article 9, 2014)
|
|
||||||
|
|
||||||
[[http://hibari.github.io/hibari-doc/hibari-sysadmin-guide.en.html#chain-migration][Hibari Sysadmin Guide, chain migration section]].
|
|
||||||
http://hibari.github.io/hibari-doc/hibari-sysadmin-guide.en.html#chain-migration
|
|
||||||
#+END_QUOTE
|
|
||||||
|
|
||||||
* 3. A simple illustration
|
|
||||||
|
|
||||||
We use a variation of the Random Slicing hash that we will call
|
|
||||||
~rs_hash_with_float()~. The Erlang-style function type is shown
|
|
||||||
below.
|
|
||||||
|
|
||||||
#+BEGIN_SRC erlang
|
|
||||||
%% type specs, Erlang-style
|
|
||||||
-spec rs_hash_with_float(float(), rs_hash:map()) -> rs_hash:chain_id().
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
I'm borrowing an illustration from the HibariDB documentation here,
|
|
||||||
but it fits my purposes quite well. (I am the original creator of that
|
|
||||||
image, and also the use license is compatible.)
|
|
||||||
|
|
||||||
#+CAPTION: Illustration of 'Map', using four Machi chains
|
|
||||||
|
|
||||||
[[./migration-4.png]]
|
|
||||||
|
|
||||||
Assume that we have a random slicing map called ~Map~. This particular
|
|
||||||
~Map~ maps the unit interval onto 4 Machi chains:
|
|
||||||
|
|
||||||
| Hash range | Chain ID |
|
|
||||||
|-------------+----------|
|
|
||||||
| 0.00 - 0.25 | Chain1 |
|
|
||||||
| 0.25 - 0.33 | Chain4 |
|
|
||||||
| 0.33 - 0.58 | Chain2 |
|
|
||||||
| 0.58 - 0.66 | Chain4 |
|
|
||||||
| 0.66 - 0.91 | Chain3 |
|
|
||||||
| 0.91 - 1.00 | Chain4 |
|
|
||||||
|
|
||||||
Assume that the system chooses a cluster locator of 0.05.
|
|
||||||
According to ~Map~, the value of
|
|
||||||
~rs_hash_with_float(0.05,Map) = Chain1~.
|
|
||||||
Similarly, ~rs_hash_with_float(0.26,Map) = Chain4~.
|
|
||||||
|
|
||||||
This example should look very similar to Hibari's technique.
|
|
||||||
The Hibari documentation has a brief photo illustration of how random
|
|
||||||
slicing works, see [[http://hibari.github.io/hibari-doc/hibari-sysadmin-guide.en.html#chain-migration][Hibari Sysadmin Guide, chain migration]].
|
|
||||||
|
|
||||||
* 4. Use of the cluster namespace: name separation plus chain type
|
|
||||||
|
|
||||||
Let us assume that the cluster framework provides several different types
|
|
||||||
of chains:
|
|
||||||
|
|
||||||
| Chain length | Namespace | Consistency Mode | Comment |
|
|
||||||
|--------------+--------------+------------------+----------------------------------|
|
|
||||||
| 3 | ~normal~ | eventual | Normal storage redundancy & cost |
|
|
||||||
| 2 | ~reduced~ | eventual | Reduced cost storage |
|
|
||||||
| 1 | ~risky~ | eventual | Really, really cheap storage |
|
|
||||||
| 7 | ~paranoid~ | eventual | Safety-critical storage |
|
|
||||||
| 3 | ~sequential~ | strong | Strong consistency |
|
|
||||||
|--------------+--------------+------------------+----------------------------------|
|
|
||||||
|
|
||||||
The client may want to choose the amount of redundancy that its
|
|
||||||
application requires: normal, reduced cost, or perhaps even a single
|
|
||||||
copy. The cluster namespace is used by the client to signal this
|
|
||||||
intention.
|
|
||||||
|
|
||||||
Further, the cluster administrators may wish to use the namespace to
|
|
||||||
provide separate storage for different applications. Jane's
|
|
||||||
application may use the namespace "jane-normal" and Bob's app uses
|
|
||||||
"bob-reduced". Administrators may define separate groups of
|
|
||||||
chains on separate servers to serve these two applications.
|
|
||||||
|
|
||||||
* 5. In its lifetime, a file may be moved to different chains
|
|
||||||
|
|
||||||
The cluster management scheme may decide that files need to migrate to
|
|
||||||
other chains -- i.e., a file that is initially created on chain ID ~X~
|
|
||||||
has been moved to chain ID ~Y~.
|
|
||||||
|
|
||||||
+ For storage load or I/O load balancing reasons.
|
|
||||||
+ Because a chain is being decommissioned by the sysadmin.
|
|
||||||
|
|
||||||
* 6. Floating point is not required ... it is merely convenient for explanation
|
|
||||||
|
|
||||||
NOTE: Use of floating point terms is not required. For example,
|
|
||||||
integer arithmetic could be used, if using a sufficiently large
|
|
||||||
interval to create an even & smooth distribution of hashes across the
|
|
||||||
expected maximum number of chains.
|
|
||||||
|
|
||||||
For example, if the maximum cluster size would be 4,000 individual
|
|
||||||
Machi chains, then a minimum of 12 bits of integer space is required
|
|
||||||
to assign one integer per Machi chain. However, for load balancing
|
|
||||||
purposes, a finer grain of (for example) 100 integers per Machi
|
|
||||||
chain would permit file migration to move increments of
|
|
||||||
approximately 1% of a single Machi chain's storage capacity. A
|
|
||||||
minimum of 12+7=19 bits of hash space would be necessary to accommodate
|
|
||||||
these constraints.
|
|
||||||
|
|
||||||
It is likely that Machi's final implementation will choose a 24 bit
|
|
||||||
integer (or perhaps 32 bits) to represent the cluster locator.
|
|
||||||
|
|
||||||
* 7. Proposal: Break the opacity of Machi file names, slightly.
|
|
||||||
|
|
||||||
Machi assigns file names based on:
|
|
||||||
|
|
||||||
~ClientSuppliedPrefix ++ "^" ++ SomeOpaqueFileNameSuffix~
|
|
||||||
|
|
||||||
What if some parts of the system could peek inside of the opaque file name
|
|
||||||
suffix in order to look at the cluster location information that we might
|
|
||||||
encode in the filename suffix?
|
|
||||||
|
|
||||||
We break the system into parts that speak two levels of protocols,
|
|
||||||
"high" and "low".
|
|
||||||
|
|
||||||
+ The high level protocol is used outside of the Machi cluster
|
|
||||||
+ The low level protocol is used inside of the Machi cluster
|
|
||||||
|
|
||||||
Both protocols are based on a Protocol Buffers specification and
|
|
||||||
implementation. Other protocols, such as HTTP, will be added later.
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
+-----------------------+
|
|
||||||
| Machi external client |
|
|
||||||
| e.g. Riak CS |
|
|
||||||
+-----------------------+
|
|
||||||
^
|
|
||||||
| Machi "high" API
|
|
||||||
| ProtoBuffs protocol Machi cluster boundary: outside
|
|
||||||
.........................................................................
|
|
||||||
| Machi cluster boundary: inside
|
|
||||||
v
|
|
||||||
+--------------------------+ +------------------------+
|
|
||||||
| Machi "high" API service | | Machi HTTP API service |
|
|
||||||
+--------------------------+ +------------------------+
|
|
||||||
^ |
|
|
||||||
| +------------------------+
|
|
||||||
v v
|
|
||||||
+------------------------+
|
|
||||||
| Cluster bridge service |
|
|
||||||
+------------------------+
|
|
||||||
^
|
|
||||||
| Machi "low" API
|
|
||||||
| ProtoBuffs protocol
|
|
||||||
+----------------------------------------+----+----+
|
|
||||||
| | | |
|
|
||||||
v v v v
|
|
||||||
+-------------------------+ ... other chains...
|
|
||||||
| Chain C1 (logical view) |
|
|
||||||
| +--------------+ |
|
|
||||||
| | FLU server 1 | |
|
|
||||||
| | +--------------+ |
|
|
||||||
| +--| FLU server 2 | |
|
|
||||||
| +--------------+ | In reality, API bridge talks directly
|
|
||||||
+-------------------------+ to each FLU server in a chain.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
** The notation we use
|
|
||||||
|
|
||||||
- ~N~ = the cluster namespace, chosen by the client.
|
|
||||||
- ~p~ = file prefix, chosen by the client.
|
|
||||||
- ~L~ = the cluster locator (a number, type is implementation-dependent)
|
|
||||||
- ~Map~ = a mapping of cluster locators to chains
|
|
||||||
- ~T~ = the target chain ID/name
|
|
||||||
- ~u~ = a unique opaque file name suffix, e.g. a GUID string
|
|
||||||
- ~F~ = a Machi file name, i.e., a concatenation of ~p^L^N^u~
|
|
||||||
|
|
||||||
** The details: cluster file append
|
|
||||||
|
|
||||||
0. Cluster client chooses ~N~ and ~p~ (i.e., cluster namespace and
|
|
||||||
file prefix) and sends the append request to a Machi cluster member
|
|
||||||
via the Protocol Buffers "high" API.
|
|
||||||
1. Cluster bridge chooses ~T~ (i.e., target chain), based on criteria
|
|
||||||
such as disk utilization percentage.
|
|
||||||
2. Cluster bridge knows the cluster ~Map~ for namespace ~N~.
|
|
||||||
3. Cluster bridge chooses some cluster locator value ~L~ such that
|
|
||||||
~rs_hash_with_float(L,Map) = T~ (see algorithm below).
|
|
||||||
4. Cluster bridge sends its request to chain
|
|
||||||
~T~: ~append_chunk(p,L,N,...) -> {ok,p^L^N^u,ByteOffset}~
|
|
||||||
5. Cluster bridge forwards the reply tuple to the client.
|
|
||||||
6. Client stores/uses the file name ~F = p^L^N^u~.
|
|
||||||
|
|
||||||
** The details: Cluster file read
|
|
||||||
|
|
||||||
0. Cluster client sends the read request to a Machi cluster member via
|
|
||||||
the Protocol Buffers "high" API.
|
|
||||||
1. Cluster bridge parses the file name ~F~ to find
|
|
||||||
the values of ~L~ and ~N~ (recall, ~F = p^L^N^u~).
|
|
||||||
2. Cluster bridge knows the cluster ~Map~ for namespace ~N~.
|
|
||||||
3. Cluster bridge calculates ~rs_hash_with_float(L,Map) = T~
|
|
||||||
4. Cluster bridge sends request to chain ~T~:
|
|
||||||
~read_chunk(F,...) ->~ ... reply
|
|
||||||
5. Cluster bridge forwards the reply to the client.
|
|
||||||
|
|
||||||
** The details: calculating 'L' (the cluster locator number) to match a desired target chain
|
|
||||||
|
|
||||||
1. We know ~Map~, the current cluster mapping for a cluster namespace ~N~.
|
|
||||||
2. We look inside of ~Map~, and we find all of the unit interval ranges
|
|
||||||
that map to our desired target chain ~T~. Let's call this list
|
|
||||||
~MapList = [Range1=(start,end],Range2=(start,end],...]~.
|
|
||||||
3. In our example, ~T=Chain2~. The example ~Map~ contains a single
|
|
||||||
unit interval range for ~Chain2~, ~[(0.33,0.58]]~.
|
|
||||||
4. Choose a uniformly random number ~r~ on the unit interval.
|
|
||||||
5. Calculate the cluster locator ~L~ by mapping ~r~ onto the concatenation
|
|
||||||
of the cluster hash space range intervals in ~MapList~. For example,
|
|
||||||
if ~r=0.5~, then ~L = 0.33 + 0.5*(0.58-0.33) = 0.455~, which is
|
|
||||||
exactly in the middle of the ~(0.33,0.58]~ interval.
|
|
||||||
|
|
||||||
** A bit more about the cluster namespace's meaning and use
|
|
||||||
|
|
||||||
For use by Riak CS, for example, we'd likely start with the following
|
|
||||||
namespaces ... working our way down the list as we add new features
|
|
||||||
and/or re-implement existing CS features.
|
|
||||||
|
|
||||||
- "standard" = Chain length = 3, eventual consistency mode.
|
|
||||||
- "reduced" = Chain length = 2, eventual consistency mode.
|
|
||||||
- "stanchion7" = Chain length = 7, strong consistency mode. Perhaps
|
|
||||||
use this namespace for the metadata required to re-implement the
|
|
||||||
operations that are performed by today's Stanchion application.
|
|
||||||
|
|
||||||
We want the cluster framework to:
|
|
||||||
|
|
||||||
- provide means of creating and managing
|
|
||||||
chains of different types, e.g., chain length, consistency mode.
|
|
||||||
- manage the mapping of cluster namespace
|
|
||||||
names to the chains in the system.
|
|
||||||
- provide query functions to map a cluster
|
|
||||||
namespace name to a cluster map,
|
|
||||||
e.g. ~get_cluster_latest_map("reduced") -> Map{generation=7,...}~.
|
|
||||||
|
|
||||||
* 8. File migration (a.k.a. rebalancing/repartitioning/resharding/redistribution)
|
|
||||||
|
|
||||||
** What is "migration"?
|
|
||||||
|
|
||||||
This section describes Machi's file migration. Other storage systems
|
|
||||||
call this process "rebalancing", "repartitioning", "resharding" or
|
|
||||||
"redistribution".
|
|
||||||
For Riak Core applications, it is called "handoff" and "ring resizing"
|
|
||||||
(depending on the context).
|
|
||||||
See also the [[http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html#Balancer][Hadoop file balancer]] for another example of a data
|
|
||||||
migration process.
|
|
||||||
|
|
||||||
As discussed in section 5, the client can have good reason for wanting
|
|
||||||
to have some control of the initial location of the file within the
|
|
||||||
cluster. However, the cluster manager has an ongoing interest in
|
|
||||||
balancing resources throughout the lifetime of the file. Disks will
|
|
||||||
get full, hardware will change, read workload will fluctuate,
|
|
||||||
etc etc.
|
|
||||||
|
|
||||||
This document uses the word "migration" to describe moving data from
|
|
||||||
one Machi chain to another chain within a cluster system.
|
|
||||||
|
|
||||||
A simple variation of the Random Slicing hash algorithm can easily
|
|
||||||
accommodate Machi's need to migrate files without interfering with
|
|
||||||
availability. Machi's migration task is much simpler due to the
|
|
||||||
immutable nature of Machi file data.
|
|
||||||
|
|
||||||
** Change to Random Slicing
|
|
||||||
|
|
||||||
The map used by the Random Slicing hash algorithm needs a few simple
|
|
||||||
changes to make file migration straightforward.
|
|
||||||
|
|
||||||
- Add a "generation number", a strictly increasing number (similar to
|
|
||||||
a Machi chain's "epoch number") that reflects the history of
|
|
||||||
changes made to the Random Slicing map
|
|
||||||
- Use a list of Random Slicing maps instead of a single map, one map
|
|
||||||
per chance that files may not have been migrated yet out of
|
|
||||||
that map.
|
|
||||||
|
|
||||||
As an example:
|
|
||||||
|
|
||||||
#+CAPTION: Illustration of 'Map', using four Machi chains
|
|
||||||
|
|
||||||
[[./migration-3to4.png]]
|
|
||||||
|
|
||||||
And the new Random Slicing map for some cluster namespace ~N~ might look
|
|
||||||
like this:
|
|
||||||
|
|
||||||
| Generation number / Namespace | 7 / reduced |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| SubMap | 1 |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| Hash range | Chain ID |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| 0.00 - 0.33 | Chain1 |
|
|
||||||
| 0.33 - 0.66 | Chain2 |
|
|
||||||
| 0.66 - 1.00 | Chain3 |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| SubMap | 2 |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| Hash range | Chain ID |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| 0.00 - 0.25 | Chain1 |
|
|
||||||
| 0.25 - 0.33 | Chain4 |
|
|
||||||
| 0.33 - 0.58 | Chain2 |
|
|
||||||
| 0.58 - 0.66 | Chain4 |
|
|
||||||
| 0.66 - 0.91 | Chain3 |
|
|
||||||
| 0.91 - 1.00 | Chain4 |
|
|
||||||
|
|
||||||
When a new Random Slicing map contains a single submap, then its use
|
|
||||||
is identical to the original Random Slicing algorithm. If the map
|
|
||||||
contains multiple submaps, then the access rules change a bit:
|
|
||||||
|
|
||||||
- Write operations always go to the newest/largest submap.
|
|
||||||
- Read operations attempt to read from all unique submaps.
|
|
||||||
- Skip searching submaps that refer to the same chain ID.
|
|
||||||
- In this example, unit interval value 0.10 is mapped to Chain1
|
|
||||||
by both submaps.
|
|
||||||
- Read from newest/largest submap to oldest/smallest submap.
|
|
||||||
- If not found in any submap, search a second time (to handle races
|
|
||||||
with file copying between submaps).
|
|
||||||
- If the requested data is found, optionally copy it directly to the
|
|
||||||
newest submap. (This is a variation of read repair (RR). RR here
|
|
||||||
accelerates the migration process and can reduce the number of
|
|
||||||
operations required to query servers in multiple submaps).
|
|
||||||
|
|
||||||
The cluster manager is responsible for:
|
|
||||||
|
|
||||||
- Managing the various generations of the cluster Random Slicing maps for
|
|
||||||
all namespaces.
|
|
||||||
- Distributing namespace maps to cluster bridges.
|
|
||||||
- Managing the processes that are responsible for copying "cold" data,
|
|
||||||
i.e., file data that is not regularly accessed, to its new submap
|
|
||||||
location.
|
|
||||||
- When migration of a file to its new chain is confirmed successful,
|
|
||||||
delete it from the old chain.
|
|
||||||
|
|
||||||
In example map #7, the cluster manager will copy files with unit interval
|
|
||||||
assignments in ~(0.25,0.33]~, ~(0.58,0.66]~, and ~(0.91,1.00]~ from their
|
|
||||||
old locations in chain IDs Chain1/2/3 to their new chain,
|
|
||||||
Chain4. When the cluster manager is satisfied that all such files have
|
|
||||||
been copied to Chain4, then the cluster manager can create and
|
|
||||||
distribute a new map, such as:
|
|
||||||
|
|
||||||
| Generation number / Namespace | 8 / reduced |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| SubMap | 1 |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| Hash range | Chain ID |
|
|
||||||
|-------------------------------+-------------|
|
|
||||||
| 0.00 - 0.25 | Chain1 |
|
|
||||||
| 0.25 - 0.33 | Chain4 |
|
|
||||||
| 0.33 - 0.58 | Chain2 |
|
|
||||||
| 0.58 - 0.66 | Chain4 |
|
|
||||||
| 0.66 - 0.91 | Chain3 |
|
|
||||||
| 0.91 - 1.00 | Chain4 |
|
|
||||||
|
|
||||||
The HibariDB system performs data migrations in almost exactly this
|
|
||||||
manner. However, one important
|
|
||||||
limitation of HibariDB is not being able to
|
|
||||||
perform more than one migration at a time. HibariDB's data is
|
|
||||||
mutable. Mutation causes many problems when migrating data
|
|
||||||
across two submaps; three or more submaps was too complex to implement
|
|
||||||
quickly and correctly.
|
|
||||||
|
|
||||||
Fortunately for Machi, its file data is immutable and therefore can
|
|
||||||
easily manage many migrations in parallel, i.e., its submap list may
|
|
||||||
be several maps long, each one for an in-progress file migration.
|
|
||||||
|
|
||||||
* 9. Other considerations for FLU/sequencer implementations
|
|
||||||
|
|
||||||
** Append to existing file when possible
|
|
||||||
|
|
||||||
The sequencer should always assign new offsets to the latest/newest
|
|
||||||
file for any prefix, as long as all of the following prerequisites are also true:
|
|
||||||
|
|
||||||
- The epoch has not changed. (In AP mode, epoch change -> mandatory
|
|
||||||
file name suffix change.)
|
|
||||||
- The cluster locator number is stable.
|
|
||||||
- The latest file for prefix ~p~ is smaller than maximum file size for
|
|
||||||
a FLU's configuration.
|
|
||||||
|
|
||||||
The stability of the cluster locator number is an implementation detail that
|
|
||||||
must be managed by the cluster bridge.
|
|
||||||
|
|
||||||
Reuse of the same file is not possible if the bridge always chooses a
|
|
||||||
different cluster locator number ~L~ or if the client always uses a unique
|
|
||||||
file prefix ~p~. The latter is a sign of a misbehaved client; the
|
|
||||||
former is a poorly-implemented bridge.
|
|
||||||
|
|
||||||
* 10. Acknowledgments
|
|
||||||
|
|
||||||
The original source for the "migration-4.png" and "migration-3to4.png" images
|
|
||||||
comes from the [[http://hibari.github.io/hibari-doc/images/migration-3to4.png][HibariDB documentation]].
|
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
# Clone and compile Machi
|
|
||||||
|
|
||||||
Clone the Machi source repo and compile the source and test code. Run
|
|
||||||
the following commands at your login shell:
|
|
||||||
|
|
||||||
cd /tmp
|
|
||||||
git clone https://github.com/basho/machi.git
|
|
||||||
cd machi
|
|
||||||
git checkout master
|
|
||||||
make # or 'gmake' if GNU make uses an alternate name
|
|
||||||
|
|
||||||
Then run the unit test suite. This may take up to two minutes or so
|
|
||||||
to finish.
|
|
||||||
|
|
||||||
make test
|
|
||||||
|
|
||||||
At the end, the test suite should report that all tests passed. The
|
|
||||||
actual number of tests shown in the "All `X` tests passed" line may be
|
|
||||||
different than the example below.
|
|
||||||
|
|
||||||
[... many lines omitted ...]
|
|
||||||
module 'event_logger'
|
|
||||||
module 'chain_mgr_legacy'
|
|
||||||
=======================================================
|
|
||||||
All 90 tests passed.
|
|
||||||
|
|
||||||
If you had a test failure, a likely cause may be a limit on the number
|
|
||||||
of file descriptors available to your user process. (Recent releases
|
|
||||||
of OS X have a limit of 1024 file descriptors, which may be too low.)
|
|
||||||
The output of `ulimit -n` will tell you your file descriptor limit.
|
|
|
@ -1,38 +0,0 @@
|
||||||
## Machi developer environment prerequisites
|
|
||||||
|
|
||||||
1. Machi requires a 64-bit variant of UNIX: OS X, FreeBSD, Linux, or
|
|
||||||
Solaris, with a standard developer environment for C and C++
|
|
||||||
applications (64-bit versions).
|
|
||||||
2. You'll need the `git` source management utility.
|
|
||||||
3. You'll need the 64-bit Erlang/OTP 17 runtime environment. Please
|
|
||||||
don't use earlier or later versions until we have a chance to fix
|
|
||||||
the compilation warnings that versions R16B and 18 will trigger.
|
|
||||||
Also, please verify that you are not using a 32-bit Erlang/OTP
|
|
||||||
runtime package.
|
|
||||||
|
|
||||||
For `git` and the Erlang runtime, please use your OS-specific
|
|
||||||
package manager to install these. If your package manager doesn't
|
|
||||||
have 64-bit Erlang/OTP version 17 available, then we recommend using the
|
|
||||||
[precompiled packages available at Erlang Solutions](https://www.erlang-solutions.com/resources/download.html).
|
|
||||||
|
|
||||||
Also, please verify that you have enough file descriptors available to
|
|
||||||
your user processes. The output of `ulimit -n` should report at least
|
|
||||||
4,000 file descriptors available. If your limit is lower (a frequent
|
|
||||||
problem for OS X users), please increase it to at least 4,000.
|
|
||||||
|
|
||||||
# Using Vagrant to set up a developer environment for Machi
|
|
||||||
|
|
||||||
The Machi source directory contains a `Vagrantfile` for creating an
|
|
||||||
Ubuntu Linux-based virtual machine for compiling and running Machi.
|
|
||||||
This file is in the
|
|
||||||
[$SRC_TOP/priv/humming-consensus-demo.vagrant](../priv/humming-consensus-demo.vagrant)
|
|
||||||
directory.
|
|
||||||
|
|
||||||
If used as-is, the virtual machine specification is modest.
|
|
||||||
|
|
||||||
* 1 virtual CPU
|
|
||||||
* 512MB virtual memory
|
|
||||||
* 768MB swap space
|
|
||||||
* 79GB sparse virtual disk image. After installing prerequisites and
|
|
||||||
compiling Machi, the root file system uses approximately 2.7 GBytes.
|
|
||||||
|
|
|
@ -1,617 +0,0 @@
|
||||||
FLU and Chain Life Cycle Management -*- mode: org; -*-
|
|
||||||
#+STARTUP: lognotedone hidestars indent showall inlineimages
|
|
||||||
#+COMMENT: To generate the outline section: egrep '^\*[*]* ' doc/flu-and-chain-lifecycle.org | egrep -v '^\* Outline' | sed -e 's/^\*\*\* / + /' -e 's/^\*\* / + /' -e 's/^\* /+ /'
|
|
||||||
|
|
||||||
* FLU and Chain Life Cycle Management
|
|
||||||
|
|
||||||
In an ideal world, we (the Machi development team) would have a full
|
|
||||||
vision of how Machi would be managed, down to the last detail of
|
|
||||||
beautiful CLI character and network protocol bit. Our vision isn't
|
|
||||||
complete yet, so we are working one small step at a time.
|
|
||||||
|
|
||||||
* Outline
|
|
||||||
|
|
||||||
+ FLU and Chain Life Cycle Management
|
|
||||||
+ Terminology review
|
|
||||||
+ Terminology: Machi run-time components/services/thingies
|
|
||||||
+ Terminology: Machi chain data structures
|
|
||||||
+ Terminology: Machi cluster data structures
|
|
||||||
+ Overview of administrative life cycles
|
|
||||||
+ Cluster administrative life cycle
|
|
||||||
+ Chain administrative life cycle
|
|
||||||
+ FLU server administrative life cycle
|
|
||||||
+ Quick admin: declarative management of Machi FLU and chain life cycles
|
|
||||||
+ Quick admin uses the "rc.d" config scheme for life cycle management
|
|
||||||
+ Quick admin's declarative "language": an Erlang-flavored AST
|
|
||||||
+ Term 'host': define a new host for FLU services
|
|
||||||
+ Term 'flu': define a new FLU
|
|
||||||
+ Term 'chain': define or reconfigure a chain
|
|
||||||
+ Executing quick admin AST files via the 'machi-admin' utility
|
|
||||||
+ Checking the syntax of an AST file
|
|
||||||
+ Executing an AST file
|
|
||||||
+ Using quick admin to manage multiple machines
|
|
||||||
+ The "rc.d" style configuration file scheme
|
|
||||||
+ Riak had a similar configuration file editing problem (and its solution)
|
|
||||||
+ Machi's "rc.d" file scheme.
|
|
||||||
+ FLU life cycle management using "rc.d" style files
|
|
||||||
+ The key configuration components of a FLU
|
|
||||||
+ Chain life cycle management using "rc.d" style files
|
|
||||||
+ The key configuration components of a chain
|
|
||||||
|
|
||||||
* Terminology review
|
|
||||||
|
|
||||||
** Terminology: Machi run-time components/services/thingies
|
|
||||||
|
|
||||||
+ FLU: a basic Machi server, responsible for managing a collection of
|
|
||||||
files.
|
|
||||||
|
|
||||||
+ Chain: a small collection of FLUs that maintain replicas of the same
|
|
||||||
collection of files. A chain is usually small, 1-3 servers, where
|
|
||||||
more than 3 would be used only in cases when availability of
|
|
||||||
certain data is critical despite failures of several machines.
|
|
||||||
+ The length of a chain is directly proportional to its
|
|
||||||
replication factor, e.g., a chain length=3 will maintain
|
|
||||||
(nominally) 3 replicas of each file.
|
|
||||||
+ To maintain file availability when ~F~ failures have occurred, a
|
|
||||||
chain must be at least ~F+1~ members long. (In comparison, the
|
|
||||||
quorum replication technique requires ~2F+1~ members in the
|
|
||||||
general case.)
|
|
||||||
|
|
||||||
+ Cluster: A collection of Machi chains that are used to store files
|
|
||||||
in a horizontally partitioned/sharded/distributed manner.
|
|
||||||
|
|
||||||
** Terminology: Machi chain data structures
|
|
||||||
|
|
||||||
+ Projection: used to define a single chain: the chain's consistency
|
|
||||||
mode (strong or eventual consistency), all members (from an
|
|
||||||
administrative point of view), all active members (from a runtime,
|
|
||||||
automatically-managed point of view), repairing/file-syncing members
|
|
||||||
(also runtime, auto-managed), and so on
|
|
||||||
|
|
||||||
+ Epoch: A version number of a projection. The epoch number is used
|
|
||||||
by both clients & servers to manage transitions from one projection
|
|
||||||
to another, e.g., when the chain is temporarily shortened by the
|
|
||||||
failure of a member FLU server.
|
|
||||||
|
|
||||||
** Terminology: Machi cluster data structures
|
|
||||||
|
|
||||||
+ Namespace: A collection of human-friendly names that are mapped to
|
|
||||||
groups of Machi chains that provide the same type of storage
|
|
||||||
service: consistency mode, replication policy, etc.
|
|
||||||
+ A single namespace name, e.g. ~normal-ec~, is paired with a single
|
|
||||||
cluster map (see below).
|
|
||||||
+ Example: ~normal-ec~ might be a collection of Machi chains in
|
|
||||||
eventually-consistent mode that are of length=3.
|
|
||||||
+ Example: ~risky-ec~ might be a collection of Machi chains in
|
|
||||||
eventually-consistent mode that are of length=1.
|
|
||||||
+ Example: ~mgmt-critical~ might be a collection of Machi chains in
|
|
||||||
strongly-consistent mode that are of length=7.
|
|
||||||
|
|
||||||
+ Cluster map: Encodes the rules which partition/shard/distribute
|
|
||||||
the files stored in a particular namespace across a group of chains
|
|
||||||
that collectively store the namespace's files.
|
|
||||||
|
|
||||||
+ Chain weight: A value assigned to each chain within a cluster map
|
|
||||||
structure that defines the relative storage capacity of a chain
|
|
||||||
within the namespace. For example, a chain weight=150 has 50% more
|
|
||||||
capacity than a chain weight=100.
|
|
||||||
|
|
||||||
+ Cluster map epoch: The version number assigned to a cluster map.
|
|
||||||
|
|
||||||
* Overview of administrative life cycles
|
|
||||||
|
|
||||||
** Cluster administrative life cycle
|
|
||||||
|
|
||||||
+ Cluster is first created
|
|
||||||
+ Adds namespaces (e.g. consistency policy + chain length policy) to
|
|
||||||
the cluster
|
|
||||||
+ Chains are added to/removed from a namespace to increase/decrease the
|
|
||||||
namespace's storage capacity.
|
|
||||||
+ Adjust chain weights within a namespace, e.g., to shift files
|
|
||||||
within the namespace to chains with greater storage capacity
|
|
||||||
resources and/or runtime I/O resources.
|
|
||||||
|
|
||||||
A cluster "file migration" is the process of moving files from one
|
|
||||||
namespace member chain to another for purposes of shifting &
|
|
||||||
re-balancing storage capacity and/or runtime I/O capacity.
|
|
||||||
|
|
||||||
** Chain administrative life cycle
|
|
||||||
|
|
||||||
+ A chain is created with an initial FLU membership list.
|
|
||||||
+ Chain may be administratively modified zero or more times to
|
|
||||||
add/remove member FLU servers.
|
|
||||||
+ A chain may be decommissioned.
|
|
||||||
|
|
||||||
See also: http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html
|
|
||||||
|
|
||||||
** FLU server administrative life cycle
|
|
||||||
|
|
||||||
+ A FLU is created after an administrator chooses the FLU's runtime
|
|
||||||
location: which machine/virtual
|
|
||||||
machine, IP address and TCP port allocation, etc.
|
|
||||||
+ An unassigned FLU may be added to a chain by chain administrative
|
|
||||||
policy.
|
|
||||||
+ A FLU that is assigned to a chain may be removed from that chain by
|
|
||||||
chain administrative policy.
|
|
||||||
+ In the current implementation, the FLU's Erlang processes will be
|
|
||||||
halted. Then the FLU's data and metadata files will be moved to
|
|
||||||
another area of the disk for safekeeping. Later, a "garbage
|
|
||||||
collection" process can be used for reclaiming disk space used by
|
|
||||||
halted FLU servers.
|
|
||||||
|
|
||||||
See also: http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html
|
|
||||||
|
|
||||||
* Quick admin: declarative management of Machi FLU and chain life cycles
|
|
||||||
|
|
||||||
The "quick admin" scheme is a temporary (?) tool for managing Machi
|
|
||||||
FLU server and chain life cycles in a declarative manner. The API is
|
|
||||||
described in this section.
|
|
||||||
|
|
||||||
** Quick admin uses the "rc.d" config scheme for life cycle management
|
|
||||||
|
|
||||||
As described at the top of
|
|
||||||
http://basho.github.io/machi/edoc/machi_lifecycle_mgr.html, the "rc.d"
|
|
||||||
config files do not manage "policy". "Policy" is doing the right
|
|
||||||
thing with a Machi cluster from a systems administrator's
|
|
||||||
point of view. The "rc.d" config files can only implement decisions
|
|
||||||
made according to policy.
|
|
||||||
|
|
||||||
The "quick admin" tool is a first attempt at automating policy
|
|
||||||
decisions in a safe way (we hope) that is also easy to implement (we
|
|
||||||
hope) with a variety of systems management tools, e.g. Chef, Puppet,
|
|
||||||
Ansible, Saltstack, or plain-old-human-at-a-keyboard.
|
|
||||||
|
|
||||||
** Quick admin's declarative "language": an Erlang-flavored AST
|
|
||||||
|
|
||||||
The "language" that an administrator uses to express desired policy
|
|
||||||
changes is not (yet) a true language. As a quick implementation hack,
|
|
||||||
the current language is an Erlang-flavored abstract syntax tree
|
|
||||||
(AST). The tree isn't very deep, either, frequently just one
|
|
||||||
element tall. (Not much of a tree, is it?)
|
|
||||||
|
|
||||||
There are three terms in the language currently:
|
|
||||||
|
|
||||||
+ ~host~, define a new host that can execute FLU servers
|
|
||||||
+ ~flu~, define a new FLU
|
|
||||||
+ ~chain~, define a new chain or re-configure an existing chain with
|
|
||||||
the same name
|
|
||||||
|
|
||||||
*** Term 'host': define a new host for FLU services
|
|
||||||
|
|
||||||
In this context, a host is a machine, virtual machine, or container
|
|
||||||
that can execute the Machi application and can therefore provide FLU
|
|
||||||
services, i.e. file service, Humming Consensus management.
|
|
||||||
|
|
||||||
Two formats may be used to define a new host:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{host, Name, Props}.
|
|
||||||
{host, Name, AdminI, ClientI, Props}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
The shorter tuple is shorthand notation for the latter. If the
|
|
||||||
shorthand form is used, then it will be converted automatically to the
|
|
||||||
long form as:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{host, Name, AdminI=Name, ClientI=Name, Props}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
Type information, description, and restrictions:
|
|
||||||
|
|
||||||
+ ~Name::string()~ The ~Name~ attribute must be unique. Note that it
|
|
||||||
is possible to define two different hosts, one using a DNS hostname
|
|
||||||
and one using an IP address. The user must avoid this
|
|
||||||
double-definition because it is not enforced by quick admin.
|
|
||||||
+ The ~Name~ field is used for cross-reference purposes with other
|
|
||||||
terms, e.g., ~flu~ and ~chain~.
|
|
||||||
+ There is no syntax yet for removing a host definition.
|
|
||||||
|
|
||||||
+ ~AdminI::string()~ A DNS hostname or IP address for cluster
|
|
||||||
administration purposes, e.g. SSH access.
|
|
||||||
+ This field is unused at the present time.
|
|
||||||
|
|
||||||
+ ~ClientI::string()~ A DNS hostname or IP address for Machi's client
|
|
||||||
protocol access, e.g., Protocol Buffers network API service.
|
|
||||||
+ This field is unused at the present time.
|
|
||||||
|
|
||||||
+ ~Props::proplist()~ is an Erlang-style property list for specifying
|
|
||||||
additional configuration options, debugging information, sysadmin
|
|
||||||
comments, etc.
|
|
||||||
|
|
||||||
+ A full-featured admin tool should also include managing several
|
|
||||||
other aspects of configuration related to a "host". For example,
|
|
||||||
for any single IP address, quick admin assumes that there will be
|
|
||||||
exactly one Erlang VM that is running the Machi application. Of
|
|
||||||
course, it is possible to have dozens of Erlang VMs on the same
|
|
||||||
(let's assume for clarity) hardware machine and all running Machi
|
|
||||||
... but there are additional aspects of such a machine that quick
|
|
||||||
admin does not account for:
|
|
||||||
+ multiple IP addresses per machine
|
|
||||||
+ multiple Machi package installation paths
|
|
||||||
+ multiple Machi config files (e.g. cuttlefish config, ~etc.conf~,
|
|
||||||
~vm.args~)
|
|
||||||
+ multiple data directories/file system mount points
|
|
||||||
+ This is also a management problem for quick admin for a single
|
|
||||||
Machi package on a machine to take advantage of bulk data
|
|
||||||
storage using multiple file system mount points.
|
|
||||||
+ multiple Erlang VM host names, required for distributed Erlang,
|
|
||||||
which is used for communication with ~machi~ and ~machi-admin~
|
|
||||||
command line utilities.
|
|
||||||
+ and others....
|
|
||||||
|
|
||||||
*** Term 'flu': define a new FLU
|
|
||||||
|
|
||||||
A new FLU is defined relative to a previously-defined ~host~ entity;
|
|
||||||
an exception will be thrown if the ~host~ cannot be cross-referenced.
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{flu, Name, HostName, Port, Props}
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
Type information, description, and restrictions:
|
|
||||||
|
|
||||||
+ ~Name::atom()~ The name of the FLU, as a human-friendly name and
|
|
||||||
also for internal management use; please note the ~atom()~ type.
|
|
||||||
This name must be unique.
|
|
||||||
+ The ~Name~ field is used for cross-reference purposes with the
|
|
||||||
~chain~ term.
|
|
||||||
+ There is no syntax yet for removing a FLU definition.
|
|
||||||
|
|
||||||
+ ~HostName::string()~ The cross-reference name of the ~host~ that
|
|
||||||
this FLU should run on.
|
|
||||||
|
|
||||||
+ ~Port::non_neg_integer()~ The TCP port used by this FLU server's
|
|
||||||
Protocol Buffers network API listener service
|
|
||||||
|
|
||||||
+ ~Props::proplist()~ is an Erlang-style property list for specifying
|
|
||||||
additional configuration options, debugging information, sysadmin
|
|
||||||
comments, etc.
|
|
||||||
|
|
||||||
*** Term 'chain': define or reconfigure a chain
|
|
||||||
|
|
||||||
A chain is defined relative to zero or more previously-defined ~flu~
|
|
||||||
entities; an exception will be thrown if any ~flu~ cannot be
|
|
||||||
cross-referenced.
|
|
||||||
|
|
||||||
Two formats may be used to define/reconfigure a chain:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{chain, Name, FullList, Props}.
|
|
||||||
{chain, Name, CMode, FullList, Witnesses, Props}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
The shorter tuple is shorthand notation for the latter. If the
|
|
||||||
shorthand form is used, then it will be converted automatically to the
|
|
||||||
long form as:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{chain, Name, ap_mode, FullList, [], Props}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
Type information, description, and restrictions:
|
|
||||||
|
|
||||||
+ ~Name::atom()~ The name of the chain, as a human-friendly name and
|
|
||||||
also for internal management use; please note the ~atom()~ type.
|
|
||||||
This name must be unique.
|
|
||||||
+ There is no syntax yet for removing a chain definition.
|
|
||||||
|
|
||||||
+ ~CMode::'ap_mode'|'cp_mode'~ Defines the consistency mode of the
|
|
||||||
chain, either eventual consistency or strong consistency,
|
|
||||||
respectively.
|
|
||||||
+ A chain cannot change consistency mode, e.g., from
|
|
||||||
strong~->~eventual consistency.
|
|
||||||
|
|
||||||
+ ~FullList::list(atom())~ Specifies the list of full-service FLU
|
|
||||||
servers, i.e. servers that provide file data & metadata services as
|
|
||||||
well as Humming Consensus. Each atom in the list must
|
|
||||||
cross-reference with a previously defined ~flu~; an exception will
|
|
||||||
be thrown if any ~flu~ cannot be cross-referenced.
|
|
||||||
|
|
||||||
+ ~Witnesses::list(atom())~ Specifies the list of witness-only
|
|
||||||
servers, i.e. servers that only participate in Humming Consensus.
|
|
||||||
Each atom in the list must cross-reference with a previously defined
|
|
||||||
~flu~; an exception will be thrown if any ~flu~ cannot be
|
|
||||||
cross-referenced.
|
|
||||||
+ This list must be empty for eventual consistency chains.
|
|
||||||
|
|
||||||
+ ~Props::proplist()~ is an Erlang-style property list for specifying
|
|
||||||
additional configuration options, debugging information, sysadmin
|
|
||||||
comments, etc.
|
|
||||||
|
|
||||||
+ If this term specifies a new ~chain~ name, then all of the member
|
|
||||||
FLU servers (full & witness types) will be bootstrapped to a
|
|
||||||
starting configuration.
|
|
||||||
|
|
||||||
+ If this term specifies a previously-defined ~chain~ name, then all
|
|
||||||
of the member FLU servers (full & witness types, respectively) will
|
|
||||||
be adjusted to add or remove members, as appropriate.
|
|
||||||
+ Any FLU servers added to either list must not be assigned to any
|
|
||||||
other chain, or they must be a member of this specific chain.
|
|
||||||
+ Any FLU servers removed from either list will be halted.
|
|
||||||
(See the "FLU server administrative life cycle" section above.)
|
|
||||||
|
|
||||||
** Executing quick admin AST files via the 'machi-admin' utility
|
|
||||||
|
|
||||||
Examples of quick admin AST files can be found in the
|
|
||||||
~priv/quick-admin/examples~ directory. Below is an example that will
|
|
||||||
define a new host ( ~"localhost"~ ), three new FLU servers ( ~f1~ & ~f2~
|
|
||||||
and ~f3~ ), and an eventually consistent chain ( ~c1~ ) that uses the new
|
|
||||||
FLU servers:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{host, "localhost", []}.
|
|
||||||
{flu,f1,"localhost",20401,[]}.
|
|
||||||
{flu,f2,"localhost",20402,[]}.
|
|
||||||
{flu,f3,"localhost",20403,[]}.
|
|
||||||
{chain,c1,[f1,f2,f3],[]}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
*** Checking the syntax of an AST file
|
|
||||||
|
|
||||||
Given an AST config file, ~/path/to/ast/file~, its basic syntax and
|
|
||||||
correctness can be checked without executing it.
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
./rel/machi/bin/machi-admin quick-admin-check /path/to/ast/file
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
+ The utility will exit with status zero and output ~ok~ if the syntax
|
|
||||||
and proposed configuration appear to be correct.
|
|
||||||
+ If there is an error, the utility will exit with status one, and an
|
|
||||||
error message will be printed.
|
|
||||||
|
|
||||||
*** Executing an AST file
|
|
||||||
|
|
||||||
Given an AST config file, ~/path/to/ast/file~, it can be executed
|
|
||||||
using the command:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
./rel/machi/bin/machi-admin quick-admin-apply /path/to/ast/file RelativeHost
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
... where the last argument, ~RelativeHost~, should be the exact
|
|
||||||
spelling of one of the previously defined AST ~host~ entities,
|
|
||||||
*and also* is the same host that the ~machi-admin~ utility is being
|
|
||||||
executed on.
|
|
||||||
|
|
||||||
Restrictions and warnings:
|
|
||||||
|
|
||||||
+ This is alpha quality software.
|
|
||||||
|
|
||||||
+ There is no "undo".
|
|
||||||
+ Of course there is, but you need to resort to doing things like
|
|
||||||
using ~machi attach~ to attach to the server's CLI to then execute
|
|
||||||
magic Erlang incantations to stop FLUs, unconfigure chains, etc.
|
|
||||||
+ Oh, and delete some files with magic paths, also.
|
|
||||||
|
|
||||||
** Using quick admin to manage multiple machines
|
|
||||||
|
|
||||||
A quick sketch follows:
|
|
||||||
|
|
||||||
1. Create the AST file to specify all of the changes that you wish to
|
|
||||||
make to all hosts, FLUs, and/or chains, e.g., ~/tmp/ast.txt~.
|
|
||||||
2. Check the basic syntax with the ~quick-admin-check~ argument to
|
|
||||||
~machi-admin~.
|
|
||||||
3. If the syntax is good, then copy ~/tmp/ast.txt~ to all hosts in the
|
|
||||||
cluster, using the same path, ~/tmp/ast.txt~.
|
|
||||||
4. For each machine in the cluster, run:
|
|
||||||
#+BEGIN_SRC
|
|
||||||
./rel/machi/bin/machi-admin quick-admin-apply /tmp/ast.txt RelativeHost
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
... where RelativeHost is the AST ~host~ name of the machine that you
|
|
||||||
are executing the ~machi-admin~ command on. The command should be
|
|
||||||
successful, with exit status 0 and outputting the string ~ok~.
|
|
||||||
|
|
||||||
Finally, for each machine in the cluster, a listing of all files in
|
|
||||||
the directory ~rel/machi/etc/quick-admin-archive~ should show exactly
|
|
||||||
the same files, one for each time that ~quick-admin-apply~ has been
|
|
||||||
run successfully on that machine.
|
|
||||||
|
|
||||||
* The "rc.d" style configuration file scheme
|
|
||||||
|
|
||||||
This configuration scheme is inspired by BSD UNIX's ~init(8)~ process
|
|
||||||
manager's configuration style, called "rc.d" after the name of the
|
|
||||||
directory where these files are stored, ~/etc/rc.d~. The ~init~
|
|
||||||
process is responsible for (among other things) starting UNIX
|
|
||||||
processes at machine boot time and stopping them when the machine is
|
|
||||||
shut down.
|
|
||||||
|
|
||||||
The original scheme used by ~init~ to start processes at boot time was
|
|
||||||
a single Bourne shell script called ~/etc/rc~. When a new software
|
|
||||||
package was installed that required a daemon to be started at boot
|
|
||||||
time, text was added to the ~/etc/rc~ file. Uninstalling packages was
|
|
||||||
much trickier, because it meant removing lines from a file that
|
|
||||||
*is a computer program (run by the Bourne shell, a Turing-complete
|
|
||||||
programming language)*. Error-free editing of the ~/etc/rc~ script
|
|
||||||
was not possible in all cases.
|
|
||||||
|
|
||||||
Later, ~init~'s configuration was split into a few master Bourne shell
|
|
||||||
scripts and a subdirectory, ~/etc/rc.d~. The subdirectory contained
|
|
||||||
shell scripts that were responsible for boot time starting of a single
|
|
||||||
daemon or service, e.g. NFS or an HTTP server. When a new software
|
|
||||||
package was added, a new file was added to the ~rc.d~ subdirectory.
|
|
||||||
When a package was removed, the corresponding file in ~rc.d~ was
|
|
||||||
removed. With this simple scheme, addition & removal of boot time
|
|
||||||
scripts was vastly simplified.
|
|
||||||
|
|
||||||
** Riak had a similar configuration file editing problem (and its solution)
|
|
||||||
|
|
||||||
Another software product from Basho Technologies, Riak, had a similar
|
|
||||||
configuration file editing problem. One file in particular,
|
|
||||||
~app.config~, had a syntax that made it difficult both for human
|
|
||||||
systems administrators and also computer programs to edit the file in
|
|
||||||
a syntactically correct manner.
|
|
||||||
|
|
||||||
Later releases of Riak switched to an alternative configuration file
|
|
||||||
format, one inspired by the BSD UNIX ~sysctl(8)~ utility and
|
|
||||||
~sysctl.conf(5)~ file syntax. The ~sysctl.conf~ format is much easier
|
|
||||||
to manage by computer programs to add items. Removing items is not
|
|
||||||
100% simple, however: the correct lines must be identified and then
|
|
||||||
removed (e.g. with Perl or a text editor or combination of ~grep -v~
|
|
||||||
and ~mv~), but removing any comment lines that "belong" to the removed
|
|
||||||
config item(s) is not easy for a 1-line shell script to do 100%
|
|
||||||
correctly.
|
|
||||||
|
|
||||||
Machi will use the ~sysctl.conf~ style configuration for some
|
|
||||||
application configuration variables. However, adding & removing FLUs
|
|
||||||
and chains will be managed using the "rc.d" style because of the
|
|
||||||
"rc.d" scheme's simplicity and tolerance of mistakes by administrators
|
|
||||||
(human or computer).
|
|
||||||
|
|
||||||
** Machi's "rc.d" file scheme.
|
|
||||||
|
|
||||||
Machi will use a single subdirectory that will contain configuration
|
|
||||||
files for some life cycle management task, e.g. a single FLU or a
|
|
||||||
single chain.
|
|
||||||
|
|
||||||
The contents of the file should be a single Erlang term, serialized in
|
|
||||||
ASCII form as an Erlang source code statement, i.e. a single Erlang term
|
|
||||||
~T~ that is formatted by ~io:format("~w.",[T]).~. This file must be
|
|
||||||
parseable by the Erlang function ~file:consult()~.
|
|
||||||
|
|
||||||
Later versions of Machi may change the file format to be more familiar
|
|
||||||
to administrators who are unaccustomed to Erlang language syntax.
|
|
||||||
|
|
||||||
** FLU life cycle management using "rc.d" style files
|
|
||||||
|
|
||||||
*** The key configuration components of a FLU
|
|
||||||
|
|
||||||
1. The machine (or virtual machine) to run it on.
|
|
||||||
2. The Machi software package's artifacts to execute.
|
|
||||||
3. The disk device(s) used to store Machi file data & metadata, "rc.d"
|
|
||||||
style config files, etc.
|
|
||||||
4. The name, IP address and TCP port assigned to the FLU service.
|
|
||||||
5. Its chain assignment.
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
|
|
||||||
+ Items 1-3 are currently outside of the scope of this life cycle
|
|
||||||
document. We assume that human administrators know how to do these
|
|
||||||
things.
|
|
||||||
+ Item 4's properties are explicitly managed by a FLU-defining "rc.d"
|
|
||||||
style config file.
|
|
||||||
+ Item 5 is managed by the chain life cycle management system.
|
|
||||||
|
|
||||||
Here is an example of a properly formatted FLU config file:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{p_srvr,f1,machi_flu1_client,"192.168.72.23",20401,[]}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
... which corresponds to the following Erlang record definition:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
-record(p_srvr, {
|
|
||||||
name :: atom(),
|
|
||||||
proto_mod = 'machi_flu1_client' :: atom(), % Module name
|
|
||||||
address :: term(), % Protocol-specific
|
|
||||||
port :: term(), % Protocol-specific
|
|
||||||
props = [] :: list() % proplist for other related info
|
|
||||||
}).
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
+ ~name~ is ~f1~. This is the name of the FLU. This name should be
|
|
||||||
unique over the lifetime of the administrative domain and thus
|
|
||||||
managed by external policy. This name must be the same as the name
|
|
||||||
of the config file that defines the FLU.
|
|
||||||
+ ~proto_mod~ is used for internal management purposes and should be
|
|
||||||
considered a mandatory constant.
|
|
||||||
+ ~address~ is "192.168.72.23". The DNS hostname or IP address used
|
|
||||||
by other servers to communicate with this FLU. This must be a valid
|
|
||||||
IP address, previously assigned to this machine/VM using the
|
|
||||||
appropriate operating system-specific procedure.
|
|
||||||
+ ~port~ is TCP port 20401. The TCP port number that the FLU listens
|
|
||||||
to for incoming Protocol Buffers-serialized communication. This TCP
|
|
||||||
port must not be in use (now or in the future) by another Machi FLU
|
|
||||||
or any other process running on this machine/VM.
|
|
||||||
+ ~props~ is an Erlang-style property list for specifying additional
|
|
||||||
configuration options, debugging information, sysadmin comments,
|
|
||||||
etc.
|
|
||||||
|
|
||||||
** Chain life cycle management using "rc.d" style files
|
|
||||||
|
|
||||||
Unlike FLUs, chains have a self-management aspect that makes a chain
|
|
||||||
life cycle different from a single FLU server. Machi's chains are
|
|
||||||
self-managing, via Humming Consensus; see the
|
|
||||||
https://github.com/basho/machi/tree/master/doc/ directory for much
|
|
||||||
more detail about Humming Consensus. After FLUs have received their
|
|
||||||
initial chain configuration for Humming Consensus, the FLUs will
|
|
||||||
manage the chain (and each other) by themselves.
|
|
||||||
|
|
||||||
However, Humming Consensus does not handle three chain management
|
|
||||||
problems:
|
|
||||||
|
|
||||||
1. Specifying the very first chain configuration,
|
|
||||||
2. Altering the membership of the chain (i.e. adding/removing FLUs
|
|
||||||
from the chain),
|
|
||||||
3. Stopping the chain permanently.
|
|
||||||
|
|
||||||
A chain "rc.d" file will only be used to bootstrap a newly-defined FLU
|
|
||||||
server. It's like a piece of glue information to introduce the new
|
|
||||||
FLU to the Humming Consensus group that is managing the chain's
|
|
||||||
dynamic state (e.g. which members are up or down). In all other
|
|
||||||
respects, chain config files are ignored by life cycle management code.
|
|
||||||
However, to mimic the life cycle of the FLU server's "rc.d" config
|
|
||||||
files, a chain "rc.d" file is not deleted until the chain has been
|
|
||||||
decommissioned (i.e. defined with length=0).
|
|
||||||
|
|
||||||
*** The key configuration components of a chain
|
|
||||||
|
|
||||||
1. The name of the chain.
|
|
||||||
2. Consistency mode: eventually consistent or strongly consistent.
|
|
||||||
3. The membership list of all FLU servers in the chain.
|
|
||||||
+ Remember, all servers in a single chain will manage full replicas
|
|
||||||
of the same collection of Machi files.
|
|
||||||
4. If the chain is defined to use strongly consistent mode, then a
|
|
||||||
list of "witness servers" may also be defined. See the
|
|
||||||
[[https://github.com/basho/machi/tree/master/doc/]] documentation for
|
|
||||||
more information on witness servers.
|
|
||||||
+ The witness list must be empty for all chains in eventual
|
|
||||||
consistency mode.
|
|
||||||
|
|
||||||
Here is an example of a properly formatted chain config file:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
{chain_def_v1,c1,ap_mode,
|
|
||||||
[{p_srvr,f1,machi_flu1_client,"localhost",20401,[]},
|
|
||||||
{p_srvr,f2,machi_flu1_client,"localhost",20402,[]},
|
|
||||||
{p_srvr,f3,machi_flu1_client,"localhost",20403,[]}],
|
|
||||||
[],[],[],
|
|
||||||
[f1,f2,f3],
|
|
||||||
[],[]}.
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
... which corresponds to the following Erlang record definition:
|
|
||||||
|
|
||||||
#+BEGIN_SRC
|
|
||||||
-record(chain_def_v1, {
|
|
||||||
name :: atom(), % chain name
|
|
||||||
mode :: 'ap_mode' | 'cp_mode',
|
|
||||||
full = [] :: [p_srvr()],
|
|
||||||
witnesses = [] :: [p_srvr()],
|
|
||||||
old_full = [] :: [atom()], % guard against some races
|
|
||||||
old_witnesses=[] :: [atom()], % guard against some races
|
|
||||||
local_run = [] :: [atom()], % must be tailored to each machine!
|
|
||||||
local_stop = [] :: [atom()], % must be tailored to each machine!
|
|
||||||
props = [] :: list() % proplist for other related info
|
|
||||||
}).
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
+ ~name~ is ~c1~, the name of the chain. This name should be unique
|
|
||||||
over the lifetime of the administrative domain and thus managed by
|
|
||||||
external policy. This name must be the same as the name of the
|
|
||||||
config file that defines the chain.
|
|
||||||
+ ~mode~ is ~ap_mode~, an internal code symbol for eventual
|
|
||||||
consistency mode.
|
|
||||||
+ ~full~ is a list of Erlang ~#p_srvr{}~ records for full-service
|
|
||||||
members of the chain, i.e., providing Machi file data & metadata
|
|
||||||
storage services.
|
|
||||||
+ ~witnesses~ is a list of Erlang ~#p_srvr{}~ records for witness-only
|
|
||||||
FLU servers, i.e., providing only Humming Consensus service.
|
|
||||||
+ The next four fields are used for internal management only.
|
|
||||||
+ ~props~ is an Erlang-style property list for specifying additional
|
|
||||||
configuration options, debugging information, sysadmin comments,
|
|
||||||
etc.
|
|
||||||
|
|
Binary file not shown.
|
@ -1,372 +0,0 @@
|
||||||
|
|
||||||
# Table of contents
|
|
||||||
|
|
||||||
* [Hands-on experiments with Machi and Humming Consensus](#hands-on)
|
|
||||||
* [Using the network partition simulator and convergence demo test code](#partition-simulator)
|
|
||||||
|
|
||||||
<a name="hands-on"></a>
|
|
||||||
# Hands-on experiments with Machi and Humming Consensus
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
Please refer to the
|
|
||||||
[Machi development environment prerequisites doc](./dev-prerequisites.md)
|
|
||||||
for Machi developer environment prerequisites.
|
|
||||||
|
|
||||||
If you do not have an Erlang/OTP runtime system available, but you do
|
|
||||||
have [the Vagrant virtual machine](https://www.vagrantup.com/) manager
|
|
||||||
available, then please refer to the instructions in the prerequisites
|
|
||||||
doc for using Vagrant.
|
|
||||||
|
|
||||||
<a name="clone-compile"></a>
|
|
||||||
## Clone and compile the code
|
|
||||||
|
|
||||||
Please see the
|
|
||||||
[Machi 'clone and compile' doc](./dev-clone-compile.md)
|
|
||||||
for the short list of steps required to fetch the Machi source code
|
|
||||||
from GitHub and to compile & test Machi.
|
|
||||||
|
|
||||||
## Running three Machi instances on a single machine
|
|
||||||
|
|
||||||
All of the commands that should be run at your login shell (e.g. Bash,
|
|
||||||
c-shell) can be cut-and-pasted from this document directly to your
|
|
||||||
login shell prompt.
|
|
||||||
|
|
||||||
Run the following command:
|
|
||||||
|
|
||||||
make stagedevrel
|
|
||||||
|
|
||||||
This will create a directory structure like this:
|
|
||||||
|
|
||||||
|-dev1-|... stand-alone Machi app + subdirectories
|
|
||||||
|-dev-|-dev2-|... stand-alone Machi app + directories
|
|
||||||
|-dev3-|... stand-alone Machi app + directories
|
|
||||||
|
|
||||||
Each of the `dev/dev1`, `dev/dev2`, and `dev/dev3` are stand-alone
|
|
||||||
application instances of Machi and can be run independently of each
|
|
||||||
other on the same machine. This demo will use all three.
|
|
||||||
|
|
||||||
The lifecycle management utilities for Machi are a bit immature,
|
|
||||||
currently. They assume that each Machi server runs on a host with a
|
|
||||||
unique hostname -- there is no flexibility built-in yet to easily run
|
|
||||||
multiple Machi instances on the same machine. To continue with the
|
|
||||||
demo, we need to use `sudo` or `su` to obtain superuser privileges to
|
|
||||||
edit the `/etc/hosts` file.
|
|
||||||
|
|
||||||
Please add the following line to `/etc/hosts`, using this command:
|
|
||||||
|
|
||||||
sudo sh -c 'echo "127.0.0.1 machi1 machi2 machi3" >> /etc/hosts'
|
|
||||||
|
|
||||||
Next, we will use a shell script to finish setting up our cluster. It
|
|
||||||
will do the following for us:
|
|
||||||
|
|
||||||
* Verify that the new line that was added to `/etc/hosts` is correct.
|
|
||||||
* Modify the `etc/app.config` files to configure the Humming Consensus
|
|
||||||
chain manager's actions to be logged to the `log/console.log` file.
|
|
||||||
* Start the three application instances.
|
|
||||||
* Verify that the three instances are running correctly.
|
|
||||||
* Configure a single chain, with one FLU server per application
|
|
||||||
instance.
|
|
||||||
|
|
||||||
Please run this script using this command:
|
|
||||||
|
|
||||||
./priv/humming-consensus-demo.setup.sh
|
|
||||||
|
|
||||||
If the output looks like this (and exits with status zero), then the
|
|
||||||
script was successful.
|
|
||||||
|
|
||||||
Step: Verify that the required entries in /etc/hosts are present
|
|
||||||
Step: add a verbose logging option to app.config
|
|
||||||
Step: start three three Machi application instances
|
|
||||||
pong
|
|
||||||
pong
|
|
||||||
pong
|
|
||||||
Step: configure one chain to start a Humming Consensus group with three members
|
|
||||||
Result: ok
|
|
||||||
Result: ok
|
|
||||||
Result: ok
|
|
||||||
|
|
||||||
We have now created a single replica chain, called `c1`, that has
|
|
||||||
three file servers participating in the chain. Thanks to the
|
|
||||||
hostnames that we added to `/etc/hosts`, all are using the localhost
|
|
||||||
network interface.
|
|
||||||
|
|
||||||
| App instance | Pseudo | FLU name | TCP port |
|
|
||||||
| directory | Hostname | | number |
|
|
||||||
|--------------+----------+----------+----------|
|
|
||||||
| dev1 | machi1 | flu1 | 20401 |
|
|
||||||
| dev2 | machi2 | flu2 | 20402 |
|
|
||||||
| dev3 | machi3 | flu3 | 20403 |
|
|
||||||
|
|
||||||
The log files for each application instance can be found in the
|
|
||||||
`./dev/devN/log/console.log` file, where the `N` is the instance
|
|
||||||
number: 1, 2, or 3.
|
|
||||||
|
|
||||||
## Understanding the chain manager's log file output
|
|
||||||
|
|
||||||
After running the `./priv/humming-consensus-demo.setup.sh` script,
|
|
||||||
let's look at the last few lines of the `./dev/dev1/log/console.log`
|
|
||||||
log file for Erlang VM process #1.
|
|
||||||
|
|
||||||
2016-03-09 10:16:35.676 [info] <0.105.0>@machi_lifecycle_mgr:process_pending_flu:422 Started FLU f1 with supervisor pid <0.128.0>
|
|
||||||
2016-03-09 10:16:35.676 [info] <0.105.0>@machi_lifecycle_mgr:move_to_flu_config:540 Creating FLU config file f1
|
|
||||||
2016-03-09 10:16:35.790 [info] <0.105.0>@machi_lifecycle_mgr:bootstrap_chain2:312 Configured chain c1 via FLU f1 to mode=ap_mode all=[f1,f2,f3] witnesses=[]
|
|
||||||
2016-03-09 10:16:35.790 [info] <0.105.0>@machi_lifecycle_mgr:move_to_chain_config:546 Creating chain config file c1
|
|
||||||
2016-03-09 10:16:44.139 [info] <0.132.0> CONFIRM epoch 1141 <<155,42,7,221>> upi [] rep [] auth f1 by f1
|
|
||||||
2016-03-09 10:16:44.271 [info] <0.132.0> CONFIRM epoch 1148 <<57,213,154,16>> upi [f1] rep [] auth f1 by f1
|
|
||||||
2016-03-09 10:16:44.864 [info] <0.132.0> CONFIRM epoch 1151 <<239,29,39,70>> upi [f1] rep [f3] auth f1 by f1
|
|
||||||
2016-03-09 10:16:45.235 [info] <0.132.0> CONFIRM epoch 1152 <<173,17,66,225>> upi [f2] rep [f1,f3] auth f2 by f1
|
|
||||||
2016-03-09 10:16:47.343 [info] <0.132.0> CONFIRM epoch 1154 <<154,231,224,149>> upi [f2,f1,f3] rep [] auth f2 by f1
|
|
||||||
|
|
||||||
Let's pick apart some of these lines. We have started all three
|
|
||||||
servers at about the same time. We see some race conditions happen,
|
|
||||||
and some jostling and readjustment happens pretty quickly in the first
|
|
||||||
few seconds.
|
|
||||||
|
|
||||||
* `Started FLU f1 with supervisor pid <0.128.0>`
|
|
||||||
* This VM, #1,
|
|
||||||
started a FLU (Machi data server) with the name `f1`. In the Erlang
|
|
||||||
process supervisor hierarchy, the process ID of the top supervisor
|
|
||||||
is `<0.128.0>`.
|
|
||||||
* `Configured chain c1 via FLU f1 to mode=ap_mode all=[f1,f2,f3] witnesses=[]`
|
|
||||||
* A bootstrap configuration for a chain named `c1` has been created.
|
|
||||||
* The FLUs/data servers that are eligible for participation in the
|
|
||||||
chain have names `f1`, `f2`, and `f3`.
|
|
||||||
* The chain will operate in eventual consistency mode (`ap_mode`)
|
|
||||||
* The witness server list is empty. Witness servers are never used
|
|
||||||
in eventual consistency mode.
|
|
||||||
* `CONFIRM epoch 1141 <<155,42,7,221>> upi [] rep [] auth f1 by f1`
|
|
||||||
* All participants in epoch 1141 are unanimous in adopting epoch
|
|
||||||
1141's projection. All active membership lists are empty, so
|
|
||||||
there is no functional chain replication yet, at least as far as
|
|
||||||
server `f1` knows.
|
|
||||||
* The epoch's abbreviated checksum is `<<155,42,7,221>>`.
|
|
||||||
* The UPI list, i.e. the replicas whose data is 100% in sync, is
|
|
||||||
`[]`, the empty list. (UPI = Update Propagation Invariant)
|
|
||||||
* The list of servers that are under data repair (`rep`) is also
|
|
||||||
empty, `[]`.
|
|
||||||
* This projection was authored by server `f1`.
|
|
||||||
* The log message was generated by server `f1`.
|
|
||||||
* `CONFIRM epoch 1148 <<57,213,154,16>> upi [f1] rep [] auth f1 by f1`
|
|
||||||
* Now the server `f1` has created a chain of length 1, `[f1]`.
|
|
||||||
* Chain repair/file re-sync is not required when the UPI server list
|
|
||||||
changes from length 0 -> 1.
|
|
||||||
* `CONFIRM epoch 1151 <<239,29,39,70>> upi [f1] rep [f3] auth f1 by f1`
|
|
||||||
* Server `f1` has noticed that server `f3` is alive. Apparently it
|
|
||||||
has not yet noticed that server `f2` is also running.
|
|
||||||
* Server `f3` is in the repair list.
|
|
||||||
* `CONFIRM epoch 1152 <<173,17,66,225>> upi [f2] rep [f1,f3] auth f2 by f1`
|
|
||||||
* Server `f2` is apparently now aware that all three servers are running.
|
|
||||||
* The previous configuration used by `f2` was `upi [f2]`, i.e., `f2`
|
|
||||||
was running in a chain of one. `f2` noticed that `f1` and `f3`
|
|
||||||
were now available and has started adding them to the chain.
|
|
||||||
* All new servers are always added to the tail of the chain in the
|
|
||||||
repair list.
|
|
||||||
* In eventual consistency mode, a UPI change like this is OK.
|
|
||||||
* When performing a read, a client must read from both the tail of the
|
|
||||||
UPI list and also from all repairing servers.
|
|
||||||
* When performing a write, the client writes to both the UPI
|
|
||||||
server list and also the repairing list, in that order.
|
|
||||||
* I.e., the client concatenates both lists,
|
|
||||||
`UPI ++ Repairing`, for its chain configuration for the write.
|
|
||||||
* Server `f2` will trigger file repair/re-sync shortly.
|
|
||||||
* The waiting time for starting repair has been configured to be
|
|
||||||
extremely short, 1 second. The default waiting time is 10
|
|
||||||
seconds, in case Humming Consensus remains unstable.
|
|
||||||
* `CONFIRM epoch 1154 <<154,231,224,149>> upi [f2,f1,f3] rep [] auth f2 by f1`
|
|
||||||
* File repair/re-sync has finished. All file data on all servers
|
|
||||||
are now in sync.
|
|
||||||
* The UPI/in-sync part of the chain is now `[f2,f1,f3]`, and there
|
|
||||||
are no servers under repair.
|
|
||||||
|
|
||||||
## Let's create some failures
|
|
||||||
|
|
||||||
Here are some suggestions for creating failures.
|
|
||||||
|
|
||||||
* Use the `./dev/devN/bin/machi stop` and `./dev/devN/bin/machi start`
|
|
||||||
commands to stop & start VM #`N`.
|
|
||||||
* Stop a VM abnormally by using `kill`. The OS process name to look
|
|
||||||
for is `beam.smp`.
|
|
||||||
* Suspend and resume a VM, using the `SIGSTOP` and `SIGCONT` signals.
|
|
||||||
* E.g. `kill -STOP 9823` and `kill -CONT 9823`
|
|
||||||
|
|
||||||
The network partition simulator is not (yet) available when running
|
|
||||||
Machi in this mode. Please see the next section for instructions on
|
|
||||||
how to use the partition simulator.
|
|
||||||
|
|
||||||
|
|
||||||
<a name="partition-simulator"></a>
|
|
||||||
# Using the network partition simulator and convergence demo test code
|
|
||||||
|
|
||||||
This is the demo code mentioned in the presentation that Scott Lystig
|
|
||||||
Fritchie gave at the
|
|
||||||
[RICON 2015 conference](http://ricon.io).
|
|
||||||
* [slides (PDF format)](http://ricon.io/speakers/slides/Scott_Fritchie_Ricon_2015.pdf)
|
|
||||||
* [video](https://www.youtube.com/watch?v=yR5kHL1bu1Q)
|
|
||||||
|
|
||||||
## A complete example of all input and output
|
|
||||||
|
|
||||||
If you don't have an Erlang/OTP 17 runtime environment available,
|
|
||||||
please see this file for full input and output of a strong consistency
|
|
||||||
length=3 chain test:
|
|
||||||
https://gist.github.com/slfritchie/8352efc88cc18e62c72c
|
|
||||||
This file contains all commands input and all simulator output from a
|
|
||||||
sample run of the simulator.
|
|
||||||
|
|
||||||
To help interpret the output of the test, please skip ahead to the
|
|
||||||
"The test output is very verbose" section.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
If you don't have `git` and/or the Erlang 17 runtime system available
|
|
||||||
on your OS X, FreeBSD, Linux, or Solaris machine, please take a look
|
|
||||||
at the [Prerequisites section](#prerequisites) first. When you have
|
|
||||||
installed the prerequisite software, please return back here.
|
|
||||||
|
|
||||||
## Clone and compile the code
|
|
||||||
|
|
||||||
Please briefly visit the [Clone and compile the code](#clone-compile)
|
|
||||||
section. When finished, please return back here.
|
|
||||||
|
|
||||||
## Run an interactive Erlang CLI shell
|
|
||||||
|
|
||||||
Run the following command at your login shell:
|
|
||||||
|
|
||||||
erl -pz .eunit ebin deps/*/ebin
|
|
||||||
|
|
||||||
If you are using Erlang/OTP version 17, you should see some CLI output
|
|
||||||
that looks like this:
|
|
||||||
|
|
||||||
Erlang/OTP 17 [erts-6.4] [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
|
|
||||||
|
|
||||||
Eshell V6.4 (abort with ^G)
|
|
||||||
1>
|
|
||||||
|
|
||||||
## The test output is very verbose ... what are the important parts?
|
|
||||||
|
|
||||||
The output of the Erlang command
|
|
||||||
`machi_chain_manager1_converge_demo:help()` will display the following
|
|
||||||
guide to the output of the tests.
|
|
||||||
|
|
||||||
A visualization of the convergence behavior of the chain self-management
|
|
||||||
algorithm for Machi.
|
|
||||||
|
|
||||||
1. Set up some server and chain manager pairs.
|
|
||||||
2. Create a number of different network partition scenarios, where
|
|
||||||
(simulated) partitions may be symmetric or asymmetric. Then stop changing
|
|
||||||
the partitions and keep the simulated network stable (and perhaps broken).
|
|
||||||
3. Run a number of iterations of the algorithm in parallel by poking each
|
|
||||||
of the manager processes on a random'ish basis.
|
|
||||||
4. Afterward, fetch the chain transition changes made by each FLU and
|
|
||||||
verify that no transition was unsafe.
|
|
||||||
|
|
||||||
During the iteration periods, the following is a cheatsheet for the output.
|
|
||||||
See the internal source for interpreting the rest of the output.
|
|
||||||
|
|
||||||
'SET partitions = '
|
|
||||||
|
|
||||||
A pair-wise list of actors which cannot send messages. The
|
|
||||||
list is uni-directional. If there are three servers (a,b,c),
|
|
||||||
and if the partitions list is '[{a,b},{b,c}]' then all
|
|
||||||
messages from a->b and b->c will be dropped, but any other
|
|
||||||
sender->recipient messages will be delivered successfully.
|
|
||||||
|
|
||||||
'x uses:'
|
|
||||||
|
|
||||||
The FLU x has made an internal state transition and is using
|
|
||||||
this epoch's projection as operating chain configuration. The
|
|
||||||
rest of the line is a summary of the projection.
|
|
||||||
|
|
||||||
'CONFIRM epoch {N}'
|
|
||||||
|
|
||||||
This message confirms that all of the servers listed in the
|
|
||||||
UPI and repairing lists of the projection at epoch {N} have
|
|
||||||
agreed to use this projection because they all have written
|
|
||||||
this projection to their respective private projection stores.
|
|
||||||
The chain is now usable by/available to all clients.
|
|
||||||
|
|
||||||
'Sweet, private projections are stable'
|
|
||||||
|
|
||||||
This report announces that this iteration of the test cycle
|
|
||||||
has passed successfully. The report that follows briefly
|
|
||||||
summarizes the latest private projection used by each
|
|
||||||
participating server. For example, when in strong consistency
|
|
||||||
mode with 'a' as a witness and 'b' and 'c' as real servers:
|
|
||||||
|
|
||||||
%% Legend:
|
|
||||||
%% server name, epoch ID, UPI list, repairing list, down list, ...
|
|
||||||
%% ... witness list, 'false' (a constant value)
|
|
||||||
|
|
||||||
[{a,{{1116,<<23,143,246,55>>},[a,b],[],[c],[a],false}},
|
|
||||||
{b,{{1116,<<23,143,246,55>>},[a,b],[],[c],[a],false}}]
|
|
||||||
|
|
||||||
Both servers 'a' and 'b' agree on epoch 1116 with epoch ID
|
|
||||||
{1116,<<23,143,246,55>>} where UPI=[a,b], repairing=[],
|
|
||||||
down=[c], and witnesses=[a].
|
|
||||||
|
|
||||||
Server 'c' is not shown because 'c' has wedged itself OOS (out
|
|
||||||
of service) by configuring a chain length of zero.
|
|
||||||
|
|
||||||
If no servers are listed in the report (i.e. only '[]' is
|
|
||||||
displayed), then all servers have wedged themselves OOS, and
|
|
||||||
the chain is unavailable.
|
|
||||||
|
|
||||||
'DoIt,'
|
|
||||||
|
|
||||||
This marks a group of tick events which trigger the manager
|
|
||||||
processes to evaluate their environment and perhaps make a
|
|
||||||
state transition.
|
|
||||||
|
|
||||||
A long chain of 'DoIt,DoIt,DoIt,' means that the chain state has
|
|
||||||
(probably) settled to a stable configuration, which is the goal of the
|
|
||||||
algorithm.
|
|
||||||
|
|
||||||
Press control-c to interrupt the test....
|
|
||||||
|
|
||||||
## Run a test in eventual consistency mode
|
|
||||||
|
|
||||||
Run the following command at the Erlang CLI prompt:
|
|
||||||
|
|
||||||
machi_chain_manager1_converge_demo:t(3, [{private_write_verbose,true}]).
|
|
||||||
|
|
||||||
The first argument, `3`, is the number of servers to participate in
|
|
||||||
the chain. Please note:
|
|
||||||
|
|
||||||
* Chain lengths as short as 1 or 2 are valid, but the results are a
|
|
||||||
bit boring.
|
|
||||||
* Chain lengths as long as 7 or 9 can be used, but they may
|
|
||||||
suffer from longer periods of churn/instability before all chain
|
|
||||||
managers reach agreement via humming consensus. (It is future work
|
|
||||||
to shorten the worst of the unstable churn latencies.)
|
|
||||||
* In eventual consistency mode, chain lengths may be even numbers,
|
|
||||||
e.g. 2, 4, or 6.
|
|
||||||
* The simulator will choose partition events from the permutations of
|
|
||||||
all 1, 2, and 3 node partition pairs. The total runtime will
|
|
||||||
increase *dramatically* with chain length.
|
|
||||||
* Chain length 2: about 3 partition cases
|
|
||||||
* Chain length 3: about 35 partition cases
|
|
||||||
* Chain length 4: about 230 partition cases
|
|
||||||
* Chain length 5: about 1100 partition cases
|
|
||||||
|
|
||||||
## Run a test in strong consistency mode (with witnesses):
|
|
||||||
|
|
||||||
*NOTE:* Due to a bug in the test code, please do not try to run the
|
|
||||||
convergence test in strong consistency mode without the
|
|
||||||
correct minority number of witness servers! If in doubt, please run
|
|
||||||
the commands shown below exactly.
|
|
||||||
|
|
||||||
Run the following command at the Erlang CLI prompt:
|
|
||||||
|
|
||||||
machi_chain_manager1_converge_demo:t(3, [{private_write_verbose,true}, {consistency_mode, cp_mode}, {witnesses, [a]}]).
|
|
||||||
|
|
||||||
The first argument, `3`, is the number of servers to participate in
|
|
||||||
the chain. Chain lengths as long as 7 or 9 can be used, but they may
|
|
||||||
suffer from longer periods of churn/instability before all chain
|
|
||||||
managers reach agreement via humming consensus.
|
|
||||||
|
|
||||||
Due to the bug mentioned above, please use the following
|
|
||||||
commands when running with chain lengths of 5 or 7, respectively.
|
|
||||||
|
|
||||||
machi_chain_manager1_converge_demo:t(5, [{private_write_verbose,true}, {consistency_mode, cp_mode}, {witnesses, [a,b]}]).
|
|
||||||
machi_chain_manager1_converge_demo:t(7, [{private_write_verbose,true}, {consistency_mode, cp_mode}, {witnesses, [a,b,c]}]).
|
|
||||||
|
|
170
doc/overview.edoc
Normal file
170
doc/overview.edoc
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
|
||||||
|
@title Machi: a small village of replicated files
|
||||||
|
|
||||||
|
@doc
|
||||||
|
|
||||||
|
== About This EDoc Documentation ==
|
||||||
|
|
||||||
|
This EDoc-style documentation will concern itself only with Erlang
|
||||||
|
function APIs and function & data types. Higher-level design and
|
||||||
|
commentary will remain outside of the Erlang EDoc system; please see
|
||||||
|
the "Pointers to Other Machi Documentation" section below for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
Readers should beware that this documentation may be out-of-sync with
|
||||||
|
the source code. When in doubt, use the `make edoc' command to
|
||||||
|
regenerate all HTML pages.
|
||||||
|
|
||||||
|
It is the developer's responsibility to re-generate the documentation
|
||||||
|
periodically and commit it to the Git repo.
|
||||||
|
|
||||||
|
== Machi Code Overview ==
|
||||||
|
|
||||||
|
=== Chain Manager ===
|
||||||
|
|
||||||
|
The Chain Manager is responsible for managing Machi's
|
||||||
|
"Chain Replication" state. This role is roughly analogous to the
|
||||||
|
"Riak Core" application inside of Riak, which takes care of
|
||||||
|
coordinating replica placement and replica repair.
|
||||||
|
|
||||||
|
For each primitive data server in the cluster, a Machi FLU, there is a
|
||||||
|
Chain Manager process that manages its FLU's role within the Machi
|
||||||
|
cluster's Chain Replication scheme. Each Chain Manager process
|
||||||
|
executes locally and independently to manage the distributed state of
|
||||||
|
a single Machi Chain Replication chain.
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
|
||||||
|
<li> To contrast with Riak Core ... Riak Core's claimant process is
|
||||||
|
solely responsible for managing certain critical aspects of
|
||||||
|
Riak Core distributed state. Machi's Chain Manager process
|
||||||
|
performs similar tasks as Riak Core's claimant. However, Machi
|
||||||
|
has several active Chain Manager processes, one per FLU server,
|
||||||
|
instead of a single active process like Core's claimant. Each
|
||||||
|
Chain Manager process acts independently; each is constrained
|
||||||
|
so that it will reach consensus via independent computation
|
||||||
|
& action.
|
||||||
|
|
||||||
|
Full discussion of this distributed consensus is outside the
|
||||||
|
scope of this document; see the "Pointers to Other Machi
|
||||||
|
Documentation" section below for more information.
|
||||||
|
</li>
|
||||||
|
<li> Machi differs from a Riak Core application because Machi's
|
||||||
|
replica placement policy is simply, "All Machi servers store
|
||||||
|
replicas of all Machi files".
|
||||||
|
Machi is intended to be a primitive building block for creating larger
|
||||||
|
cluster-of-clusters where files are
|
||||||
|
distributed/fragmented/sharded across a large pool of
|
||||||
|
independent Machi clusters.
|
||||||
|
</li>
|
||||||
|
<li> See
|
||||||
|
[https://www.usenix.org/legacy/events/osdi04/tech/renesse.html]
|
||||||
|
for a copy of the paper, "Chain Replication for Supporting High
|
||||||
|
Throughput and Availability" by Robbert van Renesse and Fred
|
||||||
|
B. Schneider.
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
=== FLU ===
|
||||||
|
|
||||||
|
The FLU is the basic storage server for Machi.
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> The name FLU is taken from "flash storage unit" from the paper
|
||||||
|
"CORFU: A Shared Log Design for Flash Clusters" by
|
||||||
|
Balakrishnan, Malkhi, Prabhakaran, and Wobber. See
|
||||||
|
[https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/balakrishnan]
|
||||||
|
</li>
|
||||||
|
<li> In CORFU, the sequencer step is a prerequisite step that is
|
||||||
|
performed by a separate component, the Sequencer.
|
||||||
|
In Machi, the `append_chunk()' protocol message has
|
||||||
|
an implicit "sequencer" operation applied by the "head" of the
|
||||||
|
Machi Chain Replication chain. If a client wishes to write
|
||||||
|
data that has already been assigned a sequencer position, then
|
||||||
|
the `write_chunk()' API function is used.
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
For each FLU, there are three independent tasks that are implemented
|
||||||
|
using three different Erlang processes:
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> A FLU server, implemented primarily by `machi_flu.erl'.
|
||||||
|
</li>
|
||||||
|
<li> A projection store server, implemented primarily by
|
||||||
|
`machi_projection_store.erl'.
|
||||||
|
</li>
|
||||||
|
<li> A chain state manager server, implemented primarily by
|
||||||
|
`machi_chain_manager1.erl'.
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
From the perspective of failure detection, it is very convenient that
|
||||||
|
all three FLU-related services (file server, sequencer server, and
|
||||||
|
projection server) are accessed using the same single TCP port.
|
||||||
|
|
||||||
|
=== Projection (data structure) ===
|
||||||
|
|
||||||
|
The projection is a data structure that specifies the current state
|
||||||
|
of the Machi cluster: all FLUs, which FLUs are considered
|
||||||
|
up/running or down/crashed/stopped, which FLUs are active
|
||||||
|
participants in the Chain Replication protocol, and which FLUs are
|
||||||
|
under "repair" (i.e., having their data resynchronized when
|
||||||
|
newly-added to a cluster or when restarting after a crash).
|
||||||
|
|
||||||
|
=== Projection Store (server) ===
|
||||||
|
|
||||||
|
The projection store is a storage service that is implemented by an
|
||||||
|
Erlang/OTP `gen_server' process that is associated with each
|
||||||
|
FLU. Conceptually, the projection store is an array of
|
||||||
|
write-once registers. For each projection store register, the
|
||||||
|
key is a 2-tuple of an epoch number (`non_neg_integer()' type)
|
||||||
|
and a projection type (`public' or `private' type); the value is
|
||||||
|
a projection data structure (`projection_v1()' type).
|
||||||
|
|
||||||
|
=== Client and Proxy Client ===
|
||||||
|
|
||||||
|
Machi is intentionally avoiding using distributed Erlang for Machi's
|
||||||
|
communication. This design decision makes Erlang-side code more
|
||||||
|
difficult & complex but allows us the freedom of implementing
|
||||||
|
parts of Machi in other languages without major
|
||||||
|
protocol&API&glue code changes later in the product's
|
||||||
|
lifetime.
|
||||||
|
|
||||||
|
There are two layers of interface for Machi clients.
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> The `machi_flu1_client' module implements an API that uses a
|
||||||
|
TCP socket directly.
|
||||||
|
</li>
|
||||||
|
<li> The `machi_proxy_flu1_client' module implements an API that
|
||||||
|
uses a local, long-lived `gen_server' process as a proxy for
|
||||||
|
the remote, perhaps disconnected-or-crashed Machi FLU server.
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
The types for both modules ought to be the same. However, due to
|
||||||
|
rapid code churn, some differences might exist. Any major difference
|
||||||
|
is (almost by definition) a bug: please open a GitHub issue to request
|
||||||
|
a correction.
|
||||||
|
|
||||||
|
== TODO notes ==
|
||||||
|
|
||||||
|
Any use of the string "TODO" in upper/lower/mixed case, anywhere in
|
||||||
|
the code, is a reminder signal of unfinished work.
|
||||||
|
|
||||||
|
== Pointers to Other Machi Documentation ==
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> If you are viewing this document locally, please look in the
|
||||||
|
`../doc/' directory.
|
||||||
|
</li>
|
||||||
|
<li> If you are viewing this document via the Web, please find the
|
||||||
|
documentation via this link:
|
||||||
|
[http://github.com/basho/machi/tree/master/doc/]
|
||||||
|
Please be aware that this link points to the `master' branch
|
||||||
|
of the Machi source repository and therefore may be
|
||||||
|
out-of-sync with non-`master' branch code.
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
Binary file not shown.
Before Width: | Height: | Size: 115 KiB |
File diff suppressed because it is too large
Load diff
|
@ -1489,7 +1489,7 @@ In Usenix ATC 2009.
|
||||||
{\tt https://www.usenix.org/legacy/event/usenix09/ tech/full\_papers/terrace/terrace.pdf}
|
{\tt https://www.usenix.org/legacy/event/usenix09/ tech/full\_papers/terrace/terrace.pdf}
|
||||||
|
|
||||||
\bibitem{chain-replication}
|
\bibitem{chain-replication}
|
||||||
van Renesse, Robbert and Schneider, Fred.
|
van Renesse, Robbert et al.
|
||||||
Chain Replication for Supporting High Throughput and Availability.
|
Chain Replication for Supporting High Throughput and Availability.
|
||||||
Proceedings of the 6th Conference on Symposium on Operating Systems
|
Proceedings of the 6th Conference on Symposium on Operating Systems
|
||||||
Design \& Implementation (OSDI'04) - Volume 6, 2004.
|
Design \& Implementation (OSDI'04) - Volume 6, 2004.
|
||||||
|
|
12
filter-dialyzer-dep-warnings
Normal file
12
filter-dialyzer-dep-warnings
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
####################### patterns for general errors in dep modules:
|
||||||
|
^protobuffs\.erl:
|
||||||
|
^protobuffs_[a-z_]*\.erl:
|
||||||
|
^leexinc\.hrl:[0-9][0-9]*:
|
||||||
|
^machi_chain_manager1.erl:[0-9][0-9]*: Guard test RetrospectiveP::'false' =:= 'true' can never succeed
|
||||||
|
^machi_pb\.erl:[0-9][0-9]*:
|
||||||
|
^pokemon_pb\.erl:[0-9][0-9]*:
|
||||||
|
####################### patterns for unknown functions:
|
||||||
|
^ basho_bench_config:get/2
|
||||||
|
^ erl_prettypr:format/1
|
||||||
|
^ erl_syntax:form_list/1
|
||||||
|
^ machi_partition_simulator:get/1
|
|
@ -18,9 +18,10 @@
|
||||||
%%
|
%%
|
||||||
%% -------------------------------------------------------------------
|
%% -------------------------------------------------------------------
|
||||||
|
|
||||||
%% @doc Now 4GiBytes, could be up to 64bit due to PB message limit of
|
-define(MAX_FILE_SIZE, 256*1024*1024). % 256 MBytes
|
||||||
%% chunk size
|
-define(MAX_CHUNK_SIZE, ((1 bsl 32) - 1)).
|
||||||
-define(DEFAULT_MAX_FILE_SIZE, ((1 bsl 32) - 1)).
|
%% -define(DATA_DIR, "/Volumes/SAM1/seq-tests/data").
|
||||||
|
-define(DATA_DIR, "./data").
|
||||||
-define(MINIMUM_OFFSET, 1024).
|
-define(MINIMUM_OFFSET, 1024).
|
||||||
|
|
||||||
%% 0th draft of checksum typing with 1st byte.
|
%% 0th draft of checksum typing with 1st byte.
|
||||||
|
@ -29,35 +30,6 @@
|
||||||
-define(CSUM_TAG_SERVER_SHA, 2). % Server-genereated SHA1
|
-define(CSUM_TAG_SERVER_SHA, 2). % Server-genereated SHA1
|
||||||
-define(CSUM_TAG_SERVER_REGEN_SHA, 3). % Server-regenerated SHA1
|
-define(CSUM_TAG_SERVER_REGEN_SHA, 3). % Server-regenerated SHA1
|
||||||
|
|
||||||
-define(CSUM_TAG_NONE_ATOM, none).
|
|
||||||
-define(CSUM_TAG_CLIENT_SHA_ATOM, client_sha).
|
|
||||||
-define(CSUM_TAG_SERVER_SHA_ATOM, server_sha).
|
|
||||||
-define(CSUM_TAG_SERVER_REGEN_SHA_ATOM, server_regen_sha).
|
|
||||||
|
|
||||||
%% Protocol Buffers goop
|
%% Protocol Buffers goop
|
||||||
-define(PB_MAX_MSG_SIZE, (33*1024*1024)).
|
-define(PB_MAX_MSG_SIZE, (33*1024*1024)).
|
||||||
-define(PB_PACKET_OPTS, [{packet, 4}, {packet_size, ?PB_MAX_MSG_SIZE}]).
|
-define(PB_PACKET_OPTS, [{packet, 4}, {packet_size, ?PB_MAX_MSG_SIZE}]).
|
||||||
|
|
||||||
%% TODO: it's used in flu_sup and elsewhere, change this to suitable name
|
|
||||||
-define(TEST_ETS_TABLE, test_ets_table).
|
|
||||||
|
|
||||||
-define(DEFAULT_COC_NAMESPACE, "").
|
|
||||||
-define(DEFAULT_COC_LOCATOR, 0).
|
|
||||||
|
|
||||||
-record(ns_info, {
|
|
||||||
version = 0 :: machi_dt:namespace_version(),
|
|
||||||
name = <<>> :: machi_dt:namespace(),
|
|
||||||
locator = 0 :: machi_dt:locator()
|
|
||||||
}).
|
|
||||||
|
|
||||||
-record(append_opts, {
|
|
||||||
chunk_extra = 0 :: machi_dt:chunk_size(),
|
|
||||||
preferred_file_name :: 'undefined' | machi_dt:file_name_s(),
|
|
||||||
flag_fail_preferred = false :: boolean()
|
|
||||||
}).
|
|
||||||
|
|
||||||
-record(read_opts, {
|
|
||||||
no_checksum = false :: boolean(),
|
|
||||||
no_chunk = false :: boolean(),
|
|
||||||
needs_trimmed = false :: boolean()
|
|
||||||
}).
|
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
%% machi merkle tree records
|
|
||||||
|
|
||||||
-record(naive, {
|
|
||||||
chunk_size = 1048576 :: pos_integer(), %% default 1 MB
|
|
||||||
recalc = true :: boolean(),
|
|
||||||
root :: 'undefined' | binary(),
|
|
||||||
lvl1 = [] :: [ binary() ],
|
|
||||||
lvl2 = [] :: [ binary() ],
|
|
||||||
lvl3 = [] :: [ binary() ],
|
|
||||||
leaves = [] :: [ { Offset :: pos_integer(),
|
|
||||||
Size :: pos_integer(),
|
|
||||||
Csum :: binary()} ]
|
|
||||||
}).
|
|
||||||
|
|
||||||
-record(mt, {
|
|
||||||
filename :: string(),
|
|
||||||
tree :: #naive{},
|
|
||||||
backend = 'naive' :: 'naive'
|
|
||||||
}).
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
%% -------------------------------------------------------------------
|
%% -------------------------------------------------------------------
|
||||||
%%
|
%%
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
%% Copyright (c) 2007-2014 Basho Technologies, Inc. All Rights Reserved.
|
||||||
%%
|
%%
|
||||||
%% This file is provided to you under the Apache License,
|
%% This file is provided to you under the Apache License,
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
%% Version 2.0 (the "License"); you may not use this file
|
||||||
|
@ -22,11 +22,10 @@
|
||||||
-define(MACHI_PROJECTION_HRL, true).
|
-define(MACHI_PROJECTION_HRL, true).
|
||||||
|
|
||||||
-type pv1_consistency_mode() :: 'ap_mode' | 'cp_mode'.
|
-type pv1_consistency_mode() :: 'ap_mode' | 'cp_mode'.
|
||||||
-type pv1_chain_name():: atom().
|
|
||||||
-type pv1_csum() :: binary().
|
-type pv1_csum() :: binary().
|
||||||
-type pv1_epoch() :: {pv1_epoch_n(), pv1_csum()}.
|
-type pv1_epoch() :: {pv1_epoch_n(), pv1_csum()}.
|
||||||
-type pv1_epoch_n() :: non_neg_integer().
|
-type pv1_epoch_n() :: non_neg_integer().
|
||||||
-type pv1_server() :: atom().
|
-type pv1_server() :: atom() | binary().
|
||||||
-type pv1_timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
|
-type pv1_timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
|
||||||
|
|
||||||
-record(p_srvr, {
|
-record(p_srvr, {
|
||||||
|
@ -40,8 +39,7 @@
|
||||||
-record(flap_i, {
|
-record(flap_i, {
|
||||||
flap_count :: {term(), term()},
|
flap_count :: {term(), term()},
|
||||||
all_hosed :: list(),
|
all_hosed :: list(),
|
||||||
all_flap_counts :: list(),
|
all_flap_counts :: list()
|
||||||
my_unique_prop_count :: non_neg_integer()
|
|
||||||
}).
|
}).
|
||||||
|
|
||||||
-type p_srvr() :: #p_srvr{}.
|
-type p_srvr() :: #p_srvr{}.
|
||||||
|
@ -49,14 +47,10 @@
|
||||||
|
|
||||||
-define(DUMMY_PV1_EPOCH, {0,<<0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0>>}).
|
-define(DUMMY_PV1_EPOCH, {0,<<0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0>>}).
|
||||||
|
|
||||||
%% Kludge for spam gossip. TODO: replace me
|
|
||||||
-define(SPAM_PROJ_EPOCH, ((1 bsl 32) - 7)).
|
|
||||||
|
|
||||||
-record(projection_v1, {
|
-record(projection_v1, {
|
||||||
epoch_number :: pv1_epoch_n() | ?SPAM_PROJ_EPOCH,
|
epoch_number :: pv1_epoch_n(),
|
||||||
epoch_csum :: pv1_csum(),
|
epoch_csum :: pv1_csum(),
|
||||||
author_server :: pv1_server(),
|
author_server :: pv1_server(),
|
||||||
chain_name = ch_not_def_yet :: pv1_chain_name(),
|
|
||||||
all_members :: [pv1_server()],
|
all_members :: [pv1_server()],
|
||||||
witnesses = [] :: [pv1_server()],
|
witnesses = [] :: [pv1_server()],
|
||||||
creation_time :: pv1_timestamp(),
|
creation_time :: pv1_timestamp(),
|
||||||
|
@ -64,6 +58,8 @@
|
||||||
upi :: [pv1_server()],
|
upi :: [pv1_server()],
|
||||||
repairing :: [pv1_server()],
|
repairing :: [pv1_server()],
|
||||||
down :: [pv1_server()],
|
down :: [pv1_server()],
|
||||||
|
flap :: 'undefined' | #flap_i{}, % flapping information
|
||||||
|
inner :: 'undefined' | #projection_v1{},
|
||||||
dbg :: list(), %proplist(), is checksummed
|
dbg :: list(), %proplist(), is checksummed
|
||||||
dbg2 :: list(), %proplist(), is not checksummed
|
dbg2 :: list(), %proplist(), is not checksummed
|
||||||
members_dict :: p_srvr_dict()
|
members_dict :: p_srvr_dict()
|
||||||
|
@ -77,16 +73,4 @@
|
||||||
%% create a consistent projection ranking score.
|
%% create a consistent projection ranking score.
|
||||||
-define(MAX_CHAIN_LENGTH, 64).
|
-define(MAX_CHAIN_LENGTH, 64).
|
||||||
|
|
||||||
-record(chain_def_v1, {
|
|
||||||
name :: atom(), % chain name
|
|
||||||
mode :: pv1_consistency_mode(),
|
|
||||||
full = [] :: [p_srvr()],
|
|
||||||
witnesses = [] :: [p_srvr()],
|
|
||||||
old_full = [] :: [pv1_server()], % guard against some races
|
|
||||||
old_witnesses=[] :: [pv1_server()], % guard against some races
|
|
||||||
local_run = [] :: [pv1_server()], % must be tailored to each machine!
|
|
||||||
local_stop = [] :: [pv1_server()], % must be tailored to each machine!
|
|
||||||
props = [] :: list() % proplist for other related info
|
|
||||||
}).
|
|
||||||
|
|
||||||
-endif. % !MACHI_PROJECTION_HRL
|
-endif. % !MACHI_PROJECTION_HRL
|
||||||
|
|
|
@ -1,56 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
echo "Step: Verify that the required entries in /etc/hosts are present"
|
|
||||||
for i in 1 2 3; do
|
|
||||||
grep machi$i /etc/hosts | egrep -s '^127.0.0.1' > /dev/null 2>&1
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo ""
|
|
||||||
echo "'grep -s machi$i' failed. Aborting, sorry."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
ping -c 1 machi$i > /dev/null 2>&1
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo ""
|
|
||||||
echo "Ping attempt on host machi$i failed. Aborting."
|
|
||||||
echo ""
|
|
||||||
ping -c 1 machi$i
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Step: add a verbose logging option to app.config"
|
|
||||||
for i in 1 2 3; do
|
|
||||||
ed ./dev/dev$i/etc/app.config <<EOF > /dev/null 2>&1
|
|
||||||
/verbose_confirm
|
|
||||||
a
|
|
||||||
{chain_manager_opts, [{private_write_verbose_confirm,true}]},
|
|
||||||
{stability_time, 1},
|
|
||||||
.
|
|
||||||
w
|
|
||||||
q
|
|
||||||
EOF
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Step: start three three Machi application instances"
|
|
||||||
for i in 1 2 3; do
|
|
||||||
./dev/dev$i/bin/machi start
|
|
||||||
./dev/dev$i/bin/machi ping
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo "Sorry, a 'ping' check for instance dev$i failed. Aborting."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Step: configure one chain to start a Humming Consensus group with three members"
|
|
||||||
|
|
||||||
# Note: $CWD of each Machi proc is two levels below the source code root dir.
|
|
||||||
LIFECYCLE000=../../priv/quick-admin-examples/demo-000
|
|
||||||
for i in 3 2 1; do
|
|
||||||
./dev/dev$i/bin/machi-admin quick-admin-apply $LIFECYCLE000 machi$i
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo "Sorry, 'machi-admin quick-admin-apply failed' on machi$i. Aborting."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
exit 0
|
|
93
priv/humming-consensus-demo.vagrant/Vagrantfile
vendored
93
priv/humming-consensus-demo.vagrant/Vagrantfile
vendored
|
@ -1,93 +0,0 @@
|
||||||
# -*- mode: ruby -*-
|
|
||||||
# vi: set ft=ruby :
|
|
||||||
|
|
||||||
# All Vagrant configuration is done below. The "2" in Vagrant.configure
|
|
||||||
# configures the configuration version (we support older styles for
|
|
||||||
# backwards compatibility). Please don't change it unless you know what
|
|
||||||
# you're doing.
|
|
||||||
Vagrant.configure(2) do |config|
|
|
||||||
# The most common configuration options are documented and commented below.
|
|
||||||
# For a complete reference, please see the online documentation at
|
|
||||||
# https://docs.vagrantup.com.
|
|
||||||
|
|
||||||
# Every Vagrant development environment requires a box. You can search for
|
|
||||||
# boxes at https://atlas.hashicorp.com/search.
|
|
||||||
# If this Vagrant box has not been downloaded before (e.g. using "vagrant box add"),
|
|
||||||
# then Vagrant will automatically download the VM image from HashiCorp.
|
|
||||||
config.vm.box = "hashicorp/precise64"
|
|
||||||
# If using a FreeBSD box, Bash may not be installed.
|
|
||||||
# Use the config.ssh.shell setting to specify an alternate shell.
|
|
||||||
# Note, however, that any code in the 'config.vm.provision' section
|
|
||||||
# would then have to use this shell's syntax!
|
|
||||||
# config.ssh.shell = "/bin/csh -l"
|
|
||||||
|
|
||||||
# Disable automatic box update checking. If you disable this, then
|
|
||||||
# boxes will only be checked for updates when the user runs
|
|
||||||
# `vagrant box outdated`. This is not recommended.
|
|
||||||
# config.vm.box_check_update = false
|
|
||||||
|
|
||||||
# Create a forwarded port mapping which allows access to a specific port
|
|
||||||
# within the machine from a port on the host machine. In the example below,
|
|
||||||
# accessing "localhost:8080" will access port 80 on the guest machine.
|
|
||||||
# config.vm.network "forwarded_port", guest: 80, host: 8080
|
|
||||||
|
|
||||||
# Create a private network, which allows host-only access to the machine
|
|
||||||
# using a specific IP.
|
|
||||||
# config.vm.network "private_network", ip: "192.168.33.10"
|
|
||||||
|
|
||||||
# Create a public network, which generally corresponds to a bridged network.
|
|
||||||
# Bridged networks make the machine appear as another physical device on
|
|
||||||
# your network.
|
|
||||||
# config.vm.network "public_network"
|
|
||||||
|
|
||||||
# Share an additional folder to the guest VM. The first argument is
|
|
||||||
# the path on the host to the actual folder. The second argument is
|
|
||||||
# the path on the guest to mount the folder. And the optional third
|
|
||||||
# argument is a set of non-required options.
|
|
||||||
# config.vm.synced_folder "../data", "/vagrant_data"
|
|
||||||
|
|
||||||
# Provider-specific configuration so you can fine-tune various
|
|
||||||
# backing providers for Vagrant. These expose provider-specific options.
|
|
||||||
# Example for VirtualBox:
|
|
||||||
#
|
|
||||||
config.vm.provider "virtualbox" do |vb|
|
|
||||||
# Display the VirtualBox GUI when booting the machine
|
|
||||||
# vb.gui = true
|
|
||||||
|
|
||||||
# Customize the amount of memory on the VM:
|
|
||||||
vb.memory = "512"
|
|
||||||
end
|
|
||||||
#
|
|
||||||
# View the documentation for the provider you are using for more
|
|
||||||
# information on available options.
|
|
||||||
|
|
||||||
# Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
|
|
||||||
# such as FTP and Heroku are also available. See the documentation at
|
|
||||||
# https://docs.vagrantup.com/v2/push/atlas.html for more information.
|
|
||||||
# config.push.define "atlas" do |push|
|
|
||||||
# push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
|
|
||||||
# end
|
|
||||||
|
|
||||||
# Enable provisioning with a shell script. Additional provisioners such as
|
|
||||||
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
|
|
||||||
# documentation for more information about their specific syntax and use.
|
|
||||||
config.vm.provision "shell", inline: <<-SHELL
|
|
||||||
# Install prerequisites
|
|
||||||
# Support here for FreeBSD is experimental
|
|
||||||
apt-get update ; sudo apt-get install -y git sudo rsync ; # Ubuntu Linux
|
|
||||||
env ASSUME_ALWAYS_YES=yes pkg install -f git sudo rsync ; # FreeBSD 10
|
|
||||||
|
|
||||||
# Install dependent packages, using slf-configurator
|
|
||||||
git clone https://github.com/slfritchie/slf-configurator.git
|
|
||||||
chown -R vagrant ./slf-configurator
|
|
||||||
(cd slf-configurator ; sudo sh -x ./ALL.sh)
|
|
||||||
echo 'export PATH=${PATH}:/usr/local/erlang/17.5/bin' >> ~vagrant/.bashrc
|
|
||||||
export PATH=${PATH}:/usr/local/erlang/17.5/bin
|
|
||||||
## echo 'set path = ( $path /usr/local/erlang/17.5/bin )' >> ~vagrant/.cshrc
|
|
||||||
## setenv PATH /usr/local/erlang/17.5/bin:$PATH
|
|
||||||
|
|
||||||
git clone https://github.com/basho/machi.git
|
|
||||||
(cd machi ; git checkout master ; make && make test )
|
|
||||||
chown -R vagrant ./machi
|
|
||||||
SHELL
|
|
||||||
end
|
|
|
@ -36,7 +36,7 @@ while (<I>) {
|
||||||
$indent = " " x ($count * 4);
|
$indent = " " x ($count * 4);
|
||||||
s/^#*\s*[0-9. ]*//;
|
s/^#*\s*[0-9. ]*//;
|
||||||
$anchor = "n$label";
|
$anchor = "n$label";
|
||||||
printf T1 "%s+ [%s. %s](#%s)\n", $indent, $label, $_, $anchor;
|
printf T1 "%s+ [%s %s](#%s)\n", $indent, $label, $_, $anchor;
|
||||||
printf T2 "<a name=\"%s\">\n", $anchor;
|
printf T2 "<a name=\"%s\">\n", $anchor;
|
||||||
$line =~ s/(#+)\s*[0-9. ]*/$1 $label. /;
|
$line =~ s/(#+)\s*[0-9. ]*/$1 $label. /;
|
||||||
print T2 $line;
|
print T2 $line;
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
{host, "localhost", []}.
|
|
|
@ -1,4 +0,0 @@
|
||||||
{flu,f1,"localhost",20401,[]}.
|
|
||||||
{flu,f2,"localhost",20402,[]}.
|
|
||||||
{flu,f3,"localhost",20403,[]}.
|
|
||||||
{chain,c1,[f1,f2,f3],[]}.
|
|
|
@ -1,4 +0,0 @@
|
||||||
{flu,f4,"localhost",20404,[]}.
|
|
||||||
{flu,f5,"localhost",20405,[]}.
|
|
||||||
{flu,f6,"localhost",20406,[]}.
|
|
||||||
{chain,c2,[f4,f5,f6],[]}.
|
|
|
@ -1,7 +0,0 @@
|
||||||
{host, "machi1", []}.
|
|
||||||
{host, "machi2", []}.
|
|
||||||
{host, "machi3", []}.
|
|
||||||
{flu,f1,"machi1",20401,[]}.
|
|
||||||
{flu,f2,"machi2",20402,[]}.
|
|
||||||
{flu,f3,"machi3",20403,[]}.
|
|
||||||
{chain,c1,[f1,f2,f3],[]}.
|
|
|
@ -1,10 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
if [ "${TRAVIS_PULL_REQUEST}" = "false" ]; then
|
|
||||||
echo '$TRAVIS_PULL_REQUEST is false, skipping tests'
|
|
||||||
exit 0
|
|
||||||
else
|
|
||||||
echo '$TRAVIS_PULL_REQUEST is not false ($TRAVIS_PULL_REQUEST), running tests'
|
|
||||||
make test
|
|
||||||
make dialyzer
|
|
||||||
fi
|
|
16
rebar.config
16
rebar.config
|
@ -1,21 +1,11 @@
|
||||||
{require_otp_vsn, "17|18"}.
|
{require_otp_vsn, "17"}.
|
||||||
|
|
||||||
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
|
%%% {erl_opts, [warnings_as_errors, {parse_transform, lager_transform}, debug_info]}.
|
||||||
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
|
{erl_opts, [{parse_transform, lager_transform}, debug_info]}.
|
||||||
{edoc_opts, [{dir, "./edoc"}]}.
|
{edoc_opts, [{dir, "./edoc"}]}.
|
||||||
|
|
||||||
{deps, [
|
{deps, [
|
||||||
{cuttlefish, ".*", {git, "git://github.com/basho/cuttlefish.git", {branch, "develop"}}},
|
{lager, ".*", {git, "git://github.com/basho/lager.git", {tag, "2.0.1"}}},
|
||||||
{sext, ".*", {git, "git://github.com/basho/sext.git", {branch, "master"}}},
|
{protobuffs, "0.8.*", {git, "git://github.com/basho/erlang_protobuffs.git", {tag, "0.8.1p4"}}}
|
||||||
{eleveldb, ".*", {git, "git://github.com/basho/eleveldb.git", {branch, "develop"}}},
|
|
||||||
{lager, ".*", {git, "git://github.com/basho/lager.git", {tag, "2.2.0"}}},
|
|
||||||
{protobuffs, "0.8.*", {git, "git://github.com/basho/erlang_protobuffs.git", {tag, "0.8.1p4"}}},
|
|
||||||
{riak_dt, ".*", {git, "git://github.com/basho/riak_dt.git", {branch, "develop"}}},
|
|
||||||
{ranch, ".*", {git, "git://github.com/ninenines/ranch.git", {branch, "master"}}},
|
|
||||||
{node_package, ".*", {git, "git://github.com/basho/node_package.git", {branch, "develop"}}},
|
|
||||||
{eper, ".*", {git, "git://github.com/basho/eper.git", {tag, "0.92-basho1"}}},
|
|
||||||
{cluster_info, ".*", {git, "git://github.com/basho/cluster_info", {branch, "develop"}}}
|
|
||||||
]}.
|
]}.
|
||||||
|
|
||||||
{sub_dirs, ["rel", "apps/machi"]}.
|
|
||||||
{lib_dirs, ["apps/machi"]}.
|
|
||||||
|
|
|
@ -1,35 +0,0 @@
|
||||||
[
|
|
||||||
{machi, [
|
|
||||||
%% Data directory for all FLUs.
|
|
||||||
{flu_data_dir, "{{platform_data_dir}}/flu"},
|
|
||||||
|
|
||||||
%% FLU config directory
|
|
||||||
{flu_config_dir, "{{platform_etc_dir}}/flu-config"},
|
|
||||||
|
|
||||||
%% Chain config directory
|
|
||||||
{chain_config_dir, "{{platform_etc_dir}}/chain-config"},
|
|
||||||
|
|
||||||
%% FLUs to start at app start.
|
|
||||||
%% This task has moved to machi_flu_sup and machi_lifecycle_mgr.
|
|
||||||
|
|
||||||
%% Number of metadata manager processes to run per FLU.
|
|
||||||
%% Default = 10
|
|
||||||
%% {metadata_manager_count, 2},
|
|
||||||
|
|
||||||
%% Default options for chain manager processes.
|
|
||||||
%% {chain_manager_opts, [{private_write_verbose,true},
|
|
||||||
%% {private_write_verbose_confirm,true}]},
|
|
||||||
|
|
||||||
%% Platform vars (mirror of reltool packaging)
|
|
||||||
{platform_data_dir, "{{platform_data_dir}}"},
|
|
||||||
{platform_etc_dir, "{{platform_etc_dir}}"},
|
|
||||||
|
|
||||||
%% Do not delete, do not put Machi config items after this line.
|
|
||||||
{final_comma_stopper, do_not_delete}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{lager, [
|
|
||||||
{error_logger_hwm, 5000} % lager's default of 50/sec is too low
|
|
||||||
]
|
|
||||||
}
|
|
||||||
].
|
|
|
@ -1,84 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
# -*- tab-width:4;indent-tabs-mode:nil -*-
|
|
||||||
# ex: ts=4 sw=4 et
|
|
||||||
|
|
||||||
# Pull environment for this install
|
|
||||||
. "{{runner_base_dir}}/lib/env.sh"
|
|
||||||
|
|
||||||
# Make sure the user running this script is the owner and/or su to that user
|
|
||||||
check_user "$@"
|
|
||||||
ES=$?
|
|
||||||
if [ "$ES" -ne 0 ]; then
|
|
||||||
exit $ES
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Keep track of where script was invoked
|
|
||||||
ORIGINAL_DIR=$(pwd)
|
|
||||||
|
|
||||||
# Make sure CWD is set to runner run dir
|
|
||||||
cd $RUNNER_BASE_DIR
|
|
||||||
|
|
||||||
# Identify the script name
|
|
||||||
SCRIPT=`basename $0`
|
|
||||||
|
|
||||||
usage() {
|
|
||||||
echo "Usage: $SCRIPT { quick-admin-check | quick-admin-apply | "
|
|
||||||
echo " top }"
|
|
||||||
}
|
|
||||||
|
|
||||||
case "$1" in
|
|
||||||
quick-admin-check)
|
|
||||||
# Make sure the local node IS running
|
|
||||||
node_up_check
|
|
||||||
|
|
||||||
shift
|
|
||||||
|
|
||||||
NODE_NAME=${NAME_ARG#* } # target machi server node name
|
|
||||||
IN_FILE="$1"
|
|
||||||
|
|
||||||
$ERTS_PATH/erl -noshell -noinput $NAME_PARAM machi_test$NAME_HOST $COOKIE_ARG \
|
|
||||||
-remsh $NODE_NAME \
|
|
||||||
-eval "Me = self(), spawn('"$NODE_NAME"', fun() -> X = (catch(machi_lifecycle_mgr:quick_admin_sanity_check(\"$IN_FILE\"))), Me ! {res, X} end), XX = receive {res, Res} -> Res after 10*1000 -> timeout end, io:format(user, \"Result: ~p\n\", [XX]), case XX of \
|
|
||||||
ok -> init:stop(); \
|
|
||||||
_ -> init:stop(1) \
|
|
||||||
end."
|
|
||||||
|
|
||||||
;;
|
|
||||||
quick-admin-apply)
|
|
||||||
# Make sure the local node IS running
|
|
||||||
node_up_check
|
|
||||||
|
|
||||||
shift
|
|
||||||
|
|
||||||
NODE_NAME=${NAME_ARG#* } # target machi server node name
|
|
||||||
IN_FILE="$1"
|
|
||||||
RELATIVE_HOST="$2"
|
|
||||||
|
|
||||||
$ERTS_PATH/erl -noshell -noinput $NAME_PARAM machi_test$NAME_HOST $COOKIE_ARG \
|
|
||||||
-remsh $NODE_NAME \
|
|
||||||
-eval "Me = self(), spawn('"$NODE_NAME"', fun() -> X = (catch(machi_lifecycle_mgr:quick_admin_apply(\"$IN_FILE\", \"$RELATIVE_HOST\"))), Me ! {res, X} end), XX = receive {res, Res} -> Res after 10*1000 -> timeout end, io:format(user, \"Result: ~p\n\", [XX]), case XX of \
|
|
||||||
ok -> init:stop(); \
|
|
||||||
_ -> init:stop(1) \
|
|
||||||
end."
|
|
||||||
|
|
||||||
;;
|
|
||||||
top)
|
|
||||||
# Make sure the local node IS running
|
|
||||||
node_up_check
|
|
||||||
|
|
||||||
shift
|
|
||||||
|
|
||||||
MYPID=$$
|
|
||||||
NODE_NAME=${NAME_ARG#* }
|
|
||||||
$ERTS_PATH/erl -noshell -noinput \
|
|
||||||
-pa $RUNNER_LIB_DIR/basho-patches \
|
|
||||||
-hidden $NAME_PARAM machi_etop$MYPID$NAME_HOST $COOKIE_ARG \
|
|
||||||
-s etop -s erlang halt -output text \
|
|
||||||
-node $NODE_NAME \
|
|
||||||
$* -tracing off
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
usage
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
|
@ -1,220 +0,0 @@
|
||||||
%%-*- mode: erlang -*-
|
|
||||||
|
|
||||||
%% @doc Where to emit the default log messages (typically at 'info'
|
|
||||||
%% severity):
|
|
||||||
%% off: disabled
|
|
||||||
%% file: the file specified by log.console.file
|
|
||||||
%% console: to standard output (seen when using `machi attach-direct`)
|
|
||||||
%% both: log.console.file and standard out.
|
|
||||||
{mapping, "log.console", "lager.handlers", [
|
|
||||||
{default, {{console_log_default}} },
|
|
||||||
{datatype, {enum, [off, file, console, both]}}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The severity level of the console log, default is 'info'.
|
|
||||||
{mapping, "log.console.level", "lager.handlers", [
|
|
||||||
{default, info},
|
|
||||||
{datatype, {enum, [debug, info, notice, warning, error, critical, alert, emergency, none]}}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc When 'log.console' is set to 'file' or 'both', the file where
|
|
||||||
%% console messages will be logged.
|
|
||||||
{mapping, "log.console.file", "lager.handlers", [
|
|
||||||
{default, "$(platform_log_dir)/console.log"},
|
|
||||||
{datatype, file}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The file where error messages will be logged.
|
|
||||||
{mapping, "log.error.file", "lager.handlers", [
|
|
||||||
{default, "$(platform_log_dir)/error.log"},
|
|
||||||
{datatype, file}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc When set to 'on', enables log output to syslog.
|
|
||||||
{mapping, "log.syslog", "lager.handlers", [
|
|
||||||
{default, off},
|
|
||||||
{datatype, flag}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The ident string passed to the syslog backend to tag log entries.
|
|
||||||
{mapping, "log.syslog.ident", "lager.handlers", [
|
|
||||||
{default, "machi"},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc Syslog facility to log entries from Machi.
|
|
||||||
{mapping, "log.syslog.facility", "lager.handlers", [
|
|
||||||
{default, daemon},
|
|
||||||
{datatype, {enum,[kern, user, mail, daemon, auth, syslog,
|
|
||||||
lpr, news, uucp, clock, authpriv, ftp,
|
|
||||||
cron, local0, local1, local2, local3,
|
|
||||||
local4, local5, local6, local7]}},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The severity level at which to log entries to syslog, default is 'info'.
|
|
||||||
{mapping, "log.syslog.level", "lager.handlers", [
|
|
||||||
{default, info},
|
|
||||||
{datatype, {enum, [debug, info, notice, warning, error, critical, alert, emergency, none]}},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
{translation,
|
|
||||||
"lager.handlers",
|
|
||||||
fun(Conf) ->
|
|
||||||
SyslogHandler = case cuttlefish:conf_get("log.syslog", Conf) of
|
|
||||||
true ->
|
|
||||||
Ident = cuttlefish:conf_get("log.syslog.ident", Conf),
|
|
||||||
Facility = cuttlefish:conf_get("log.syslog.facility", Conf),
|
|
||||||
LogLevel = cuttlefish:conf_get("log.syslog.level", Conf),
|
|
||||||
[{lager_syslog_backend, [Ident, Facility, LogLevel]}];
|
|
||||||
_ -> []
|
|
||||||
end,
|
|
||||||
ErrorHandler = case cuttlefish:conf_get("log.error.file", Conf) of
|
|
||||||
undefined -> [];
|
|
||||||
ErrorFilename -> [{lager_file_backend, [{file, ErrorFilename},
|
|
||||||
{level, error},
|
|
||||||
{size, 10485760},
|
|
||||||
{date, "$D0"},
|
|
||||||
{count, 5}]}]
|
|
||||||
end,
|
|
||||||
|
|
||||||
ConsoleLogLevel = cuttlefish:conf_get("log.console.level", Conf),
|
|
||||||
ConsoleLogFile = cuttlefish:conf_get("log.console.file", Conf),
|
|
||||||
|
|
||||||
ConsoleHandler = {lager_console_backend, ConsoleLogLevel},
|
|
||||||
ConsoleFileHandler = {lager_file_backend, [{file, ConsoleLogFile},
|
|
||||||
{level, ConsoleLogLevel},
|
|
||||||
{size, 10485760},
|
|
||||||
{date, "$D0"},
|
|
||||||
{count, 5}]},
|
|
||||||
|
|
||||||
ConsoleHandlers = case cuttlefish:conf_get("log.console", Conf) of
|
|
||||||
off -> [];
|
|
||||||
file -> [ConsoleFileHandler];
|
|
||||||
console -> [ConsoleHandler];
|
|
||||||
both -> [ConsoleHandler, ConsoleFileHandler];
|
|
||||||
_ -> []
|
|
||||||
end,
|
|
||||||
SyslogHandler ++ ConsoleHandlers ++ ErrorHandler
|
|
||||||
end
|
|
||||||
}.
|
|
||||||
|
|
||||||
|
|
||||||
%% @doc Whether to enable Erlang's built-in error logger.
|
|
||||||
{mapping, "sasl", "sasl.sasl_error_logger", [
|
|
||||||
{default, off},
|
|
||||||
{datatype, flag},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc Whether to enable the crash log.
|
|
||||||
{mapping, "log.crash", "lager.crash_log", [
|
|
||||||
{default, on},
|
|
||||||
{datatype, flag}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc If the crash log is enabled, the file where its messages will
|
|
||||||
%% be written.
|
|
||||||
{mapping, "log.crash.file", "lager.crash_log", [
|
|
||||||
{default, "$(platform_log_dir)/crash.log"},
|
|
||||||
{datatype, file}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
{translation,
|
|
||||||
"lager.crash_log",
|
|
||||||
fun(Conf) ->
|
|
||||||
case cuttlefish:conf_get("log.crash", Conf) of
|
|
||||||
false -> undefined;
|
|
||||||
_ ->
|
|
||||||
cuttlefish:conf_get("log.crash.file", Conf, "{{platform_log_dir}}/crash.log")
|
|
||||||
end
|
|
||||||
end}.
|
|
||||||
|
|
||||||
%% @doc Maximum size in bytes of individual messages in the crash log
|
|
||||||
{mapping, "log.crash.maximum_message_size", "lager.crash_log_msg_size", [
|
|
||||||
{default, "64KB"},
|
|
||||||
{datatype, bytesize}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc Maximum size of the crash log in bytes, before it is rotated
|
|
||||||
{mapping, "log.crash.size", "lager.crash_log_size", [
|
|
||||||
{default, "10MB"},
|
|
||||||
{datatype, bytesize}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The schedule on which to rotate the crash log. For more
|
|
||||||
%% information see:
|
|
||||||
%% https://github.com/basho/lager/blob/master/README.md#internal-log-rotation
|
|
||||||
{mapping, "log.crash.rotation", "lager.crash_log_date", [
|
|
||||||
{default, "$D0"}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc The number of rotated crash logs to keep. When set to
|
|
||||||
%% 'current', only the current open log file is kept.
|
|
||||||
{mapping, "log.crash.rotation.keep", "lager.crash_log_count", [
|
|
||||||
{default, 5},
|
|
||||||
{datatype, [integer, {atom, current}]},
|
|
||||||
{validators, ["rotation_count"]}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
{validator,
|
|
||||||
"rotation_count",
|
|
||||||
"must be 'current' or a positive integer",
|
|
||||||
fun(current) -> true;
|
|
||||||
(Int) when is_integer(Int) andalso Int >= 0 -> true;
|
|
||||||
(_) -> false
|
|
||||||
end}.
|
|
||||||
|
|
||||||
{translation,
|
|
||||||
"lager.crash_log_count",
|
|
||||||
fun(Conf) ->
|
|
||||||
case cuttlefish:conf_get("log.crash.rotation.keep", Conf) of
|
|
||||||
current -> 0;
|
|
||||||
Int -> Int
|
|
||||||
end
|
|
||||||
end}.
|
|
||||||
|
|
||||||
%% @doc Whether to redirect error_logger messages into lager -
|
|
||||||
%% defaults to true
|
|
||||||
{mapping, "log.error.redirect", "lager.error_logger_redirect", [
|
|
||||||
{default, on},
|
|
||||||
{datatype, flag},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% @doc Maximum number of error_logger messages to handle in a second
|
|
||||||
{mapping, "log.error.messages_per_second", "lager.error_logger_hwm", [
|
|
||||||
{default, 100},
|
|
||||||
{datatype, integer},
|
|
||||||
hidden
|
|
||||||
]}.
|
|
||||||
|
|
||||||
|
|
||||||
%% @doc Cookie for distributed node communication. All nodes in the
|
|
||||||
%% same cluster should use the same cookie or they will not be able to
|
|
||||||
%% communicate.
|
|
||||||
{mapping, "distributed_cookie", "vm_args.-setcookie", [
|
|
||||||
{default, "machi"}
|
|
||||||
]}.
|
|
||||||
|
|
||||||
|
|
||||||
%% override zdbbl from 1mb to 32mb
|
|
||||||
{mapping, "erlang.distribution_buffer_size", "vm_args.+zdbbl", [
|
|
||||||
{default, "32MB"},
|
|
||||||
merge
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% VM scheduler collapse, part 1 of 2
|
|
||||||
{mapping, "erlang.schedulers.force_wakeup_interval", "vm_args.+sfwi", [
|
|
||||||
{default, 500},
|
|
||||||
{datatype, integer},
|
|
||||||
merge
|
|
||||||
]}.
|
|
||||||
|
|
||||||
%% VM scheduler collapse, part 2 of 2
|
|
||||||
{mapping, "erlang.schedulers.compaction_of_load", "vm_args.+scl", [
|
|
||||||
{default, false},
|
|
||||||
merge
|
|
||||||
]}.
|
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
## Name of the Machi node
|
|
||||||
-name {{node}}
|
|
||||||
|
|
||||||
## Cookie for distributed erlang. All nodes in the same cluster
|
|
||||||
## should use the same cookie or they will not be able to communicate.
|
|
||||||
-setcookie machi
|
|
||||||
|
|
||||||
## Heartbeat management; auto-restarts VM if it dies or becomes unresponsive
|
|
||||||
## (Disabled by default..use with caution!)
|
|
||||||
##-heart
|
|
||||||
|
|
||||||
## Enable kernel poll and a few async threads
|
|
||||||
+K true
|
|
||||||
+A 64
|
|
||||||
|
|
||||||
## Treat error_logger warnings as warnings
|
|
||||||
+W w
|
|
||||||
|
|
||||||
## Increase number of concurrent ports/sockets
|
|
||||||
-env ERL_MAX_PORTS 4096
|
|
||||||
|
|
||||||
## ## Tweak GC to run more often
|
|
||||||
## -env ERL_FULLSWEEP_AFTER 0
|
|
||||||
|
|
||||||
## Set the location of crash dumps
|
|
||||||
-env ERL_CRASH_DUMP {{crash_dump}}
|
|
||||||
|
|
16
rel/gen_dev
16
rel/gen_dev
|
@ -1,16 +0,0 @@
|
||||||
#! /bin/sh
|
|
||||||
#
|
|
||||||
# Example usage: gen_dev dev4 vars.src vars
|
|
||||||
#
|
|
||||||
# Generate an overlay config for devNNN from vars.src and write to vars
|
|
||||||
#
|
|
||||||
|
|
||||||
NAME=$1
|
|
||||||
TEMPLATE=$2
|
|
||||||
VARFILE=$3
|
|
||||||
|
|
||||||
NODE="$NAME@127.0.0.1"
|
|
||||||
|
|
||||||
echo "Generating $NAME - node='$NODE'"
|
|
||||||
sed -e "s/@NODE@/$NODE/" \
|
|
||||||
< $TEMPLATE > $VARFILE
|
|
|
@ -1,4 +0,0 @@
|
||||||
{lib_dirs, ["../deps"]}.
|
|
||||||
%% {plugin_dir, "../deps/cuttlefish/src"}.
|
|
||||||
%% {plugins, [cuttlefish_rebar_plugin]}.
|
|
||||||
%% {cuttlefish_filename, "machi.conf"}.
|
|
|
@ -1,113 +0,0 @@
|
||||||
%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
|
|
||||||
%% ex: ft=erlang ts=4 sw=4 et
|
|
||||||
{sys, [
|
|
||||||
{lib_dirs, ["../deps"]},
|
|
||||||
{rel, "machi", "0.0.0",
|
|
||||||
[
|
|
||||||
kernel,
|
|
||||||
stdlib,
|
|
||||||
lager,
|
|
||||||
sasl,
|
|
||||||
public_key,
|
|
||||||
ssl,
|
|
||||||
%% riak_sysmon,
|
|
||||||
%% os_mon,
|
|
||||||
crypto,
|
|
||||||
runtime_tools,
|
|
||||||
machi
|
|
||||||
%% cluster_info,
|
|
||||||
%% exometer_core,
|
|
||||||
]},
|
|
||||||
{rel, "start_clean", "",
|
|
||||||
[
|
|
||||||
kernel,
|
|
||||||
stdlib
|
|
||||||
]},
|
|
||||||
{boot_rel, "machi"},
|
|
||||||
{profile, embedded},
|
|
||||||
{excl_sys_filters, ["^bin/.*",
|
|
||||||
"^erts.*/bin/(dialyzer|typer)",
|
|
||||||
"^erts.*/doc",
|
|
||||||
"^erts.*/man"]},
|
|
||||||
{excl_archive_filters, [".*"]},
|
|
||||||
%% {app, cuttlefish, [{incl_cond, include}]},
|
|
||||||
%% {app, cluster_info, [{incl_cond, include}]},
|
|
||||||
{app, eper, [{incl_cond, include}]},
|
|
||||||
{app, sasl, [{incl_cond, include}]},
|
|
||||||
%% {app, syslog, [{incl_cond, include}]},
|
|
||||||
%% {app, lager_syslog, [{incl_cond, include}]},
|
|
||||||
{app, lager, [{incl_cond, include}]}
|
|
||||||
%% {app, exometer_core, [{incl_cond, include}]},
|
|
||||||
]}.
|
|
||||||
|
|
||||||
|
|
||||||
{target_dir, "machi"}.
|
|
||||||
|
|
||||||
{overlay_vars, "vars.config"}.
|
|
||||||
|
|
||||||
{overlay, [
|
|
||||||
{mkdir, "data"},
|
|
||||||
{mkdir, "data/^PRESERVE"},
|
|
||||||
{mkdir, "log"},
|
|
||||||
|
|
||||||
%% Copy base files for starting and interacting w/ node
|
|
||||||
{copy, "../deps/node_package/priv/base/erl",
|
|
||||||
"{{erts_vsn}}/bin/erl"},
|
|
||||||
{copy, "../deps/node_package/priv/base/nodetool",
|
|
||||||
"{{erts_vsn}}/bin/nodetool"},
|
|
||||||
%% {copy, "../deps/cuttlefish/cuttlefish",
|
|
||||||
%% "{{erts_vsn}}/bin/cuttlefish"},
|
|
||||||
{template, "../deps/node_package/priv/base/runner",
|
|
||||||
"bin/machi"},
|
|
||||||
{template, "../deps/node_package/priv/base/env.sh",
|
|
||||||
"lib/env.sh"},
|
|
||||||
{template, "../deps/node_package/priv/base/app_epath.sh",
|
|
||||||
"lib/app_epath.sh"},
|
|
||||||
|
|
||||||
%% Copy config files
|
|
||||||
|
|
||||||
%% Cuttlefish Schema Files have a priority order.
|
|
||||||
%% Anything in a file prefixed with 00- will override
|
|
||||||
%% anything in a file with a higher numbered prefix.
|
|
||||||
|
|
||||||
%% Please only use 0[0-9]-*.schema for development purposes
|
|
||||||
%% NOTHING PERMANENT
|
|
||||||
|
|
||||||
%% {template, "files/riak.schema", "lib/10-riak.schema"},
|
|
||||||
%% {template, "../deps/cuttlefish/priv/erlang_vm.schema", "lib/11-erlang_vm.schema"},
|
|
||||||
|
|
||||||
%% {template, "../deps/riak_core/priv/riak_core.schema", "lib/12-riak_core.schema"},
|
|
||||||
%% {template, "../deps/riak_api/priv/riak_api.schema", "lib/13-riak_api.schema"},
|
|
||||||
%% {template, "../deps/riak_kv/priv/riak_kv.schema", "lib/14-riak_kv.schema"},
|
|
||||||
%% {template, "../deps/riak_sysmon/priv/riak_sysmon.schema", "lib/15-riak_sysmon.schema"},
|
|
||||||
%% {template, "../deps/bitcask/priv/bitcask.schema", "lib/16-bitcask.schema"},
|
|
||||||
%% {template, "../deps/bitcask/priv/bitcask_multi.schema", "lib/17-bitcask_multi.schema"},
|
|
||||||
%% {template, "../deps/riak_control/priv/riak_control.schema", "lib/18-riak_control.schema"},
|
|
||||||
|
|
||||||
%% {template, "../deps/riak_kv/priv/multi_backend.schema", "lib/20-multi_backend.schema"},
|
|
||||||
%% {template, "../deps/eleveldb/priv/eleveldb.schema", "lib/21-leveldb.schema"},
|
|
||||||
%% {template, "../deps/eleveldb/priv/eleveldb_multi.schema", "lib/22-leveldb_multi.schema"},
|
|
||||||
%% {template, "../deps/yokozuna/priv/yokozuna.schema", "lib/30-yokozuna.schema"},
|
|
||||||
|
|
||||||
%% Copy additional bin scripts
|
|
||||||
{template, "files/machi-admin", "bin/machi-admin"},
|
|
||||||
|
|
||||||
{template, "files/vm.args", "etc/vm.args"},
|
|
||||||
{template, "files/app.config", "etc/app.config"},
|
|
||||||
{mkdir, "etc/chain-config"},
|
|
||||||
{mkdir, "etc/flu-config"},
|
|
||||||
{mkdir, "etc/pending"},
|
|
||||||
{mkdir, "etc/rejected"},
|
|
||||||
|
|
||||||
%% Experiment: quick-admin
|
|
||||||
{mkdir, "etc/quick-admin-archive"},
|
|
||||||
{mkdir, "priv"},
|
|
||||||
{mkdir, "priv/quick-admin-examples"},
|
|
||||||
{copy, "../priv/quick-admin-examples/000", "priv/quick-admin-examples"},
|
|
||||||
{copy, "../priv/quick-admin-examples/001", "priv/quick-admin-examples"},
|
|
||||||
{copy, "../priv/quick-admin-examples/002", "priv/quick-admin-examples"},
|
|
||||||
{copy, "../priv/quick-admin-examples/demo-000", "priv/quick-admin-examples/demo-000"},
|
|
||||||
|
|
||||||
{mkdir, "lib/basho-patches"}
|
|
||||||
%% {copy, "../apps/machi/ebin/etop_txt.beam", "lib/basho-patches"}
|
|
||||||
]}.
|
|
|
@ -1,48 +0,0 @@
|
||||||
%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
|
|
||||||
%% ex: ft=erlang ts=4 sw=4 et
|
|
||||||
|
|
||||||
%% NOTE: When modifying this file, also keep its near cousin
|
|
||||||
%% config file rel/vars/dev_vars.config.src in sync!
|
|
||||||
|
|
||||||
%% Platform-specific installation paths
|
|
||||||
{platform_bin_dir, "./bin"}.
|
|
||||||
{platform_data_dir, "./data"}.
|
|
||||||
{platform_etc_dir, "./etc"}.
|
|
||||||
{platform_lib_dir, "./lib"}.
|
|
||||||
{platform_log_dir, "./log"}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% etc/app.config
|
|
||||||
%%
|
|
||||||
{sasl_error_log, "{{platform_log_dir}}/sasl-error.log"}.
|
|
||||||
{sasl_log_dir, "{{platform_log_dir}}/sasl"}.
|
|
||||||
|
|
||||||
%% lager
|
|
||||||
{console_log_default, file}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% etc/vm.args
|
|
||||||
%%
|
|
||||||
{node, "machi@127.0.0.1"}.
|
|
||||||
{crash_dump, "{{platform_log_dir}}/erl_crash.dump"}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% bin/machi
|
|
||||||
%%
|
|
||||||
{runner_script_dir, "\`cd \\`dirname $0\\` 1>/dev/null && /bin/pwd\`"}.
|
|
||||||
{runner_base_dir, "{{runner_script_dir}}/.."}.
|
|
||||||
{runner_etc_dir, "$RUNNER_BASE_DIR/etc"}.
|
|
||||||
{runner_log_dir, "$RUNNER_BASE_DIR/log"}.
|
|
||||||
{runner_lib_dir, "$RUNNER_BASE_DIR/lib"}.
|
|
||||||
{runner_patch_dir, "$RUNNER_BASE_DIR/lib/basho-patches"}.
|
|
||||||
{pipe_dir, "/tmp/$RUNNER_BASE_DIR/"}.
|
|
||||||
{runner_user, ""}.
|
|
||||||
{runner_wait_process, "machi_flu_sup"}.
|
|
||||||
{runner_ulimit_warn, 65536}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% cuttlefish
|
|
||||||
%%
|
|
||||||
{cuttlefish, ""}. % blank = off
|
|
||||||
{cuttlefish_conf, "machi.conf"}.
|
|
||||||
|
|
|
@ -1,48 +0,0 @@
|
||||||
%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
|
|
||||||
%% ex: ft=erlang ts=4 sw=4 et
|
|
||||||
|
|
||||||
%% NOTE: When modifying this file, also keep its near cousin
|
|
||||||
%% config file rel/vars/dev_vars.config.src in sync!
|
|
||||||
|
|
||||||
%% Platform-specific installation paths
|
|
||||||
{platform_bin_dir, "./bin"}.
|
|
||||||
{platform_data_dir, "./data"}.
|
|
||||||
{platform_etc_dir, "./etc"}.
|
|
||||||
{platform_lib_dir, "./lib"}.
|
|
||||||
{platform_log_dir, "./log"}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% etc/app.config
|
|
||||||
%%
|
|
||||||
{sasl_error_log, "{{platform_log_dir}}/sasl-error.log"}.
|
|
||||||
{sasl_log_dir, "{{platform_log_dir}}/sasl"}.
|
|
||||||
|
|
||||||
%% lager
|
|
||||||
{console_log_default, file}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% etc/vm.args
|
|
||||||
%%
|
|
||||||
{node, "@NODE@"}.
|
|
||||||
{crash_dump, "{{platform_log_dir}}/erl_crash.dump"}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% bin/machi
|
|
||||||
%%
|
|
||||||
{runner_script_dir, "\`cd \\`dirname $0\\` 1>/dev/null && /bin/pwd\`"}.
|
|
||||||
{runner_base_dir, "{{runner_script_dir}}/.."}.
|
|
||||||
{runner_etc_dir, "$RUNNER_BASE_DIR/etc"}.
|
|
||||||
{runner_log_dir, "$RUNNER_BASE_DIR/log"}.
|
|
||||||
{runner_lib_dir, "$RUNNER_BASE_DIR/lib"}.
|
|
||||||
{runner_patch_dir, "$RUNNER_BASE_DIR/lib/basho-patches"}.
|
|
||||||
{pipe_dir, "/tmp/$RUNNER_BASE_DIR/"}.
|
|
||||||
{runner_user, ""}.
|
|
||||||
{runner_wait_process, "machi_flu_sup"}.
|
|
||||||
{runner_ulimit_warn, 65536}.
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% cuttlefish
|
|
||||||
%%
|
|
||||||
{cuttlefish, ""}. % blank = off
|
|
||||||
{cuttlefish_conf, "machi.conf"}.
|
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
{application, machi, [
|
{application, machi, [
|
||||||
{description, "A village of write-once files."},
|
{description, "A village of write-once files."},
|
||||||
{vsn, "0.0.1"},
|
{vsn, "0.0.0"},
|
||||||
{applications, [kernel, stdlib, crypto, cluster_info, ranch]},
|
{applications, [kernel, stdlib, crypto]},
|
||||||
{mod,{machi_app,[]}},
|
{mod,{machi_app,[]}},
|
||||||
{registered, []},
|
{registered, []},
|
||||||
{env, [
|
{env, [
|
||||||
%% Don't use this static env for defaults, or we will fall into config hell.
|
{flu_list,
|
||||||
|
[
|
||||||
|
%%%%%% {flu_a, 32900, "./data.flu_a"}
|
||||||
|
]}
|
||||||
]}
|
]}
|
||||||
]}.
|
]}.
|
||||||
|
|
218
src/machi.proto
218
src/machi.proto
|
@ -40,18 +40,12 @@ enum Mpb_GeneralStatusCode {
|
||||||
BAD_ARG = 1;
|
BAD_ARG = 1;
|
||||||
WEDGED = 2;
|
WEDGED = 2;
|
||||||
BAD_CHECKSUM = 3;
|
BAD_CHECKSUM = 3;
|
||||||
/*
|
|
||||||
** There is no timeout error code, only PARTITION. If the client
|
|
||||||
** wants to know if a lot of time has elapsed, then the client
|
|
||||||
** can do its own timekeeping.
|
|
||||||
*/
|
|
||||||
PARTITION = 4;
|
PARTITION = 4;
|
||||||
NOT_WRITTEN = 5;
|
NOT_WRITTEN = 5;
|
||||||
WRITTEN = 6;
|
WRITTEN = 6;
|
||||||
TRIMMED = 7; // The whole file was trimmed
|
NO_SUCH_FILE = 7;
|
||||||
NO_SUCH_FILE = 8;
|
PARTIAL_READ = 8;
|
||||||
PARTIAL_READ = 9;
|
BAD_EPOCH = 9;
|
||||||
BAD_EPOCH = 10;
|
|
||||||
BAD_JOSS = 255; // Only for testing by the Taipan
|
BAD_JOSS = 255; // Only for testing by the Taipan
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,14 +81,6 @@ message Mpb_ChunkCSum {
|
||||||
optional bytes csum = 2;
|
optional bytes csum = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_Chunk {
|
|
||||||
required uint64 offset = 1;
|
|
||||||
required string file_name = 2;
|
|
||||||
required bytes chunk = 3;
|
|
||||||
// TODO: must be required, in future?
|
|
||||||
optional Mpb_ChunkCSum csum = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
// epoch_id() type
|
// epoch_id() type
|
||||||
message Mpb_EpochID {
|
message Mpb_EpochID {
|
||||||
required uint32 epoch_number = 1;
|
required uint32 epoch_number = 1;
|
||||||
|
@ -139,7 +125,6 @@ message Mpb_ErrorResp {
|
||||||
// append_chunk() : Mpb_AppendChunkReq and Mpb_AppendChunkResp
|
// append_chunk() : Mpb_AppendChunkReq and Mpb_AppendChunkResp
|
||||||
// write_chunk() : Mpb_WriteChunkReq and Mpb_WriteChunkResp
|
// write_chunk() : Mpb_WriteChunkReq and Mpb_WriteChunkResp
|
||||||
// read_chunk() : Mpb_ReadChunkReq and Mpb_ReadChunkResp
|
// read_chunk() : Mpb_ReadChunkReq and Mpb_ReadChunkResp
|
||||||
// trim_chunk() : Mpb_TrimChunkReq and Mpb_TrimChunkResp
|
|
||||||
// checksum_list() : Mpb_ChecksumListReq and Mpb_ChecksumListResp
|
// checksum_list() : Mpb_ChecksumListReq and Mpb_ChecksumListResp
|
||||||
// list_files() : Mpb_ListFilesReq and Mpb_ListFilesResp
|
// list_files() : Mpb_ListFilesReq and Mpb_ListFilesResp
|
||||||
//
|
//
|
||||||
|
@ -170,18 +155,11 @@ message Mpb_AuthResp {
|
||||||
// High level API: append_chunk() request & response
|
// High level API: append_chunk() request & response
|
||||||
|
|
||||||
message Mpb_AppendChunkReq {
|
message Mpb_AppendChunkReq {
|
||||||
// General namespace arguments
|
optional bytes placement_key = 1;
|
||||||
/* In single chain/non-clustered environment, use namespace="" */
|
required string prefix = 2;
|
||||||
required string namespace = 1;
|
required bytes chunk = 3;
|
||||||
|
required Mpb_ChunkCSum csum = 4;
|
||||||
required string prefix = 10;
|
optional uint32 chunk_extra = 5;
|
||||||
required bytes chunk = 11;
|
|
||||||
required Mpb_ChunkCSum csum = 12;
|
|
||||||
|
|
||||||
optional uint32 chunk_extra = 20;
|
|
||||||
optional string preferred_file_name = 21;
|
|
||||||
/* Fail the operation if our preferred file name is not available */
|
|
||||||
optional bool flag_fail_preferred = 22 [default=false];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_AppendChunkResp {
|
message Mpb_AppendChunkResp {
|
||||||
|
@ -193,7 +171,10 @@ message Mpb_AppendChunkResp {
|
||||||
// High level API: write_chunk() request & response
|
// High level API: write_chunk() request & response
|
||||||
|
|
||||||
message Mpb_WriteChunkReq {
|
message Mpb_WriteChunkReq {
|
||||||
required Mpb_Chunk chunk = 10;
|
required string file = 1;
|
||||||
|
required uint64 offset = 2;
|
||||||
|
required bytes chunk = 3;
|
||||||
|
required Mpb_ChunkCSum csum = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_WriteChunkResp {
|
message Mpb_WriteChunkResp {
|
||||||
|
@ -203,38 +184,22 @@ message Mpb_WriteChunkResp {
|
||||||
// High level API: read_chunk() request & response
|
// High level API: read_chunk() request & response
|
||||||
|
|
||||||
message Mpb_ReadChunkReq {
|
message Mpb_ReadChunkReq {
|
||||||
// No namespace arguments are required because NS is embedded
|
required string file = 1;
|
||||||
// inside of the file name.
|
required uint64 offset = 2;
|
||||||
|
required uint32 size = 3;
|
||||||
|
|
||||||
required Mpb_ChunkPos chunk_pos = 10;
|
// Use flag_checksum=non-zero to request the chunk's checksum also
|
||||||
|
optional uint32 flag_checksum = 4 [default=0];
|
||||||
// Use flag_no_checksum=non-zero to skip returning the chunk's checksum.
|
|
||||||
// TODO: not implemented yet.
|
|
||||||
optional bool flag_no_checksum = 20 [default=false];
|
|
||||||
|
|
||||||
// Use flag_no_chunk=non-zero to skip returning the chunk (which
|
// Use flag_no_chunk=non-zero to skip returning the chunk (which
|
||||||
// only makes sense if flag_no_checksum is not set).
|
// only makes sense if flag_checksum is set).
|
||||||
// TODO: not implemented yet.
|
optional uint32 flag_no_chunk = 5 [default=0];
|
||||||
optional bool flag_no_chunk = 21 [default=false];
|
|
||||||
|
|
||||||
// TODO: not implemented yet.
|
|
||||||
optional bool flag_needs_trimmed = 22 [default=false];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_ReadChunkResp {
|
message Mpb_ReadChunkResp {
|
||||||
required Mpb_GeneralStatusCode status = 1;
|
required Mpb_GeneralStatusCode status = 1;
|
||||||
repeated Mpb_Chunk chunks = 2;
|
optional bytes chunk = 2;
|
||||||
repeated Mpb_ChunkPos trimmed = 3;
|
optional Mpb_ChunkCSum csum = 3;
|
||||||
}
|
|
||||||
|
|
||||||
// High level API: trim_chunk() request & response
|
|
||||||
|
|
||||||
message Mpb_TrimChunkReq {
|
|
||||||
required Mpb_ChunkPos chunk_pos = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message Mpb_TrimChunkResp {
|
|
||||||
required Mpb_GeneralStatusCode status = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// High level API: checksum_list() request & response
|
// High level API: checksum_list() request & response
|
||||||
|
@ -254,8 +219,6 @@ message Mpb_ChecksumListResp {
|
||||||
// High level API: list_files() request & response
|
// High level API: list_files() request & response
|
||||||
|
|
||||||
message Mpb_ListFilesReq {
|
message Mpb_ListFilesReq {
|
||||||
// TODO: Add flag for file glob/regexp/other filter type
|
|
||||||
// TODO: What else could go wrong?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_ListFilesResp {
|
message Mpb_ListFilesResp {
|
||||||
|
@ -288,9 +251,8 @@ message Mpb_Request {
|
||||||
optional Mpb_AppendChunkReq append_chunk = 112;
|
optional Mpb_AppendChunkReq append_chunk = 112;
|
||||||
optional Mpb_WriteChunkReq write_chunk = 113;
|
optional Mpb_WriteChunkReq write_chunk = 113;
|
||||||
optional Mpb_ReadChunkReq read_chunk = 114;
|
optional Mpb_ReadChunkReq read_chunk = 114;
|
||||||
optional Mpb_TrimChunkReq trim_chunk = 115;
|
optional Mpb_ChecksumListReq checksum_list = 115;
|
||||||
optional Mpb_ChecksumListReq checksum_list = 116;
|
optional Mpb_ListFilesReq list_files = 116;
|
||||||
optional Mpb_ListFilesReq list_files = 117;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_Response {
|
message Mpb_Response {
|
||||||
|
@ -312,9 +274,8 @@ message Mpb_Response {
|
||||||
optional Mpb_AppendChunkResp append_chunk = 12;
|
optional Mpb_AppendChunkResp append_chunk = 12;
|
||||||
optional Mpb_WriteChunkResp write_chunk = 13;
|
optional Mpb_WriteChunkResp write_chunk = 13;
|
||||||
optional Mpb_ReadChunkResp read_chunk = 14;
|
optional Mpb_ReadChunkResp read_chunk = 14;
|
||||||
optional Mpb_TrimChunkResp trim_chunk = 15;
|
optional Mpb_ChecksumListResp checksum_list = 15;
|
||||||
optional Mpb_ChecksumListResp checksum_list = 16;
|
optional Mpb_ListFilesResp list_files = 16;
|
||||||
optional Mpb_ListFilesResp list_files = 17;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
|
@ -342,17 +303,18 @@ message Mpb_ProjectionV1 {
|
||||||
required uint32 epoch_number = 1;
|
required uint32 epoch_number = 1;
|
||||||
required bytes epoch_csum = 2;
|
required bytes epoch_csum = 2;
|
||||||
required string author_server = 3;
|
required string author_server = 3;
|
||||||
required string chain_name = 4;
|
repeated string all_members = 4;
|
||||||
repeated string all_members = 5;
|
repeated string witnesses = 5;
|
||||||
repeated string witnesses = 6;
|
required Mpb_Now creation_time = 6;
|
||||||
required Mpb_Now creation_time = 7;
|
required Mpb_Mode mode = 7;
|
||||||
required Mpb_Mode mode = 8;
|
repeated string upi = 8;
|
||||||
repeated string upi = 9;
|
repeated string repairing = 9;
|
||||||
repeated string repairing = 10;
|
repeated string down = 10;
|
||||||
repeated string down = 11;
|
optional bytes opaque_flap = 11;
|
||||||
required bytes opaque_dbg = 12;
|
optional bytes opaque_inner = 12;
|
||||||
required bytes opaque_dbg2 = 13;
|
required bytes opaque_dbg = 13;
|
||||||
repeated Mpb_MembersDictEntry members_dict = 14;
|
required bytes opaque_dbg2 = 14;
|
||||||
|
repeated Mpb_MembersDictEntry members_dict = 15;
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
|
@ -367,7 +329,6 @@ message Mpb_ProjectionV1 {
|
||||||
// append_chunk()
|
// append_chunk()
|
||||||
// write_chunk()
|
// write_chunk()
|
||||||
// read_chunk()
|
// read_chunk()
|
||||||
// trim_chunk()
|
|
||||||
// checksum_list()
|
// checksum_list()
|
||||||
// list_files()
|
// list_files()
|
||||||
// wedge_status()
|
// wedge_status()
|
||||||
|
@ -388,20 +349,12 @@ message Mpb_ProjectionV1 {
|
||||||
// Low level API: append_chunk()
|
// Low level API: append_chunk()
|
||||||
|
|
||||||
message Mpb_LL_AppendChunkReq {
|
message Mpb_LL_AppendChunkReq {
|
||||||
// General namespace arguments
|
required Mpb_EpochID epoch_id = 1;
|
||||||
required uint32 namespace_version = 1;
|
optional bytes placement_key = 2;
|
||||||
required string namespace = 2;
|
required string prefix = 3;
|
||||||
required uint32 locator = 3;
|
required bytes chunk = 4;
|
||||||
|
required Mpb_ChunkCSum csum = 5;
|
||||||
required Mpb_EpochID epoch_id = 10;
|
optional uint32 chunk_extra = 6;
|
||||||
required string prefix = 11;
|
|
||||||
required bytes chunk = 12;
|
|
||||||
required Mpb_ChunkCSum csum = 13;
|
|
||||||
|
|
||||||
optional uint32 chunk_extra = 20;
|
|
||||||
optional string preferred_file_name = 21;
|
|
||||||
/* Fail the operation if our preferred file name is not available */
|
|
||||||
optional bool flag_fail_preferred = 22 [default=false];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_LL_AppendChunkResp {
|
message Mpb_LL_AppendChunkResp {
|
||||||
|
@ -413,12 +366,11 @@ message Mpb_LL_AppendChunkResp {
|
||||||
// Low level API: write_chunk()
|
// Low level API: write_chunk()
|
||||||
|
|
||||||
message Mpb_LL_WriteChunkReq {
|
message Mpb_LL_WriteChunkReq {
|
||||||
// General namespace arguments
|
required Mpb_EpochID epoch_id = 1;
|
||||||
required uint32 namespace_version = 1;
|
required string file = 2;
|
||||||
required string namespace = 2;
|
required uint64 offset = 3;
|
||||||
|
required bytes chunk = 4;
|
||||||
required Mpb_EpochID epoch_id = 10;
|
required Mpb_ChunkCSum csum = 5;
|
||||||
required Mpb_Chunk chunk = 11;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_LL_WriteChunkResp {
|
message Mpb_LL_WriteChunkResp {
|
||||||
|
@ -428,54 +380,30 @@ message Mpb_LL_WriteChunkResp {
|
||||||
// Low level API: read_chunk()
|
// Low level API: read_chunk()
|
||||||
|
|
||||||
message Mpb_LL_ReadChunkReq {
|
message Mpb_LL_ReadChunkReq {
|
||||||
// General namespace arguments
|
required Mpb_EpochID epoch_id = 1;
|
||||||
required uint32 namespace_version = 1;
|
required string file = 2;
|
||||||
required string namespace = 2;
|
required uint64 offset = 3;
|
||||||
|
required uint32 size = 4;
|
||||||
|
|
||||||
required Mpb_EpochID epoch_id = 10;
|
// Use flag_get_checksum=non-zero to request the chunk's checksum also
|
||||||
required Mpb_ChunkPos chunk_pos = 11;
|
optional uint32 flag_get_checksum = 5 [default=0];
|
||||||
|
|
||||||
// Use flag_no_checksum=non-zero to skip returning the chunk's checksum.
|
|
||||||
// TODO: not implemented yet.
|
|
||||||
optional bool flag_no_checksum = 20 [default=false];
|
|
||||||
|
|
||||||
// Use flag_no_chunk=non-zero to skip returning the chunk (which
|
// Use flag_no_chunk=non-zero to skip returning the chunk (which
|
||||||
// only makes sense if flag_checksum is not set).
|
// only makes sense if flag_checksum is set).
|
||||||
// TODO: not implemented yet.
|
optional uint32 flag_no_chunk = 6 [default=0];
|
||||||
optional bool flag_no_chunk = 21 [default=false];
|
|
||||||
|
|
||||||
optional bool flag_needs_trimmed = 22 [default=false];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_LL_ReadChunkResp {
|
message Mpb_LL_ReadChunkResp {
|
||||||
required Mpb_GeneralStatusCode status = 1;
|
required Mpb_GeneralStatusCode status = 1;
|
||||||
repeated Mpb_Chunk chunks = 2;
|
optional bytes chunk = 2;
|
||||||
repeated Mpb_ChunkPos trimmed = 3;
|
optional Mpb_ChunkCSum csum = 3;
|
||||||
}
|
|
||||||
|
|
||||||
// Low level API: trim_chunk()
|
|
||||||
|
|
||||||
message Mpb_LL_TrimChunkReq {
|
|
||||||
// General namespace arguments
|
|
||||||
required uint32 namespace_version = 1;
|
|
||||||
required string namespace = 2;
|
|
||||||
|
|
||||||
required Mpb_EpochID epoch_id = 10;
|
|
||||||
required string file = 11;
|
|
||||||
required uint64 offset = 12;
|
|
||||||
required uint32 size = 13;
|
|
||||||
|
|
||||||
optional bool trigger_gc = 20 [default=false];
|
|
||||||
}
|
|
||||||
|
|
||||||
message Mpb_LL_TrimChunkResp {
|
|
||||||
required Mpb_GeneralStatusCode status = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Low level API: checksum_list()
|
// Low level API: checksum_list()
|
||||||
|
|
||||||
message Mpb_LL_ChecksumListReq {
|
message Mpb_LL_ChecksumListReq {
|
||||||
required string file = 1;
|
required Mpb_EpochID epoch_id = 1;
|
||||||
|
required string file = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_LL_ChecksumListResp {
|
message Mpb_LL_ChecksumListResp {
|
||||||
|
@ -506,9 +434,7 @@ message Mpb_LL_WedgeStatusReq {
|
||||||
message Mpb_LL_WedgeStatusResp {
|
message Mpb_LL_WedgeStatusResp {
|
||||||
required Mpb_GeneralStatusCode status = 1;
|
required Mpb_GeneralStatusCode status = 1;
|
||||||
optional Mpb_EpochID epoch_id = 2;
|
optional Mpb_EpochID epoch_id = 2;
|
||||||
optional bool wedged_flag = 3;
|
optional uint32 wedged_flag = 3;
|
||||||
optional uint32 namespace_version = 4;
|
|
||||||
optional string namespace = 5;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Low level API: delete_migration()
|
// Low level API: delete_migration()
|
||||||
|
@ -637,12 +563,11 @@ message Mpb_LL_Request {
|
||||||
optional Mpb_LL_AppendChunkReq append_chunk = 30;
|
optional Mpb_LL_AppendChunkReq append_chunk = 30;
|
||||||
optional Mpb_LL_WriteChunkReq write_chunk = 31;
|
optional Mpb_LL_WriteChunkReq write_chunk = 31;
|
||||||
optional Mpb_LL_ReadChunkReq read_chunk = 32;
|
optional Mpb_LL_ReadChunkReq read_chunk = 32;
|
||||||
optional Mpb_LL_TrimChunkReq trim_chunk = 33;
|
optional Mpb_LL_ChecksumListReq checksum_list = 33;
|
||||||
optional Mpb_LL_ChecksumListReq checksum_list = 34;
|
optional Mpb_LL_ListFilesReq list_files = 34;
|
||||||
optional Mpb_LL_ListFilesReq list_files = 35;
|
optional Mpb_LL_WedgeStatusReq wedge_status = 35;
|
||||||
optional Mpb_LL_WedgeStatusReq wedge_status = 36;
|
optional Mpb_LL_DeleteMigrationReq delete_migration = 36;
|
||||||
optional Mpb_LL_DeleteMigrationReq delete_migration = 37;
|
optional Mpb_LL_TruncHackReq trunc_hack = 37;
|
||||||
optional Mpb_LL_TruncHackReq trunc_hack = 38;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Mpb_LL_Response {
|
message Mpb_LL_Response {
|
||||||
|
@ -672,10 +597,9 @@ message Mpb_LL_Response {
|
||||||
optional Mpb_LL_AppendChunkResp append_chunk = 30;
|
optional Mpb_LL_AppendChunkResp append_chunk = 30;
|
||||||
optional Mpb_LL_WriteChunkResp write_chunk = 31;
|
optional Mpb_LL_WriteChunkResp write_chunk = 31;
|
||||||
optional Mpb_LL_ReadChunkResp read_chunk = 32;
|
optional Mpb_LL_ReadChunkResp read_chunk = 32;
|
||||||
optional Mpb_LL_TrimChunkResp trim_chunk = 33;
|
optional Mpb_LL_ChecksumListResp checksum_list = 33;
|
||||||
optional Mpb_LL_ChecksumListResp checksum_list = 34;
|
optional Mpb_LL_ListFilesResp list_files = 34;
|
||||||
optional Mpb_LL_ListFilesResp list_files = 35;
|
optional Mpb_LL_WedgeStatusResp wedge_status = 35;
|
||||||
optional Mpb_LL_WedgeStatusResp wedge_status = 36;
|
optional Mpb_LL_DeleteMigrationResp delete_migration = 36;
|
||||||
optional Mpb_LL_DeleteMigrationResp delete_migration = 37;
|
optional Mpb_LL_TruncHackResp trunc_hack = 37;
|
||||||
optional Mpb_LL_TruncHackResp trunc_hack = 38;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,13 +73,8 @@ verify_file_checksums_local2(Sock1, EpochID, Path0) ->
|
||||||
{ok, FH} ->
|
{ok, FH} ->
|
||||||
File = re:replace(Path, ".*/", "", [{return, binary}]),
|
File = re:replace(Path, ".*/", "", [{return, binary}]),
|
||||||
try
|
try
|
||||||
ReadChunk = fun(F, Offset, Size) ->
|
ReadChunk = fun(_File, Offset, Size) ->
|
||||||
case file:pread(FH, Offset, Size) of
|
file:pread(FH, Offset, Size)
|
||||||
{ok, Bin} ->
|
|
||||||
{ok, {[{F, Offset, Bin, undefined}], []}};
|
|
||||||
Err ->
|
|
||||||
Err
|
|
||||||
end
|
|
||||||
end,
|
end,
|
||||||
verify_file_checksums_common(Sock1, EpochID, File, ReadChunk)
|
verify_file_checksums_common(Sock1, EpochID, File, ReadChunk)
|
||||||
after
|
after
|
||||||
|
@ -90,18 +85,17 @@ verify_file_checksums_local2(Sock1, EpochID, Path0) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
verify_file_checksums_remote2(Sock1, EpochID, File) ->
|
verify_file_checksums_remote2(Sock1, EpochID, File) ->
|
||||||
NSInfo = undefined,
|
|
||||||
ReadChunk = fun(File_name, Offset, Size) ->
|
ReadChunk = fun(File_name, Offset, Size) ->
|
||||||
?FLU_C:read_chunk(Sock1, NSInfo, EpochID,
|
?FLU_C:read_chunk(Sock1, EpochID,
|
||||||
File_name, Offset, Size, undefined)
|
File_name, Offset, Size)
|
||||||
end,
|
end,
|
||||||
verify_file_checksums_common(Sock1, EpochID, File, ReadChunk).
|
verify_file_checksums_common(Sock1, EpochID, File, ReadChunk).
|
||||||
|
|
||||||
verify_file_checksums_common(Sock1, _EpochID, File, ReadChunk) ->
|
verify_file_checksums_common(Sock1, EpochID, File, ReadChunk) ->
|
||||||
try
|
try
|
||||||
case ?FLU_C:checksum_list(Sock1, File) of
|
case ?FLU_C:checksum_list(Sock1, EpochID, File) of
|
||||||
{ok, InfoBin} ->
|
{ok, InfoBin} ->
|
||||||
Info = machi_csum_table:split_checksum_list_blob_decode(InfoBin),
|
{Info, _} = machi_flu1:split_checksum_list_blob_decode(InfoBin),
|
||||||
Res = lists:foldl(verify_chunk_checksum(File, ReadChunk),
|
Res = lists:foldl(verify_chunk_checksum(File, ReadChunk),
|
||||||
[], Info),
|
[], Info),
|
||||||
{ok, Res};
|
{ok, Res};
|
||||||
|
@ -116,11 +110,9 @@ verify_file_checksums_common(Sock1, _EpochID, File, ReadChunk) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
verify_chunk_checksum(File, ReadChunk) ->
|
verify_chunk_checksum(File, ReadChunk) ->
|
||||||
fun({0, ?MINIMUM_OFFSET, none}, []) ->
|
fun({Offset, Size, <<_Tag:1/binary, CSum/binary>>}, Acc) ->
|
||||||
[];
|
|
||||||
({Offset, Size, <<_Tag:1/binary, CSum/binary>>}, Acc) ->
|
|
||||||
case ReadChunk(File, Offset, Size) of
|
case ReadChunk(File, Offset, Size) of
|
||||||
{ok, {[{_, Offset, Chunk, _}], _}} ->
|
{ok, Chunk} ->
|
||||||
CSum2 = machi_util:checksum_chunk(Chunk),
|
CSum2 = machi_util:checksum_chunk(Chunk),
|
||||||
if CSum == CSum2 ->
|
if CSum == CSum2 ->
|
||||||
Acc;
|
Acc;
|
||||||
|
|
|
@ -36,8 +36,12 @@
|
||||||
-export([start/2, stop/1]).
|
-export([start/2, stop/1]).
|
||||||
|
|
||||||
start(_StartType, _StartArgs) ->
|
start(_StartType, _StartArgs) ->
|
||||||
machi_cinfo:register(),
|
case machi_sup:start_link() of
|
||||||
machi_sup:start_link().
|
{ok, Pid} ->
|
||||||
|
{ok, Pid};
|
||||||
|
Error ->
|
||||||
|
Error
|
||||||
|
end.
|
||||||
|
|
||||||
stop(_State) ->
|
stop(_State) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
%% -------------------------------------------------------------------
|
%% -------------------------------------------------------------------
|
||||||
%%
|
%%
|
||||||
%% Copyright (c) 2007-2016 Basho Technologies, Inc. All Rights Reserved.
|
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
||||||
%%
|
%%
|
||||||
%% This file is provided to you under the Apache License,
|
%% This file is provided to you under the Apache License,
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
%% Version 2.0 (the "License"); you may not use this file
|
||||||
|
@ -43,25 +43,23 @@
|
||||||
%% could add new entries to this ETS table.
|
%% could add new entries to this ETS table.
|
||||||
%%
|
%%
|
||||||
%% Now we can use various integer-centric key generators that are
|
%% Now we can use various integer-centric key generators that are
|
||||||
%% already bundled with basho_bench. NOTE: this scheme does not allow
|
%% already bundled with basho_bench.
|
||||||
%% mixing of 'append' and 'read' operations in the same config. Basho
|
|
||||||
%% Bench does not support different key generators for different
|
|
||||||
%% operations, unfortunately. The work-around is to run two different
|
|
||||||
%% Basho Bench instances: one for 'append' ops with a key generator for
|
|
||||||
%% the desired prefix(es), and the other for 'read' ops with an
|
|
||||||
%% integer key generator.
|
|
||||||
%%
|
%%
|
||||||
%% TODO: The 'read' operator will always read chunks at exactly the
|
%% TODO: Add CRC checking, when feasible and when supported on the
|
||||||
%% byte offset & size as the original append/write ops. If reads are
|
%% server side.
|
||||||
%% desired at any arbitrary offset & size, then a new strategy is
|
%%
|
||||||
%% required.
|
%% TODO: As an alternate idea, if we know that the chunks written are
|
||||||
|
%% always the same size, and if we don't care about CRC checking, then
|
||||||
|
%% all we need to know are the file names & file sizes on the server:
|
||||||
|
%% we can then pick any valid offset within that file. That would
|
||||||
|
%% certainly be more scalable than the zillion-row-ETS-table, which is
|
||||||
|
%% definitely RAM-hungry.
|
||||||
|
|
||||||
-module(machi_basho_bench_driver).
|
-module(machi_basho_bench_driver).
|
||||||
|
|
||||||
-export([new/1, run/4]).
|
-export([new/1, run/4]).
|
||||||
|
|
||||||
-record(m, {
|
-record(m, {
|
||||||
id,
|
|
||||||
conn,
|
conn,
|
||||||
max_key
|
max_key
|
||||||
}).
|
}).
|
||||||
|
@ -83,7 +81,7 @@ new(Id) ->
|
||||||
{read_concurrency, true}]),
|
{read_concurrency, true}]),
|
||||||
ets:insert(ETS, {max_key, 0}),
|
ets:insert(ETS, {max_key, 0}),
|
||||||
ets:insert(ETS, {total_bytes, 0}),
|
ets:insert(ETS, {total_bytes, 0}),
|
||||||
MaxKeys = load_ets_table_maybe(Conn, ETS),
|
MaxKeys = load_ets_table(Conn, ETS),
|
||||||
?INFO("Key preload: finished, ~w keys loaded", [MaxKeys]),
|
?INFO("Key preload: finished, ~w keys loaded", [MaxKeys]),
|
||||||
Bytes = ets:lookup_element(ETS, total_bytes, 2),
|
Bytes = ets:lookup_element(ETS, total_bytes, 2),
|
||||||
?INFO("Key preload: finished, chunk list specifies ~s MBytes of chunks",
|
?INFO("Key preload: finished, chunk list specifies ~s MBytes of chunks",
|
||||||
|
@ -92,14 +90,12 @@ new(Id) ->
|
||||||
true ->
|
true ->
|
||||||
ok
|
ok
|
||||||
end,
|
end,
|
||||||
{ok, #m{id=Id, conn=Conn}}.
|
{ok, #m{conn=Conn}}.
|
||||||
|
|
||||||
run(append, KeyGen, ValueGen, #m{conn=Conn}=S) ->
|
run(append, KeyGen, ValueGen, #m{conn=Conn}=S) ->
|
||||||
Prefix = KeyGen(),
|
Prefix = KeyGen(),
|
||||||
Value = ValueGen(),
|
Value = ValueGen(),
|
||||||
CSum = machi_util:make_client_csum(Value),
|
case machi_cr_client:append_chunk(Conn, Prefix, Value, ?THE_TIMEOUT) of
|
||||||
AppendOpts = {append_opts,0,undefined,false}, % HACK FIXME
|
|
||||||
case machi_cr_client:append_chunk(Conn, undefined, Prefix, Value, CSum, AppendOpts, ?THE_TIMEOUT) of
|
|
||||||
{ok, Pos} ->
|
{ok, Pos} ->
|
||||||
EtsKey = ets:update_counter(?ETS_TAB, max_key, 1),
|
EtsKey = ets:update_counter(?ETS_TAB, max_key, 1),
|
||||||
true = ets:insert(?ETS_TAB, {EtsKey, Pos}),
|
true = ets:insert(?ETS_TAB, {EtsKey, Pos}),
|
||||||
|
@ -116,26 +112,9 @@ run(read, KeyGen, _ValueGen, #m{conn=Conn, max_key=MaxKey}=S) ->
|
||||||
Idx = KeyGen() rem MaxKey,
|
Idx = KeyGen() rem MaxKey,
|
||||||
%% {File, Offset, Size, _CSum} = ets:lookup_element(?ETS_TAB, Idx, 2),
|
%% {File, Offset, Size, _CSum} = ets:lookup_element(?ETS_TAB, Idx, 2),
|
||||||
{File, Offset, Size} = ets:lookup_element(?ETS_TAB, Idx, 2),
|
{File, Offset, Size} = ets:lookup_element(?ETS_TAB, Idx, 2),
|
||||||
ReadOpts = {read_opts,false,false,false}, % HACK FIXME
|
case machi_cr_client:read_chunk(Conn, File, Offset, Size, ?THE_TIMEOUT) of
|
||||||
case machi_cr_client:read_chunk(Conn, undefined, File, Offset, Size, ReadOpts, ?THE_TIMEOUT) of
|
{ok, _Chunk} ->
|
||||||
{ok, {Chunks, _Trimmed}} ->
|
|
||||||
%% io:format(user, "Chunks ~P\n", [Chunks, 15]),
|
|
||||||
%% {ok, S};
|
|
||||||
case lists:all(fun({File2, Offset2, Chunk, CSum}) ->
|
|
||||||
{_Tag, CS} = machi_util:unmake_tagged_csum(CSum),
|
|
||||||
CS2 = machi_util:checksum_chunk(Chunk),
|
|
||||||
if CS == CS2 ->
|
|
||||||
true;
|
|
||||||
CS /= CS2 ->
|
|
||||||
?ERROR("Client-side checksum error for file ~p offset ~p expected ~p got ~p\n", [File2, Offset2, CS, CS2]),
|
|
||||||
false
|
|
||||||
end
|
|
||||||
end, Chunks) of
|
|
||||||
true ->
|
|
||||||
{ok, S};
|
{ok, S};
|
||||||
false ->
|
|
||||||
{error, bad_checksum, S}
|
|
||||||
end;
|
|
||||||
{error, _}=Err ->
|
{error, _}=Err ->
|
||||||
?ERROR("read file ~p offset ~w size ~w: ~w\n",
|
?ERROR("read file ~p offset ~w size ~w: ~w\n",
|
||||||
[File, Offset, Size, Err]),
|
[File, Offset, Size, Err]),
|
||||||
|
@ -153,40 +132,21 @@ find_server_info(_Id) ->
|
||||||
Ps
|
Ps
|
||||||
end.
|
end.
|
||||||
|
|
||||||
load_ets_table_maybe(Conn, ETS) ->
|
|
||||||
case basho_bench_config:get(operations, undefined) of
|
|
||||||
undefined ->
|
|
||||||
?ERROR("The 'operations' key is missing from the config file, aborting", []),
|
|
||||||
exit(bad_config);
|
|
||||||
Ops when is_list(Ops) ->
|
|
||||||
case lists:keyfind(read, 1, Ops) of
|
|
||||||
{read,_} ->
|
|
||||||
load_ets_table(Conn, ETS);
|
|
||||||
false ->
|
|
||||||
?INFO("No 'read' op in the 'operations' list ~p, skipping ETS table load.", [Ops]),
|
|
||||||
0
|
|
||||||
end
|
|
||||||
end.
|
|
||||||
|
|
||||||
load_ets_table(Conn, ETS) ->
|
load_ets_table(Conn, ETS) ->
|
||||||
{ok, Fs} = machi_cr_client:list_files(Conn),
|
{ok, Fs} = machi_cr_client:list_files(Conn),
|
||||||
[begin
|
[begin
|
||||||
{ok, InfoBin} = machi_cr_client:checksum_list(Conn, File, ?THE_TIMEOUT),
|
{ok, InfoBin} = machi_cr_client:checksum_list(Conn, File),
|
||||||
PosList = machi_csum_table:split_checksum_list_blob_decode(InfoBin),
|
{PosList, _} = machi_flu1:split_checksum_list_blob_decode(InfoBin),
|
||||||
?INFO("File ~s len PosList ~p\n", [File, length(PosList)]),
|
|
||||||
StartKey = ets:update_counter(ETS, max_key, 0),
|
StartKey = ets:update_counter(ETS, max_key, 0),
|
||||||
{_, C, Bytes} = lists:foldl(fun({_Off,0,_CSum}, {_K, _C, _Bs}=Acc) ->
|
%% _EndKey = lists:foldl(fun({Off,Sz,CSum}, K) ->
|
||||||
Acc;
|
%% V = {File, Off, Sz, CSum},
|
||||||
({0,_Sz,_CSum}, {_K, _C, _Bs}=Acc) ->
|
{_, Bytes} = lists:foldl(fun({Off,Sz,_CSum}, {K, Bs}) ->
|
||||||
Acc;
|
|
||||||
({Off,Sz,_CSum}, {K, C, Bs}) ->
|
|
||||||
V = {File, Off, Sz},
|
V = {File, Off, Sz},
|
||||||
ets:insert(ETS, {K, V}),
|
ets:insert(ETS, {K, V}),
|
||||||
{K + 1, C + 1, Bs + Sz}
|
{K + 1, Bs + Sz}
|
||||||
end, {StartKey, 0, 0}, PosList),
|
end, {StartKey, 0}, PosList),
|
||||||
_ = ets:update_counter(ETS, max_key, C),
|
ets:update_counter(ETS, max_key, length(PosList)),
|
||||||
_ = ets:update_counter(ETS, total_bytes, Bytes),
|
ets:update_counter(ETS, total_bytes, Bytes)
|
||||||
ok
|
|
||||||
end || {_Size, File} <- Fs],
|
end || {_Size, File} <- Fs],
|
||||||
ets:update_counter(?ETS_TAB, max_key, 0).
|
ets:update_counter(?ETS_TAB, max_key, 0).
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -96,6 +96,11 @@
|
||||||
|
|
||||||
-export([repair/7]).
|
-export([repair/7]).
|
||||||
|
|
||||||
|
%% Placeholder for CP-mode repair: every argument is ignored and the
%% calling process exits with reason `todo_cp_mode'.
repair_cp(_Src, _Dst, _MembersDict, _Opts) ->
|
||||||
|
%% TODO: add missing function: wipe away any trace of chunks that
|
||||||
|
%% are present on Dst but missing on Src.
|
||||||
|
exit(todo_cp_mode).
|
||||||
|
|
||||||
repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
||||||
%% Use process dict so that 'after' clause can always quit all
|
%% Use process dict so that 'after' clause can always quit all
|
||||||
%% proxy pids.
|
%% proxy pids.
|
||||||
|
@ -103,10 +108,9 @@ repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
||||||
Add = fun(Name, Pid) -> put(proxies_dict, orddict:store(Name, Pid, get(proxies_dict))) end,
|
Add = fun(Name, Pid) -> put(proxies_dict, orddict:store(Name, Pid, get(proxies_dict))) end,
|
||||||
OurFLUs = lists:usort([Src] ++ Repairing ++ UPI), % AP assumption!
|
OurFLUs = lists:usort([Src] ++ Repairing ++ UPI), % AP assumption!
|
||||||
RepairMode = proplists:get_value(repair_mode, Opts, repair),
|
RepairMode = proplists:get_value(repair_mode, Opts, repair),
|
||||||
Verb = proplists:get_value(verbose, Opts, false),
|
Verb = proplists:get_value(verbose, Opts, true),
|
||||||
RepairId = proplists:get_value(repair_id, Opts, id1),
|
|
||||||
Res = try
|
Res = try
|
||||||
_ = [begin
|
[begin
|
||||||
{ok, Proxy} = machi_proxy_flu1_client:start_link(P),
|
{ok, Proxy} = machi_proxy_flu1_client:start_link(P),
|
||||||
Add(FLU, Proxy)
|
Add(FLU, Proxy)
|
||||||
end || {FLU,P} <- MembersDict, lists:member(FLU, OurFLUs)],
|
end || {FLU,P} <- MembersDict, lists:member(FLU, OurFLUs)],
|
||||||
|
@ -117,39 +121,31 @@ repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
||||||
get_file_lists(Proxy, FLU, Dict)
|
get_file_lists(Proxy, FLU, Dict)
|
||||||
end, D, ProxiesDict),
|
end, D, ProxiesDict),
|
||||||
MissingFileSummary = make_missing_file_summary(D2, OurFLUs),
|
MissingFileSummary = make_missing_file_summary(D2, OurFLUs),
|
||||||
%% ?VERB("~w MissingFileSummary ~p\n",[RepairId,MissingFileSummary]),
|
?VERB("MissingFileSummary ~p\n", [MissingFileSummary]),
|
||||||
lager:info("Repair ~w MissingFileSummary ~p\n",
|
|
||||||
[RepairId, MissingFileSummary]),
|
|
||||||
|
|
||||||
[ets:insert(ETS, {{directive_bytes, FLU}, 0}) || FLU <- OurFLUs],
|
[ets:insert(ETS, {{directive_bytes, FLU}, 0}) || FLU <- OurFLUs],
|
||||||
%% Repair files from perspective of Src, i.e. tail(UPI).
|
%% Repair files from perspective of Src, i.e. tail(UPI).
|
||||||
SrcProxy = orddict:fetch(Src, ProxiesDict),
|
SrcProxy = orddict:fetch(Src, ProxiesDict),
|
||||||
{ok, EpochID} = machi_proxy_flu1_client:get_epoch_id(
|
{ok, EpochID} = machi_proxy_flu1_client:get_epoch_id(
|
||||||
SrcProxy, ?SHORT_TIMEOUT),
|
SrcProxy, ?SHORT_TIMEOUT),
|
||||||
%% ?VERB("Make repair directives: "),
|
?VERB("Make repair directives: "),
|
||||||
Ds =
|
Ds =
|
||||||
[{File, make_repair_directives(
|
[{File, make_repair_directives(
|
||||||
ConsistencyMode, RepairMode, File, Size, EpochID,
|
ConsistencyMode, RepairMode, File, Size, EpochID,
|
||||||
Verb,
|
Verb,
|
||||||
Src, OurFLUs, ProxiesDict, ETS)} ||
|
Src, OurFLUs, ProxiesDict, ETS)} ||
|
||||||
{File, {Size, _MissingList}} <- MissingFileSummary],
|
{File, {Size, _MissingList}} <- MissingFileSummary],
|
||||||
%% ?VERB(" done\n"),
|
?VERB(" done\n"),
|
||||||
lager:info("Repair ~w repair directives finished\n", [RepairId]),
|
|
||||||
[begin
|
[begin
|
||||||
[{_, Bytes}] = ets:lookup(ETS, {directive_bytes, FLU}),
|
[{_, Bytes}] = ets:lookup(ETS, {directive_bytes, FLU}),
|
||||||
%% ?VERB("Out-of-sync data for FLU ~p: ~s MBytes\n",
|
?VERB("Out-of-sync data for FLU ~p: ~s MBytes\n",
|
||||||
%% [FLU, mbytes(Bytes)]),
|
[FLU, mbytes(Bytes)])
|
||||||
lager:info("Repair ~w "
|
|
||||||
"Out-of-sync data for FLU ~p: ~s MBytes\n",
|
|
||||||
[RepairId, FLU, mbytes(Bytes)]),
|
|
||||||
ok
|
|
||||||
end || FLU <- OurFLUs],
|
end || FLU <- OurFLUs],
|
||||||
|
|
||||||
%% ?VERB("Execute repair directives: "),
|
?VERB("Execute repair directives: "),
|
||||||
ok = execute_repair_directives(ConsistencyMode, Ds, Src, EpochID,
|
ok = execute_repair_directives(ConsistencyMode, Ds, Src, EpochID,
|
||||||
Verb, OurFLUs, ProxiesDict, ETS),
|
Verb, OurFLUs, ProxiesDict, ETS),
|
||||||
%% ?VERB(" done\n"),
|
?VERB(" done\n"),
|
||||||
lager:info("Repair ~w repair directives finished\n", [RepairId]),
|
|
||||||
ok
|
ok
|
||||||
catch
|
catch
|
||||||
What:Why ->
|
What:Why ->
|
||||||
|
@ -160,7 +156,7 @@ repair(ap_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
||||||
Pid <- orddict:to_list(get(proxies_dict))]
|
Pid <- orddict:to_list(get(proxies_dict))]
|
||||||
end,
|
end,
|
||||||
Res;
|
Res;
|
||||||
repair(cp_mode=_ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
repair(cp_mode=ConsistencyMode, Src, Repairing, UPI, MembersDict, ETS, Opts) ->
|
||||||
io:format(user, "\n\nTODO! cp_mode repair is not fully implemented!\n\n", []),
|
io:format(user, "\n\nTODO! cp_mode repair is not fully implemented!\n\n", []),
|
||||||
repair(ap_mode, Src, Repairing, UPI, MembersDict, ETS, Opts).
|
repair(ap_mode, Src, Repairing, UPI, MembersDict, ETS, Opts).
|
||||||
|
|
||||||
|
@ -207,7 +203,7 @@ make_repair_compare_fun(SrcFLU) ->
|
||||||
T_a =< T_b
|
T_a =< T_b
|
||||||
end.
|
end.
|
||||||
|
|
||||||
make_repair_directives(ConsistencyMode, RepairMode, File, Size, _EpochID,
|
make_repair_directives(ConsistencyMode, RepairMode, File, Size, EpochID,
|
||||||
Verb, Src, FLUs0, ProxiesDict, ETS) ->
|
Verb, Src, FLUs0, ProxiesDict, ETS) ->
|
||||||
true = (Size < ?MAX_OFFSET),
|
true = (Size < ?MAX_OFFSET),
|
||||||
FLUs = lists:usort(FLUs0),
|
FLUs = lists:usort(FLUs0),
|
||||||
|
@ -216,9 +212,11 @@ make_repair_directives(ConsistencyMode, RepairMode, File, Size, _EpochID,
|
||||||
Proxy = orddict:fetch(FLU, ProxiesDict),
|
Proxy = orddict:fetch(FLU, ProxiesDict),
|
||||||
OffSzCs =
|
OffSzCs =
|
||||||
case machi_proxy_flu1_client:checksum_list(
|
case machi_proxy_flu1_client:checksum_list(
|
||||||
Proxy, File, ?LONG_TIMEOUT) of
|
Proxy, EpochID, File, ?LONG_TIMEOUT) of
|
||||||
{ok, InfoBin} ->
|
{ok, InfoBin} ->
|
||||||
machi_csum_table:split_checksum_list_blob_decode(InfoBin);
|
{Info, _} =
|
||||||
|
machi_flu1:split_checksum_list_blob_decode(InfoBin),
|
||||||
|
Info;
|
||||||
{error, no_such_file} ->
|
{error, no_such_file} ->
|
||||||
[]
|
[]
|
||||||
end,
|
end,
|
||||||
|
@ -236,6 +234,7 @@ make_repair_directives(ConsistencyMode, RepairMode, File, Size, _EpochID,
|
||||||
|
|
||||||
make_repair_directives2(C2, ConsistencyMode, RepairMode,
|
make_repair_directives2(C2, ConsistencyMode, RepairMode,
|
||||||
File, Verb, Src, FLUs, ProxiesDict, ETS) ->
|
File, Verb, Src, FLUs, ProxiesDict, ETS) ->
|
||||||
|
?VERB("."),
|
||||||
make_repair_directives3(C2, ConsistencyMode, RepairMode,
|
make_repair_directives3(C2, ConsistencyMode, RepairMode,
|
||||||
File, Verb, Src, FLUs, ProxiesDict, ETS, []).
|
File, Verb, Src, FLUs, ProxiesDict, ETS, []).
|
||||||
|
|
||||||
|
@ -265,18 +264,7 @@ make_repair_directives3([{Offset, Size, CSum, _FLU}=A|Rest0],
|
||||||
%% byte range from all FLUs
|
%% byte range from all FLUs
|
||||||
%% 3b. Log big warning about data loss.
|
%% 3b. Log big warning about data loss.
|
||||||
%% 4. Log any other checksum discrepencies as they are found.
|
%% 4. Log any other checksum discrepencies as they are found.
|
||||||
QQ = [begin
|
exit({todo_repair_sanity_check, ?LINE, File, Offset, As})
|
||||||
Pxy = orddict:fetch(FLU, ProxiesDict),
|
|
||||||
{ok, EpochID} = machi_proxy_flu1_client:get_epoch_id(
|
|
||||||
Pxy, ?SHORT_TIMEOUT),
|
|
||||||
NSInfo = undefined,
|
|
||||||
XX = machi_proxy_flu1_client:read_chunk(
|
|
||||||
Pxy, NSInfo, EpochID, File, Offset, Size, undefined,
|
|
||||||
?SHORT_TIMEOUT),
|
|
||||||
{FLU, XX}
|
|
||||||
end || {__Offset, __Size, __CSum, FLU} <- As],
|
|
||||||
|
|
||||||
exit({todo_repair_sanity_check, ?LINE, File, Offset, {as,As}, {qq,QQ}})
|
|
||||||
end,
|
end,
|
||||||
%% List construction guarantees us that there's at least one ?MAX_OFFSET
|
%% List construction guarantees us that there's at least one ?MAX_OFFSET
|
||||||
%% item remains. Sort order + our "taking" of all exact Offset+Size
|
%% item remains. Sort order + our "taking" of all exact Offset+Size
|
||||||
|
@ -297,16 +285,15 @@ make_repair_directives3([{Offset, Size, CSum, _FLU}=A|Rest0],
|
||||||
true -> Src;
|
true -> Src;
|
||||||
false -> hd(Gots)
|
false -> hd(Gots)
|
||||||
end,
|
end,
|
||||||
_ = [ets:update_counter(ETS, {directive_bytes, FLU_m}, Size) ||
|
[ets:update_counter(ETS, {directive_bytes, FLU_m}, Size) ||
|
||||||
FLU_m <- Missing],
|
FLU_m <- Missing],
|
||||||
if Missing == [] ->
|
if Missing == [] ->
|
||||||
noop;
|
noop;
|
||||||
true ->
|
true ->
|
||||||
{copy, A, Missing}
|
{copy, A, Missing}
|
||||||
end
|
end;
|
||||||
%% end;
|
ConsistencyMode == cp_mode ->
|
||||||
%% ConsistencyMode == cp_mode ->
|
exit({todo_cp_mode, ?MODULE, ?LINE})
|
||||||
%% exit({todo_cp_mode, ?MODULE, ?LINE})
|
|
||||||
end,
|
end,
|
||||||
Acc2 = if Do == noop -> Acc;
|
Acc2 = if Do == noop -> Acc;
|
||||||
true -> [Do|Acc]
|
true -> [Do|Acc]
|
||||||
|
@ -329,42 +316,38 @@ execute_repair_directives(ap_mode=_ConsistencyMode, Ds, _Src, EpochID, Verb,
|
||||||
{ProxiesDict, EpochID, Verb, ETS}, Ds),
|
{ProxiesDict, EpochID, Verb, ETS}, Ds),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
execute_repair_directive({File, Cmds}, {ProxiesDict, EpochID, _Verb, ETS}=Acc) ->
|
execute_repair_directive({File, Cmds}, {ProxiesDict, EpochID, Verb, ETS}=Acc) ->
|
||||||
EtsKeys = [{in_files, t_in_files}, {in_chunks, t_in_chunks},
|
EtsKeys = [{in_files, t_in_files}, {in_chunks, t_in_chunks},
|
||||||
{in_bytes, t_in_bytes}, {out_files, t_out_files},
|
{in_bytes, t_in_bytes}, {out_files, t_out_files},
|
||||||
{out_chunks, t_out_chunks}, {out_bytes, t_out_bytes}],
|
{out_chunks, t_out_chunks}, {out_bytes, t_out_bytes}],
|
||||||
[ets:insert(ETS, {L_K, 0}) || {L_K, _T_K} <- EtsKeys],
|
[ets:insert(ETS, {L_K, 0}) || {L_K, _T_K} <- EtsKeys],
|
||||||
F = fun({copy, {Offset, Size, TaggedCSum, MySrc}, MyDsts}, Acc2) ->
|
F = fun({copy, {Offset, Size, TaggedCSum, MySrc}, MyDsts}, Acc2) ->
|
||||||
SrcP = orddict:fetch(MySrc, ProxiesDict),
|
SrcP = orddict:fetch(MySrc, ProxiesDict),
|
||||||
%% case ets:lookup_element(ETS, in_chunks, 2) rem 100 of
|
case ets:lookup_element(ETS, in_chunks, 2) rem 100 of
|
||||||
%% 0 -> ?VERB(".2", []);
|
0 -> ?VERB(".", []);
|
||||||
%% _ -> ok
|
_ -> ok
|
||||||
%% end,
|
end,
|
||||||
_T1 = os:timestamp(),
|
_T1 = os:timestamp(),
|
||||||
%% TODO: support case multiple written or trimmed chunks returned
|
{ok, Chunk} = machi_proxy_flu1_client:read_chunk(
|
||||||
NSInfo = undefined,
|
SrcP, EpochID, File, Offset, Size,
|
||||||
{ok, {[{_, Offset, Chunk, _ReadCSum}|OtherChunks], []=_TrimmedList}} =
|
|
||||||
machi_proxy_flu1_client:read_chunk(
|
|
||||||
SrcP, NSInfo, EpochID, File, Offset, Size, undefined,
|
|
||||||
?SHORT_TIMEOUT),
|
?SHORT_TIMEOUT),
|
||||||
[] = OtherChunks,
|
|
||||||
_T2 = os:timestamp(),
|
_T2 = os:timestamp(),
|
||||||
<<_Tag:1/binary, CSum/binary>> = TaggedCSum,
|
<<_Tag:1/binary, CSum/binary>> = TaggedCSum,
|
||||||
case machi_util:checksum_chunk(Chunk) of
|
case machi_util:checksum_chunk(Chunk) of
|
||||||
CSum_now when CSum_now == CSum ->
|
CSum_now when CSum_now == CSum ->
|
||||||
_ = [begin
|
[begin
|
||||||
DstP = orddict:fetch(DstFLU, ProxiesDict),
|
DstP = orddict:fetch(DstFLU, ProxiesDict),
|
||||||
_T3 = os:timestamp(),
|
_T3 = os:timestamp(),
|
||||||
ok = machi_proxy_flu1_client:write_chunk(
|
ok = machi_proxy_flu1_client:write_chunk(
|
||||||
DstP, NSInfo, EpochID, File, Offset, Chunk, TaggedCSum,
|
DstP, EpochID, File, Offset, Chunk,
|
||||||
?SHORT_TIMEOUT),
|
?SHORT_TIMEOUT),
|
||||||
_T4 = os:timestamp()
|
_T4 = os:timestamp()
|
||||||
end || DstFLU <- MyDsts],
|
end || DstFLU <- MyDsts],
|
||||||
_ = ets:update_counter(ETS, in_chunks, 1),
|
ets:update_counter(ETS, in_chunks, 1),
|
||||||
_ = ets:update_counter(ETS, in_bytes, Size),
|
ets:update_counter(ETS, in_bytes, Size),
|
||||||
N = length(MyDsts),
|
N = length(MyDsts),
|
||||||
_ = ets:update_counter(ETS, out_chunks, N),
|
ets:update_counter(ETS, out_chunks, N),
|
||||||
_ = ets:update_counter(ETS, out_bytes, N*Size),
|
ets:update_counter(ETS, out_bytes, N*Size),
|
||||||
Acc2;
|
Acc2;
|
||||||
CSum_now ->
|
CSum_now ->
|
||||||
error_logger:error_msg(
|
error_logger:error_msg(
|
||||||
|
@ -383,7 +366,7 @@ execute_repair_directive({File, Cmds}, {ProxiesDict, EpochID, _Verb, ETS}=Acc) -
|
||||||
end,
|
end,
|
||||||
ok = lists:foldl(F, ok, Cmds),
|
ok = lists:foldl(F, ok, Cmds),
|
||||||
%% Copy this file's stats to the total counts.
|
%% Copy this file's stats to the total counts.
|
||||||
_ = [ets:update_counter(ETS, T_K, ets:lookup_element(ETS, L_K, 2)) ||
|
[ets:update_counter(ETS, T_K, ets:lookup_element(ETS, L_K, 2)) ||
|
||||||
{L_K, T_K} <- EtsKeys],
|
{L_K, T_K} <- EtsKeys],
|
||||||
Acc.
|
Acc.
|
||||||
|
|
||||||
|
|
|
@ -1,104 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc cluster_info callback module for machi specific information
|
|
||||||
%% gathering.
|
|
||||||
|
|
||||||
-module(machi_cinfo).
|
|
||||||
|
|
||||||
%% cluster_info callbacks
|
|
||||||
-export([register/0, cluster_info_init/0, cluster_info_generator_funs/0]).
|
|
||||||
|
|
||||||
%% for debug in interactive shell
|
|
||||||
-export([dump/0,
|
|
||||||
public_projection/1, private_projection/1,
|
|
||||||
chain_manager/1, fitness/1, flu1/1]).
|
|
||||||
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
%% @doc Register this module with the `cluster_info' application as a
%% callback module. Crashes with `badmatch' if the registration call
%% returns anything other than `ok'.
-spec register() -> ok.
register() ->
    ok = cluster_info:register_app(?MODULE),
    ok.
|
|
||||||
|
|
||||||
%% @doc cluster_info callback (listed under "cluster_info callbacks" in
%% the export list). There is nothing to initialise, so it just
%% returns `ok'.
-spec cluster_info_init() -> ok.
|
|
||||||
cluster_info_init() ->
|
|
||||||
ok.
|
|
||||||
|
|
||||||
%% @doc cluster_info callback: build the report generators for every
%% child currently listed by the `machi_flu_sup' supervisor. Each child
%% id contributes the entries produced by generator_funs_package/1, and
%% the per-child lists are flattened into a single list.
-spec cluster_info_generator_funs() -> [{string(), fun((pid()) -> ok)}].
cluster_info_generator_funs() ->
    Children = supervisor:which_children(machi_flu_sup),
    PerFlu = [generator_funs_package(Id) || {Id, _Pid, _Type, _Mods} <- Children],
    lists:flatten(PerFlu).
|
|
||||||
|
|
||||||
%% Build the five `{Title, GeneratorFun}' report entries for one FLU.
%% Each title is a fixed prefix followed by the FLU name (an atom);
%% each generator is the matching getter wrapped by cinfo_wrapper/2.
generator_funs_package(FluName) ->
    Sections = [{"Public projection of FLU ", fun public_projection/1},
                {"Private projection of FLU ", fun private_projection/1},
                {"Chain manager status of FLU ", fun chain_manager/1},
                {"Fitness server status of FLU ", fun fitness/1},
                {"FLU1 status of FLU ", fun flu1/1}],
    FluStr = atom_to_list(FluName),
    [{Prefix ++ FluStr, cinfo_wrapper(Getter, FluName)}
     || {Prefix, Getter} <- Sections].
|
|
||||||
|
|
||||||
%% @doc Debug helper for the interactive shell: ask cluster_info to dump
%% the local node into a file named
%% `machi-ci-YYYYMMDD-HHMMSS.html', where the timestamp is the current
%% local time. Returns whatever `cluster_info:dump_local_node/1'
%% returns.
dump() ->
    {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(),
    Format = "machi-ci-~4..0B~2..0B~2..0B-~2..0B~2..0B~2..0B.html",
    Args = [Year, Month, Day, Hour, Minute, Second],
    Filename = lists:flatten(io_lib:format(Format, Args)),
    cluster_info:dump_local_node(Filename).
|
|
||||||
|
|
||||||
%% @doc Return the latest `public' projection of the named FLU as a
%% list of `{FieldName, Value}' pairs (delegates to projection/2).
-spec public_projection(atom()) -> [{atom(), term()}].
|
|
||||||
public_projection(FluName) ->
|
|
||||||
projection(FluName, public).
|
|
||||||
|
|
||||||
%% @doc Return the latest `private' projection of the named FLU as a
%% list of `{FieldName, Value}' pairs (delegates to projection/2).
-spec private_projection(atom()) -> [{atom(), term()}].
|
|
||||||
private_projection(FluName) ->
|
|
||||||
projection(FluName, private).
|
|
||||||
|
|
||||||
%% @doc Return the `sys:get_status/1' report of the process whose name
%% `machi_flu_psup:make_mgr_supname/1' derives from `FluName'.
-spec chain_manager(atom()) -> term().
chain_manager(FluName) ->
    sys:get_status(machi_flu_psup:make_mgr_supname(FluName)).
|
|
||||||
|
|
||||||
%% @doc Return the `sys:get_status/1' report of the process whose name
%% `machi_flu_psup:make_fitness_regname/1' derives from `FluName'.
-spec fitness(atom()) -> term().
fitness(FluName) ->
    sys:get_status(machi_flu_psup:make_fitness_regname(FluName)).
|
|
||||||
|
|
||||||
%% @doc Fetch the current state of the FLU's append server and return
%% it formatted by `machi_flu1_append_server:format_state/1'.
-spec flu1(atom()) -> [{atom(), term()}].
flu1(FluName) ->
    machi_flu1_append_server:format_state(
      machi_flu1_append_server:current_state(FluName)).
|
|
||||||
|
|
||||||
%% Internal functions
|
|
||||||
|
|
||||||
%% Read the latest projection of the given Kind from the FLU's
%% projection store and pair each `projection_v1' record field name
%% with its value. Crashes with `badmatch' if the store does not
%% answer `{ok, Projection}'.
projection(FluName, Kind) ->
    StoreName = machi_flu1:make_projection_server_regname(FluName),
    StorePid = whereis(StoreName),
    {ok, Proj} = machi_projection_store:read_latest_projection(StorePid, Kind),
    %% Drop the record tag; the remaining elements line up with the
    %% field names.
    [_RecordTag | FieldValues] = tuple_to_list(Proj),
    lists:zip(record_info(fields, projection_v1), FieldValues).
|
|
||||||
|
|
||||||
%% Wrap a one-argument getter into the one-argument generator fun that
%% cluster_info expects: when called, it evaluates `Fun(FluName)' and
%% prints the result with "~p" through `cluster_info:format/3'.
cinfo_wrapper(Fun, FluName) ->
    fun(CPid) ->
            Report = Fun(FluName),
            cluster_info:format(CPid, "~p", [Report])
    end.
|
|
|
@ -1,43 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc Configuration consulting utilities. Some conventions:
|
|
||||||
%% - The function name should match with exact configuration
|
|
||||||
%% name in `app.config' or `advanced.config' of `machi' section.
|
|
||||||
%% - The default value of that configuration is expected to be in
|
|
||||||
%% cuttlefish schema file. Otherwise some macro in headers may
|
|
||||||
%% be chosen.
|
|
||||||
%% - Documentation of the configuration is supposed to be written
|
|
||||||
%% in cuttlefish schema file, rather than @doc section of the function.
|
|
||||||
%% - spec of the function should be written.
|
|
||||||
%% - Returning `undefined' is strongly discouraged. Return some default
|
|
||||||
%% value instead.
|
|
||||||
%% - `application:get_env/3' is recommended. See `max_file_size/0' for
|
|
||||||
%% example.
|
|
||||||
|
|
||||||
-module(machi_config).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
|
|
||||||
-export([max_file_size/0]).
|
|
||||||
|
|
||||||
%% @doc Return the `max_file_size' setting of the `machi' application,
%% falling back to `?DEFAULT_MAX_FILE_SIZE' when it is not configured.
-spec max_file_size() -> pos_integer().
|
|
||||||
max_file_size() ->
|
|
||||||
application:get_env(machi, max_file_size, ?DEFAULT_MAX_FILE_SIZE).
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,329 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2016 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_csum_table).
|
|
||||||
|
|
||||||
-export([open/2,
|
|
||||||
find/3,
|
|
||||||
write/6, write/4, trim/5,
|
|
||||||
find_leftneighbor/2, find_rightneighbor/2,
|
|
||||||
all_trimmed/3, any_trimmed/3,
|
|
||||||
all_trimmed/2,
|
|
||||||
calc_unwritten_bytes/1,
|
|
||||||
split_checksum_list_blob_decode/1,
|
|
||||||
all/1,
|
|
||||||
close/1, delete/1,
|
|
||||||
foldl_chunks/3]).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-endif.
|
|
||||||
|
|
||||||
-record(machi_csum_table,
|
|
||||||
{file :: string(),
|
|
||||||
table :: eleveldb:db_ref()}).
|
|
||||||
|
|
||||||
-type table() :: #machi_csum_table{}.
|
|
||||||
-type byte_sequence() :: { Offset :: non_neg_integer(),
|
|
||||||
Size :: pos_integer()|infinity }.
|
|
||||||
-type chunk() :: {Offset :: machi_dt:file_offset(),
|
|
||||||
Size :: machi_dt:chunk_size(),
|
|
||||||
machi_dt:chunk_csum() | trimmed | none}.
|
|
||||||
|
|
||||||
-export_type([table/0]).
|
|
||||||
|
|
||||||
%% @doc Open the LevelDB-backed checksum table stored at
%% `CSumFilename', creating it if it does not exist, and (re)write the
%% dummy entry that covers the reserved header bytes
%% `{0, ?MINIMUM_OFFSET}'. The second argument is ignored.
%%
%% As written, a failure of `eleveldb:open/2' or `eleveldb:put/4'
%% crashes with `badmatch' rather than returning `{error, _}'.
-spec open(string(), proplists:proplist()) ->
                  {ok, table()} | {error, file:posix()}.

open(CSumFilename, _Opts) ->
    %% Keep this table small so that it does not interfere with the
    %% operating system's file cache, which Machi relies on for its
    %% main read efficiency.
    DbOpts = [{create_if_missing, true},
              {total_leveldb_mem_percent, 10}],
    {ok, Db} = eleveldb:open(CSumFilename, DbOpts),
    %% Dummy entry for reserved headers
    HeaderKey = sext:encode({0, ?MINIMUM_OFFSET}),
    HeaderVal = sext:encode(?CSUM_TAG_NONE_ATOM),
    ok = eleveldb:put(Db, HeaderKey, HeaderVal, [{sync, true}]),
    {ok, #machi_csum_table{file=CSumFilename, table=Db}}.
|
|
||||||
|
|
||||||
%% @doc Decode a checksum-list blob by handing it to
%% `erlang:binary_to_term/1'.
%% NOTE(review): binary_to_term/1 is called without the `safe' option.
%% If `Bin' can arrive from an untrusted peer, confirm whether
%% `binary_to_term(Bin, [safe])' should be used instead.
-spec split_checksum_list_blob_decode(binary())-> [chunk()].
|
|
||||||
split_checksum_list_blob_decode(Bin) ->
|
|
||||||
erlang:binary_to_term(Bin).
|
|
||||||
|
|
||||||
|
|
||||||
-define(has_overlap(LeftOffset, LeftSize, RightOffset, RightSize),
|
|
||||||
((LeftOffset - (RightOffset+RightSize)) * (LeftOffset+LeftSize - RightOffset) < 0)).
|
|
||||||
|
|
||||||
%% @doc Return, in key order, every stored chunk `{Offset, Size, CSum}'
%% that overlaps the byte range starting at `Offset' and spanning
%% `Size' bytes (per the `?has_overlap' macro, ranges that merely touch
%% do not overlap).
%%
%% The scan starts one key before the position of `{Offset, Size}' so
%% that a chunk which begins earlier but extends into the range is also
%% considered; non-overlapping entries are filtered out by the fold.
-spec find(table(), machi_dt:file_offset(), machi_dt:chunk_size())
          -> [chunk()].
find(#machi_csum_table{table=T}, Offset, Size) ->
    EndKey = sext:encode({Offset+Size, 0}),
    StartKey = sext:encode({Offset, Size}),
    {ok, FirstKey} = find_first_key(T, StartKey),
    FoldFun = fun({K, V}, Acc) ->
                      {TargetOffset, TargetSize} = sext:decode(K),
                      case ?has_overlap(TargetOffset, TargetSize, Offset, Size) of
                          true ->
                              [{TargetOffset, TargetSize, sext:decode(V)}|Acc];
                          false ->
                              Acc
                      end;
                 (Unexpected, Acc) ->
                      lager:error("~p wrong option", [Unexpected]),
                      Acc
              end,
    lists:reverse(eleveldb_fold(T, FirstKey, EndKey, FoldFun, [])).

%% Position a keys-only iterator for find/3 and return the result of
%% the final iterator move (expected to be `{ok, Key}').
%%
%% The iterator is closed in an `after' clause so that it is released
%% even when one of the moves fails with an unexpected result; the
%% previous inline version closed it on the success path only.
find_first_key(T, StartKey) ->
    {ok, I} = eleveldb:iterator(T, [], keys_only),
    try
        case eleveldb:iterator_move(I, StartKey) of
            {error, invalid_iterator} ->
                try
                    %% Assume that the invalid_iterator is because
                    %% we tried to move to the end via StartKey.
                    %% Instead, move there directly.
                    {ok, _} = eleveldb:iterator_move(I, last),
                    {ok, _} = eleveldb:iterator_move(I, prev)
                catch
                    _:_ ->
                        %% Deliberate fallback: fewer than two keys,
                        %% so start from the very first one.
                        {ok, _} = eleveldb:iterator_move(I, first)
                end;
            {ok, _} = R0 ->
                %% Step back one key if possible, to pick up a chunk
                %% that starts before StartKey.
                case eleveldb:iterator_move(I, prev) of
                    {error, invalid_iterator} ->
                        R0;
                    {ok, _} = R1 ->
                        R1
                end
        end
    after
        _ = eleveldb:iterator_close(I)
    end.
|
|
||||||
|
|
||||||
%% @doc Record chunk info for `{Offset, Size}' with checksum `CSum'.
%% Every existing entry that overlaps the new range is deleted, and the
%% new entry is put in the same synchronous batch. `LeftUpdate' and
%% `RightUpdate' are either `undefined' or a chunk that must be exactly
%% adjacent to the new range (ending at `Offset', respectively starting
%% at `Offset + Size'); such neighbours are re-put after the deletes.
%% Any other value for them crashes with `case_clause'.
-spec write(table(),
            machi_dt:file_offset(), machi_dt:chunk_size(),
            machi_dt:chunk_csum()|'none'|'trimmed',
            undefined|chunk(), undefined|chunk()) ->
                   ok | {error, term()}.
write(#machi_csum_table{table=T} = CsumT, Offset, Size, CSum,
      LeftUpdate, RightUpdate) ->
    MainPut = {put, sext:encode({Offset, Size}), sext:encode(CSum)},
    LeftPuts =
        case LeftUpdate of
            {LO, LS, LCsum} when LO + LS =:= Offset ->
                [{put, sext:encode({LO, LS}), sext:encode(LCsum)}];
            undefined ->
                []
        end,
    RightPuts =
        case RightUpdate of
            {RO, RS, RCsum} when RO =:= Offset + Size ->
                [{put, sext:encode({RO, RS}), sext:encode(RCsum)}];
            undefined ->
                []
        end,
    Overlapping = find(CsumT, Offset, Size),
    Deletes = [{delete, sext:encode({O, L})} || {O, L, _} <- Overlapping],
    %% Deletes go first so that the puts in the same batch win.
    Batch = Deletes ++ [MainPut | LeftPuts ++ RightPuts],
    eleveldb:write(T, Batch, [{sync, true}]).
|
|
||||||
|
|
||||||
%% @doc Look at the chunk covering byte `Offset'. If that chunk starts
%% before `Offset', return its left part `{Start, Offset - Start,
%% CsumOrTrimmed}'; if no chunk covers the byte, or the chunk starts
%% exactly at `Offset', return `undefined'.
-spec find_leftneighbor(table(), non_neg_integer()) ->
                               undefined | chunk().
find_leftneighbor(CsumT, Offset) ->
    Covering = find(CsumT, Offset, 1),
    case Covering of
        [{Start, _Len, CsumOrTrimmed}] when Start =/= Offset ->
            {Start, Offset - Start, CsumOrTrimmed};
        [{Offset, _, _}] ->
            undefined;
        [] ->
            undefined
    end.
|
|
||||||
|
|
||||||
%% @doc Look at the chunk covering byte `Offset'. If that chunk starts
%% before `Offset', return its right part, i.e. the bytes from `Offset'
%% up to the end of the chunk, as `{Offset, Len, CsumOrTrimmed}'; if no
%% chunk covers the byte, or the chunk starts exactly at `Offset',
%% return `undefined'.
-spec find_rightneighbor(table(), non_neg_integer()) ->
                                undefined | chunk().
find_rightneighbor(CsumT, Offset) ->
    Covering = find(CsumT, Offset, 1),
    case Covering of
        [{Start, Len, CsumOrTrimmed}] when Start =/= Offset ->
            ChunkEnd = Start + Len,
            {Offset, ChunkEnd - Offset, CsumOrTrimmed};
        [{Offset, _, _}] ->
            undefined;
        [] ->
            undefined
    end.
|
|
||||||
|
|
||||||
%% @doc Same as write/6 with no left or right neighbour update (both
%% are passed as `undefined').
-spec write(table(), machi_dt:file_offset(), machi_dt:file_size(),
|
|
||||||
machi_dt:chunk_csum()|none|trimmed) ->
|
|
||||||
ok | {error, trimmed|file:posix()}.
|
|
||||||
write(CsumT, Offset, Size, CSum) ->
|
|
||||||
write(CsumT, Offset, Size, CSum, undefined, undefined).
|
|
||||||
|
|
||||||
%% @doc Mark the range `{Offset, Size}' as trimmed by writing it via
%% write/6 with the atom `trimmed' in place of a checksum.
%% `LeftUpdate' and `RightUpdate' are passed through unchanged.
trim(CsumT, Offset, Size, LeftUpdate, RightUpdate) ->
|
|
||||||
write(CsumT, Offset, Size,
|
|
||||||
trimmed, %% Should this be much smaller like $t or just 't'
|
|
||||||
LeftUpdate, RightUpdate).
|
|
||||||
|
|
||||||
%% @doc Returns whether all bytes in the window from `Left' to `Right'
%% are continuously trimmed.
%%
%% The fold walks every entry of the table in key order, tracking in
%% `Pos' the offset up to which the window is known to be trimmed
%% (`false' once a gap or a non-trimmed chunk is met). Entries that end
%% at or before `Left' are skipped. Once `Pos' has moved past `Right'
%% the window is fully covered, so later entries are ignored; without
%% that clause the fold fun crashed with `function_clause' on any entry
%% stored after the window.
%% NOTE(review): when `Pos' is exactly `Right' the next entry is still
%% examined (guard `Pos =< Right'), so a non-trimmed chunk starting at
%% `Right' yields `false' -- confirm whether `Right' is meant to be
%% inclusive.
-spec all_trimmed(table(), non_neg_integer(), non_neg_integer()) -> boolean().
all_trimmed(#machi_csum_table{table=T}, Left, Right) ->
    FoldFun = fun({_, _}, false) ->
                      false;
                 ({K, V}, Pos) when is_integer(Pos) andalso Pos =< Right ->
                      case {sext:decode(K), sext:decode(V)} of
                          {{Pos, Size}, trimmed} ->
                              Pos + Size;
                          {{Offset, Size}, _}
                            when Offset + Size =< Left ->
                              Left;
                          _Eh ->
                              false
                      end;
                 ({_, _}, Pos) when is_integer(Pos) ->
                      %% Pos > Right: the window is already covered by
                      %% trimmed chunks, nothing after it matters.
                      Pos
              end,
    case eleveldb:fold(T, FoldFun, Left, [{verify_checksums, true}]) of
        false -> false;
        Right -> true;
        LastTrimmed when LastTrimmed < Right -> false;
        _ -> %% LastTrimmed > Right, which is an irregular case but ok
            true
    end.
|
|
||||||
|
|
||||||
%% @doc Returns whether all bytes 0-Pos0 are continuously trimmed or
|
|
||||||
%% not, including the header. Delegates to all_trimmed/3 with Left = 0.
|
|
||||||
-spec all_trimmed(table(), non_neg_integer()) -> boolean().
|
|
||||||
all_trimmed(CsumT, Pos0) ->
|
|
||||||
all_trimmed(CsumT, 0, Pos0).
|
|
||||||
|
|
||||||
%% @doc Returns `true' if at least one chunk overlapping the range
%% `{Offset, Size}' is recorded as `trimmed', `false' otherwise.
-spec any_trimmed(table(),
                  pos_integer(),
                  machi_dt:chunk_size()) -> boolean().
any_trimmed(CsumT, Offset, Size) ->
    %% Chunks are {Offset, Size, State}; look for `trimmed' in slot 3.
    lists:keymember(trimmed, 3, find(CsumT, Offset, Size)).
|
|
||||||
|
|
||||||
%% @doc Return the list of unwritten byte ranges of the table. With no
%% entries at all the result is `[{?MINIMUM_OFFSET, infinity}]';
%% otherwise the sorted entries are handed to
%% build_unwritten_bytes_list/3, starting from the offset of the first
%% entry.
-spec calc_unwritten_bytes(table()) -> [byte_sequence()].
calc_unwritten_bytes(#machi_csum_table{table=_} = CsumT) ->
    case lists:sort(all(CsumT)) of
        [{FirstOffset, _, _} | _] = Sorted ->
            build_unwritten_bytes_list(Sorted, FirstOffset, []);
        [] ->
            [{?MINIMUM_OFFSET, infinity}]
    end.
|
|
||||||
|
|
||||||
%% @doc Return every chunk entry of the table as a list of
%% `{Offset, Size, CSum}' tuples, in the order the fold visits them.
all(CsumT) ->
    Collected = foldl_chunks(fun(Chunk, Acc) -> [Chunk | Acc] end, [], CsumT),
    lists:reverse(Collected).
|
|
||||||
|
|
||||||
%% @doc Close the underlying LevelDB handle. Crashes with `badmatch' if
%% `eleveldb:close/1' does not return `ok'.
-spec close(table()) -> ok.
|
|
||||||
close(#machi_csum_table{table=T}) ->
|
|
||||||
ok = eleveldb:close(T).
|
|
||||||
|
|
||||||
%% @doc Close the table (best effort) and remove its on-disk data.
%%
%% The path is removed by walking the directory with the `file' module
%% instead of running `rm -rf' through a shell, so file names
%% containing spaces or shell metacharacters are handled safely. A path
%% that does not exist is not an error. On failure the result is
%% `{error, {Reason, Path}}' naming the first path that could not be
%% removed.
-spec delete(table()) -> ok | {error, term()}.
delete(#machi_csum_table{table=T, file=F}) ->
    %% Deliberately best effort: the handle may already be closed.
    try eleveldb:close(T) of
        _ -> ok
    catch
        _:_ -> ok
    end,
    delete_path_recursive(F).

%% Remove Path, whether it is a file, a symbolic link or a directory
%% tree. Symbolic links are unlinked, never followed.
delete_path_recursive(Path) ->
    case file:delete(Path) of
        ok ->
            ok;
        {error, enoent} ->
            ok;
        {error, DeleteReason} ->
            %% file:delete/1 refuses directories (the reason differs
            %% between platforms), so retry as a directory.
            case file:list_dir(Path) of
                {ok, Names} ->
                    delete_dir_entries(Path, Names);
                {error, _} ->
                    {error, {DeleteReason, Path}}
            end
    end.

%% Remove the named entries of Dir one by one, then Dir itself; stop at
%% the first failure.
delete_dir_entries(Dir, []) ->
    case file:del_dir(Dir) of
        ok -> ok;
        {error, enoent} -> ok;
        {error, Reason} -> {error, {Reason, Dir}}
    end;
delete_dir_entries(Dir, [Name | Rest]) ->
    case delete_path_recursive(filename:join(Dir, Name)) of
        ok -> delete_dir_entries(Dir, Rest);
        {error, _} = Error -> Error
    end.
|
|
||||||
|
|
||||||
%% @doc Fold `Fun' over every entry of the table. Each key/value pair
%% is decoded with `sext:decode/1' and passed to `Fun' as
%% `{Offset, Len, CSum}' together with the accumulator. Anything that
%% is not a two-element tuple is logged and skipped.
-spec foldl_chunks(fun((chunk(), Acc0 :: term()) -> Acc :: term()),
                   Acc0 :: term(), table()) -> Acc :: term().
foldl_chunks(Fun, Acc0, #machi_csum_table{table=T}) ->
    Visit = fun({EncKey, EncVal}, Acc) ->
                    {Offset, Len} = sext:decode(EncKey),
                    Fun({Offset, Len, sext:decode(EncVal)}, Acc);
               (Unexpected, Acc) ->
                    _ = lager:error("~p: wrong option?", [Unexpected]),
                    Acc
            end,
    eleveldb:fold(T, Visit, Acc0, [{verify_checksums, true}]).
|
|
||||||
|
|
||||||
-spec build_unwritten_bytes_list( CsumData   :: [{ Offset   :: non_neg_integer(),
                                                   Size     :: pos_integer(),
                                                   Checksum :: binary() }],
                                  LastOffset :: non_neg_integer(),
                                  Acc        :: list() ) -> [byte_sequence()].
% @private Given a <b>sorted</b> list of checksum data tuples, return a
% sorted list of unwritten byte ranges. `LastOffset' is the offset just
% past the previously seen chunk; whenever the next chunk does not
% start there, the gap `{LastOffset, GapSize}' is recorded. The output
% list <b>always</b> has at least one entry: its last tuple is the
% current end of the bytes written, with the special size `infinity'.
build_unwritten_bytes_list([], EndOfData, Acc) ->
    lists:reverse([{EndOfData, infinity} | Acc]);
build_unwritten_bytes_list([{Off, Sz, _Csum} | Tail], Prev, Acc) ->
    Holes = if
                Off /= Prev -> [{Prev, Off - Prev} | Acc];
                true -> Acc
            end,
    build_unwritten_bytes_list(Tail, Off + Sz, Holes).
|
|
||||||
|
|
||||||
%% @doc To test whether two areas [x, y] and [a, b] (where x < y and
%% a < b) overlap: if (a-y)*(b-x) < 0 then they overlap; if it is
%% > 0 then they do not overlap. The border condition (= 0) does not
%% count as an overlap in this offset-size case.
|
|
||||||
%% inclusion_match_spec(Offset, Size) ->
|
|
||||||
%% {'>', 0,
|
|
||||||
%% {'*',
|
|
||||||
%% {'-', Offset + Size, '$1'},
|
|
||||||
%% {'-', Offset, {'+', '$1', '$2'}}}}.
|
|
||||||
|
|
||||||
%% Fold `FoldFun' over the key/value pairs of `Ref', starting at the
%% position the iterator reaches when moved to `Start' and stopping as
%% decided by eleveldb_do_fold/5. The iterator is always closed on the
%% way out. Any `throw' raised during the fold is returned as
%% `{error, Thrown}' instead of the accumulator.
-spec eleveldb_fold(eleveldb:db_ref(), binary(), binary(),
                    fun(({binary(), binary()}, AccType::term()) -> AccType::term()),
                    AccType0::term()) ->
                           AccType::term().
eleveldb_fold(Ref, Start, End, FoldFun, InitAcc) ->
    {ok, Itr} = eleveldb:iterator(Ref, []),
    try
        FirstStep = eleveldb:iterator_move(Itr, Start),
        eleveldb_do_fold(FirstStep, Itr, End, FoldFun, InitAcc)
    catch
        throw:Thrown ->
            {error, Thrown}
    after
        eleveldb:iterator_close(Itr)
    end.
|
|
||||||
|
|
||||||
%% Drive the fold from the result of the latest iterator move.
%%  - `{error, invalid_iterator}': treated as the end of the data, the
%%    accumulator is returned.
%%  - `{error, iterator_closed}': unexpected; throws
%%    `{iterator_closed, Acc}'.
%%  - `{ok, Key, Value}' with `Key' greater than `End': the pair is
%%    still passed to `FoldFun' once, then the fold stops.
%%  - any other `{ok, Key, Value}': the iterator is advanced, the pair
%%    is folded, and the loop continues.
-spec eleveldb_do_fold({ok, binary(), binary()}|{error, iterator_closed|invalid_iterator}|{ok,binary()},
                       eleveldb:itr_ref(), binary(),
                       fun(({binary(), binary()}, AccType::term()) -> AccType::term()),
                       AccType::term()) ->
                              AccType::term().
eleveldb_do_fold({error, invalid_iterator}, _Itr, _End, _FoldFun, Acc) ->
    %% Probably reached the end
    Acc;
eleveldb_do_fold({error, iterator_closed}, _Itr, _End, _FoldFun, Acc) ->
    %% It's really an error which is not expected
    throw({iterator_closed, Acc});
eleveldb_do_fold({ok, Key, Value}, _Itr, End, FoldFun, Acc)
  when End < Key ->
    FoldFun({Key, Value}, Acc);
eleveldb_do_fold({ok, Key, Value}, Itr, End, FoldFun, Acc) ->
    %% Advance first, then fold the current pair.
    NextStep = eleveldb:iterator_move(Itr, next),
    eleveldb_do_fold(NextStep, Itr, End, FoldFun,
                     FoldFun({Key, Value}, Acc)).
|
|
|
@ -20,24 +20,15 @@
|
||||||
|
|
||||||
-module(machi_dt).
|
-module(machi_dt).
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
-include("machi_projection.hrl").
|
||||||
|
|
||||||
-type append_opts() :: #append_opts{}.
|
-type chunk() :: chunk_bin() | {chunk_csum(), chunk_bin()}.
|
||||||
-type chunk() :: chunk_bin() | iolist(). % client can choose either rep.
|
-type chunk_bin() :: binary() | iolist(). % client can use either
|
||||||
-type chunk_bin() :: binary(). % server returns binary() only.
|
-type chunk_csum() :: binary(). % 1 byte tag, N-1 bytes checksum
|
||||||
-type chunk_csum() :: <<>> | chunk_csum_bin() | {csum_tag(), binary()}.
|
-type chunk_summary() :: {file_offset(), chunk_size(), binary()}.
|
||||||
-type chunk_csum_bin() :: binary(). % 1 byte tag, N-1 bytes checksum
|
-type chunk_s() :: binary(). % server always uses binary()
|
||||||
-type chunk_cstrm() :: 'trimmed' | chunk_csum().
|
|
||||||
-type chunk_summary() :: {file_offset(), chunk_size(), chunk_bin(), chunk_cstrm()}.
|
|
||||||
-type chunk_pos() :: {file_offset(), chunk_size(), file_name_s()}.
|
-type chunk_pos() :: {file_offset(), chunk_size(), file_name_s()}.
|
||||||
-type chunk_size() :: non_neg_integer().
|
-type chunk_size() :: non_neg_integer().
|
||||||
|
|
||||||
%% Tags that stand for how that checksum was generated. See
|
|
||||||
%% machi_util:make_tagged_csum/{1,2} for further documentation and
|
|
||||||
%% implementation.
|
|
||||||
-type csum_tag() :: none | client_sha | server_sha | server_regen_sha.
|
|
||||||
|
|
||||||
-type error_general() :: 'bad_arg' | 'wedged' | 'bad_checksum'.
|
-type error_general() :: 'bad_arg' | 'wedged' | 'bad_checksum'.
|
||||||
-type epoch_csum() :: binary().
|
-type epoch_csum() :: binary().
|
||||||
-type epoch_num() :: -1 | non_neg_integer().
|
-type epoch_num() :: -1 | non_neg_integer().
|
||||||
|
@ -50,26 +41,17 @@
|
||||||
-type file_prefix() :: binary() | list().
|
-type file_prefix() :: binary() | list().
|
||||||
-type inet_host() :: inet:ip_address() | inet:hostname().
|
-type inet_host() :: inet:ip_address() | inet:hostname().
|
||||||
-type inet_port() :: inet:port_number().
|
-type inet_port() :: inet:port_number().
|
||||||
-type locator() :: number().
|
|
||||||
-type namespace() :: binary().
|
|
||||||
-type namespace_version() :: non_neg_integer().
|
|
||||||
-type ns_info() :: #ns_info{}.
|
|
||||||
-type projection() :: #projection_v1{}.
|
-type projection() :: #projection_v1{}.
|
||||||
-type projection_type() :: 'public' | 'private'.
|
-type projection_type() :: 'public' | 'private'.
|
||||||
-type read_opts() :: #read_opts{}.
|
|
||||||
-type read_opts_x() :: 'undefined' | 'noopt' | 'none' | #read_opts{}.
|
|
||||||
|
|
||||||
-export_type([
|
-export_type([
|
||||||
append_opts/0,
|
|
||||||
chunk/0,
|
chunk/0,
|
||||||
chunk_bin/0,
|
chunk_bin/0,
|
||||||
chunk_csum/0,
|
chunk_csum/0,
|
||||||
chunk_csum_bin/0,
|
|
||||||
chunk_cstrm/0,
|
|
||||||
chunk_summary/0,
|
chunk_summary/0,
|
||||||
|
chunk_s/0,
|
||||||
chunk_pos/0,
|
chunk_pos/0,
|
||||||
chunk_size/0,
|
chunk_size/0,
|
||||||
csum_tag/0,
|
|
||||||
error_general/0,
|
error_general/0,
|
||||||
epoch_csum/0,
|
epoch_csum/0,
|
||||||
epoch_num/0,
|
epoch_num/0,
|
||||||
|
@ -82,13 +64,7 @@
|
||||||
file_prefix/0,
|
file_prefix/0,
|
||||||
inet_host/0,
|
inet_host/0,
|
||||||
inet_port/0,
|
inet_port/0,
|
||||||
locator/0,
|
|
||||||
namespace/0,
|
|
||||||
namespace_version/0,
|
|
||||||
ns_info/0,
|
|
||||||
projection/0,
|
projection/0,
|
||||||
projection_type/0,
|
projection_type/0
|
||||||
read_opts/0,
|
|
||||||
read_opts_x/0
|
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
|
|
@ -1,908 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc This is a proxy process which mediates access to Machi FLU
|
|
||||||
%% controlled files. In particular, it manages the "write-once register"
|
|
||||||
%% conceit at the heart of Machi's design.
|
|
||||||
%%
|
|
||||||
%% Read, write and append requests for a single file will be managed
|
|
||||||
%% through this proxy. Clients can also request syncs for specific
|
|
||||||
%% types of filehandles.
|
|
||||||
%%
|
|
||||||
%% As operations are requested, the proxy keeps track of how many
|
|
||||||
%% operations it has performed (and how many errors were generated.)
|
|
||||||
%% After a sufficient period of inactivity, the server terminates
|
|
||||||
%% itself.
|
|
||||||
%%
|
|
||||||
%% TODO:
|
|
||||||
%% 1. Some way to transition the proxy into a wedged state that
|
|
||||||
%% doesn't rely on message delivery.
|
|
||||||
%%
|
|
||||||
%% 2. Check max file size on appends. Writes we take on faith we can
|
|
||||||
%% and should handle.
|
|
||||||
%%
|
|
||||||
%% 3. Async checksum reads on startup.
|
|
||||||
|
|
||||||
-module(machi_file_proxy).
|
|
||||||
-behaviour(gen_server).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
-export([
|
|
||||||
start_link/3,
|
|
||||||
stop/1,
|
|
||||||
sync/1,
|
|
||||||
sync/2,
|
|
||||||
read/3,
|
|
||||||
read/4,
|
|
||||||
write/3,
|
|
||||||
write/4,
|
|
||||||
trim/4,
|
|
||||||
append/2,
|
|
||||||
append/4,
|
|
||||||
checksum_list/1
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
-export([
|
|
||||||
init/1,
|
|
||||||
handle_call/3,
|
|
||||||
handle_cast/2,
|
|
||||||
handle_info/2,
|
|
||||||
terminate/2,
|
|
||||||
code_change/3
|
|
||||||
]).
|
|
||||||
|
|
||||||
-define(TICK, 5*1000).
|
|
||||||
-define(TICK_THRESHOLD, 5). %% After this + 1 more quiescent ticks, shutdown
|
|
||||||
-define(TIMEOUT, 10*1000).
|
|
||||||
-define(TOO_MANY_ERRORS_RATIO, 50).
|
|
||||||
|
|
||||||
-type op_stats() :: { Total :: non_neg_integer(),
|
|
||||||
Errors :: non_neg_integer() }.
|
|
||||||
|
|
||||||
-record(state, {
|
|
||||||
fluname :: atom(),
|
|
||||||
data_dir :: string() | undefined,
|
|
||||||
filename :: string() | undefined,
|
|
||||||
data_path :: string() | undefined,
|
|
||||||
wedged = false :: boolean(),
|
|
||||||
csum_file :: string()|undefined,
|
|
||||||
csum_path :: string()|undefined,
|
|
||||||
data_filehandle :: file:io_device(),
|
|
||||||
csum_table :: machi_csum_table:table(),
|
|
||||||
eof_position = 0 :: non_neg_integer(),
|
|
||||||
max_file_size = ?DEFAULT_MAX_FILE_SIZE :: pos_integer(),
|
|
||||||
rollover = false :: boolean(),
|
|
||||||
tref :: reference(), %% timer ref
|
|
||||||
ticks = 0 :: non_neg_integer(), %% ticks elapsed with no new operations
|
|
||||||
ops = 0 :: non_neg_integer(), %% sum of all ops
|
|
||||||
reads = {0, 0} :: op_stats(),
|
|
||||||
writes = {0, 0} :: op_stats(),
|
|
||||||
appends = {0, 0} :: op_stats(),
|
|
||||||
trims = {0, 0} :: op_stats()
|
|
||||||
}).
|
|
||||||
|
|
||||||
%% Public API
|
|
||||||
|
|
||||||
% @doc Start and link a file proxy process for one file. The FLU name,
% the filename and the data directory are handed to `init/1' as a single
% tuple. Returns whatever `gen_server:start_link/3' returns.
-spec start_link(FluName :: atom(), Filename :: string(), DataDir :: string()) -> any().
start_link(FluName, Filename, DataDir) ->
    InitArg = {FluName, Filename, DataDir},
    gen_server:start_link(?MODULE, InitArg, []).
|
|
||||||
|
|
||||||
% @doc Ask the file proxy process `Pid' to stop. This is a synchronous
% call that waits at most `?TIMEOUT' milliseconds.
-spec stop(Pid :: pid()) -> ok.
stop(Pid) when is_pid(Pid) ->
    Request = {stop},
    gen_server:call(Pid, Request, ?TIMEOUT).
|
|
||||||
|
|
||||||
% @doc Sync every filehandle of the proxy; shorthand for `sync(Pid, all)'.
% A non-pid argument is logged and answered with `{error, bad_arg}'.
-spec sync(Pid :: pid()) -> ok|{error, term()}.
sync(Pid) ->
    case is_pid(Pid) of
        true ->
            sync(Pid, all);
        false ->
            lager:warning("Bad pid to sync"),
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
% @doc Sync one class of filehandle. `Type' must be `all', `csum' or
% `data'; anything else (or a non-pid `Pid') is logged and answered with
% `{error, bad_arg}'.
-spec sync(Pid :: pid(), Type :: all|data|csum) -> ok|{error, term()}.
sync(Pid, Type) ->
    ValidArgs = is_pid(Pid) andalso lists:member(Type, [all, csum, data]),
    case ValidArgs of
        true ->
            gen_server:call(Pid, {sync, Type}, ?TIMEOUT);
        false ->
            lager:warning("Bad arg to sync: Type ~p", [Type]),
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
% @doc Read file at offset for length, using default read options. This
% returns a sequence of all written and trimmed (optional) bytes that
% overlap with the requested offset and length. Borders are not aligned.
%
% On success the result is `{ok, {Chunks, Trimmed}}': `Chunks' lists the
% data that was read and `Trimmed' lists trimmed ranges (empty unless the
% read options ask for trimmed ranges).
-spec read(Pid :: pid(),
           Offset :: non_neg_integer(),
           Length :: non_neg_integer()) ->
                  {ok, {Chunks :: [{Filename::string(), Offset :: non_neg_integer(),
                                    Data :: binary(), Checksum :: binary()}],
                        Trimmed :: [{Filename::string(), Offset :: non_neg_integer(),
                                     Size :: non_neg_integer()}]}} |
                  {error, Reason :: term()}.
read(Pid, Offset, Length) ->
    read(Pid, Offset, Length, #read_opts{}).
|
|
||||||
|
|
||||||
% @doc Read file at offset for length with explicit read options.
%
% The request is only forwarded to the proxy when `Pid' is a pid,
% `Offset' is a non-negative integer, `Length' is a positive integer and
% `Opts' is a `#read_opts{}' record. Every other combination is logged
% and answered with `{error, bad_arg}'.
%
% On success the result is `{ok, {Chunks, Trimmed}}'.
-spec read(Pid :: pid(),
           Offset :: non_neg_integer(),
           Length :: non_neg_integer(),
           machi_dt:read_opts_x()) ->
                  {ok, {Chunks :: [{Filename::string(), Offset :: non_neg_integer(),
                                    Data :: binary(), Checksum :: binary()}],
                        Trimmed :: [{Filename::string(), Offset :: non_neg_integer(),
                                     Size :: non_neg_integer()}]}} |
                  {error, Reason :: term()}.
read(Pid, Offset, Length, #read_opts{}=Opts)
  when is_pid(Pid) andalso is_integer(Offset) andalso Offset >= 0
       andalso is_integer(Length) andalso Length > 0 ->
    gen_server:call(Pid, {read, Offset, Length, Opts}, ?TIMEOUT);
read(_Pid, Offset, Length, Opts) ->
    lager:warning("Bad args to read: Offset ~p, Length ~p, Options ~p", [Offset, Length, Opts]),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Write `Data' at `Offset' without any client metadata; delegates to
% `write/4' with an empty metadata list. Invalid arguments are logged and
% answered with `{error, bad_arg}'.
-spec write(Pid :: pid(), Offset :: non_neg_integer(), Data :: binary()) -> ok|{error, term()}.
write(Pid, Offset, Data)
  when is_pid(Pid), is_integer(Offset), Offset >= 0, is_binary(Data) ->
    NoMeta = [],
    write(Pid, Offset, NoMeta, Data);
write(_Pid, Offset, _Data) ->
    lager:warning("Bad arg to write: Offset ~p", [Offset]),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Write `Data' at `Offset' together with client metadata.
% `ClientMeta' is a proplist; the server side looks up these keys:
% <ul>
% <li>`client_csum_tag' - the type of checksum from the client as defined in the machi.hrl file</li>
% <li>`client_csum' - the checksum value from the client</li>
% </ul>
% Invalid arguments are logged and answered with `{error, bad_arg}'.
-spec write(Pid :: pid(), Offset :: non_neg_integer(), ClientMeta :: proplists:proplist(),
            Data :: binary()) -> ok|{error, term()}.
write(Pid, Offset, ClientMeta, Data)
  when is_pid(Pid), is_integer(Offset), Offset >= 0,
       is_list(ClientMeta), is_binary(Data) ->
    Request = {write, Offset, ClientMeta, Data},
    gen_server:call(Pid, Request, ?TIMEOUT);
write(_Pid, Offset, ClientMeta, _Data) ->
    lager:warning("Bad arg to write: Offset ~p, ClientMeta: ~p", [Offset, ClientMeta]),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Trim `Size' bytes starting at `Offset'. `TriggerGC' is passed
% through to the proxy process with the request.
%
% The request is forwarded only when `Pid' is a pid, `Offset' is a
% non-negative integer, `Size' is a positive integer and `TriggerGC' is a
% boolean. As with the other API functions in this module, any other
% arguments are logged and answered with `{error, bad_arg}'.
%
% NOTE(review): the success reply is produced by maybe_gc/2 on the server
% side, which is not visible here, so the return type is left as term().
-spec trim(Pid :: pid(), Offset :: non_neg_integer(), Size :: pos_integer(),
           TriggerGC :: boolean()) -> term().
trim(Pid, Offset, Size, TriggerGC) when is_pid(Pid),
                                        is_integer(Offset) andalso Offset >= 0,
                                        is_integer(Size) andalso Size > 0,
                                        is_boolean(TriggerGC) ->
    gen_server:call(Pid, {trim, Offset, Size, TriggerGC}, ?TIMEOUT);
trim(_Pid, Offset, Size, TriggerGC) ->
    lager:warning("Bad arg to trim: Offset ~p, Size ~p, TriggerGC ~p",
                  [Offset, Size, TriggerGC]),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Append `Data' with no client metadata and no extra space
% reservation; delegates to `append/4'. Invalid arguments are logged and
% answered with `{error, bad_arg}'.
-spec append(Pid :: pid(), Data :: binary()) -> {ok, File :: string(), Offset :: non_neg_integer()}
                                                |{error, term()}.
append(Pid, Data) when is_pid(Pid), is_binary(Data) ->
    NoMeta = [],
    NoExtra = 0,
    append(Pid, NoMeta, NoExtra, Data);
append(_Pid, _Data) ->
    lager:warning("Bad arguments to append/2"),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Append `Data' to the file, passing along client metadata and an
% optional reservation of `Extra' additional bytes. `ClientMeta' is a
% proplist with the same keys as for write/4. Invalid arguments are
% logged and answered with `{error, bad_arg}'.
-spec append(Pid :: pid(), ClientMeta :: proplists:proplist(),
             Extra :: non_neg_integer(), Data :: binary()) -> {ok, File :: string(), Offset :: non_neg_integer()}
                                                              |{error, term()}.
append(Pid, ClientMeta, Extra, Data)
  when is_pid(Pid), is_list(ClientMeta),
       is_integer(Extra), Extra >= 0,
       is_binary(Data) ->
    Request = {append, ClientMeta, Extra, Data},
    gen_server:call(Pid, Request, ?TIMEOUT);
append(_Pid, ClientMeta, Extra, _Data) ->
    lager:warning("Bad arg to append: ClientMeta ~p, Extra ~p", [ClientMeta, Extra]),
    {error, bad_arg}.
|
|
||||||
|
|
||||||
% @doc Fetch every entry of the proxy's checksum table as `{ok, List}'.
-spec checksum_list(pid()) -> {ok, list()}.
checksum_list(Pid) ->
    Request = {checksum_list},
    gen_server:call(Pid, Request, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
|
|
||||||
% @private
% Set up the proxy for one file: make sure the checksum and data
% directories exist, open the checksum table and the data file, stamp the
% reserved header area, start the tick timer and build the initial state.
init({FluName, Filename, DataDir}) ->
    CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
    {_, DataPath} = machi_util:make_data_filename(DataDir, Filename),
    ok = filelib:ensure_dir(CsumFile),
    ok = filelib:ensure_dir(DataPath),
    {ok, Table} = machi_csum_table:open(CsumFile, []),
    %% The last unwritten range is open-ended; its start is used as EOF.
    {Eof, infinity} = lists:last(machi_csum_table:calc_unwritten_bytes(Table)),
    {ok, DataFH} = file:open(DataPath, [read, write, binary, raw]),
    %% Reserve for EC and stuff, to prevent eof when read
    Header = binary:copy(<<"so what?">>, ?MINIMUM_OFFSET div 8),
    ok = file:pwrite(DataFH, 0, Header),
    TimerRef = schedule_tick(),
    InitialState = #state{
                      fluname = FluName,
                      filename = Filename,
                      data_dir = DataDir,
                      data_path = DataPath,
                      csum_file = CsumFile,
                      data_filehandle = DataFH,
                      csum_table = Table,
                      tref = TimerRef,
                      eof_position = erlang:max(Eof, ?MINIMUM_OFFSET),
                      max_file_size = machi_config:max_file_size()},
    lager:debug("Starting file proxy ~p for filename ~p, state = ~p, Eof = ~p",
                [self(), Filename, InitialState, Eof]),
    {ok, InitialState}.
|
|
||||||
|
|
||||||
% @private
% Synchronous request handler. Requests are {stop}, {sync, Type},
% {read, ...}, {write, ...}, {trim, ...}, {append, ...} and
% {checksum_list}. Each operation keeps a {Total, Errors} counter pair in
% the state; when the proxy is wedged, read/write/trim/append are refused
% with {error, wedged} and counted as an error against their own counter.
handle_call({stop}, _From, State) ->
    lager:debug("Requested to stop."),
    %% Reply explicitly so that stop/1 returns ok (as its spec promises)
    %% instead of exiting when the server terminates without a reply.
    {stop, normal, ok, State};

handle_call({sync, data}, _From, State = #state{ data_filehandle = FHd }) ->
    R = file:sync(FHd),
    {reply, R, State};

handle_call({sync, csum}, _From, State) ->
    %% machi_csum_table always writes in {sync, true} option, so here
    %% explicit sync isn't actually needed.
    {reply, ok, State};

handle_call({sync, all}, _From, State = #state{filename = F,
                                               data_filehandle = FHd
                                              }) ->
    %% Only the data file needs an explicit sync (see the csum clause).
    Resp = case file:sync(FHd) of
               ok ->
                   ok;
               Error ->
                   lager:error("Got ~p syncing all files for file ~p",
                               [Error, F]),
                   Error
           end,
    {reply, Resp, State};

%%% READS

handle_call({read, _Offset, _Length, _}, _From,
            State = #state{wedged = true,
                           reads = {T, Err}
                          }) ->
    %% Count the refusal against the read counters (it used to be stored
    %% in the write counters by mistake).
    {reply, {error, wedged}, State#state{reads = {T + 1, Err + 1}}};

handle_call({read, Offset, Length, _Opts}, _From,
            State = #state{eof_position = Eof,
                           reads = {T, Err}
                          }) when Offset > Eof ->
    %% make sure [Offset, Offset+Length) has an overlap with file range
    lager:error("Read request at offset ~p for ~p bytes is past the last write offset of ~p",
                [Offset, Length, Eof]),
    {reply, {error, not_written}, State#state{reads = {T + 1, Err + 1}}};

handle_call({read, Offset, Length, Opts}, _From,
            State = #state{filename = F,
                           data_filehandle = FH,
                           csum_table = CsumTable,
                           reads = {T, Err}
                          }) ->
    %% TODO: use these options - NoChunk prevents reading from disks
    %% NoChecksum doesn't check checksums
    #read_opts{no_checksum=NoChecksum, no_chunk=NoChunk,
               needs_trimmed=NeedsTrimmed} = Opts,
    {Resp, NewErr} =
        case do_read(FH, F, CsumTable, Offset, Length, NoChunk, NoChecksum) of
            {ok, {[], []}} ->
                %% Nothing written or trimmed overlaps the request.
                {{error, not_written}, Err + 1};
            {ok, {Chunks0, Trimmed0}} ->
                Chunks = slice_both_side(Chunks0, Offset, Offset+Length),
                %% Trimmed ranges are reported only on request.
                Trimmed = case NeedsTrimmed of
                              true -> Trimmed0;
                              false -> []
                          end,
                {{ok, {Chunks, Trimmed}}, Err};
            Error ->
                lager:error("Can't read ~p, ~p at File ~p", [Offset, Length, F]),
                {Error, Err + 1}
        end,
    {reply, Resp, State#state{reads = {T+1, NewErr}}};

%%% WRITES

handle_call({write, _Offset, _ClientMeta, _Data}, _From,
            State = #state{wedged = true,
                           writes = {T, Err}
                          }) ->
    {reply, {error, wedged}, State#state{writes = {T + 1, Err + 1}}};

handle_call({write, Offset, ClientMeta, Data}, _From,
            State = #state{filename = F,
                           writes = {T, Err},
                           data_filehandle = FHd,
                           csum_table = CsumTable}) ->

    ClientCsumTag = proplists:get_value(client_csum_tag, ClientMeta, ?CSUM_TAG_NONE),
    ClientCsum = proplists:get_value(client_csum, ClientMeta, <<>>),

    {Resp, NewErr} =
        case check_or_make_tagged_csum(ClientCsumTag, ClientCsum, Data) of
            {error, {bad_csum, Bad}} ->
                lager:error("Bad checksum on write; client sent ~p, we computed ~p",
                            [ClientCsum, Bad]),
                {{error, bad_checksum}, Err + 1};
            {error, Reason} ->
                %% Any other error (e.g. an unknown checksum tag) must not
                %% fall through and be stored as if it were a checksum.
                lager:error("Cannot verify checksum on write: ~p", [Reason]),
                {{error, bad_checksum}, Err + 1};
            TaggedCsum ->
                case handle_write(FHd, CsumTable, F, TaggedCsum, Offset, Data) of
                    ok ->
                        {ok, Err};
                    Error ->
                        {Error, Err + 1}
                end
        end,
    %% Recompute EOF from the checksum table: the start of the trailing
    %% open-ended unwritten range.
    {NewEof, infinity} = lists:last(machi_csum_table:calc_unwritten_bytes(CsumTable)),
    lager:debug("Wrote ~p bytes at ~p of file ~p, NewEOF = ~p~n",
                [iolist_size(Data), Offset, F, NewEof]),
    {reply, Resp, State#state{writes = {T+1, NewErr},
                              eof_position = NewEof}};


%%% TRIMS

handle_call({trim, _Offset, _Size, _TriggerGC}, _From,
            State = #state{wedged = true,
                           trims = {T, Err}
                          }) ->
    %% Count the refusal against the trim counters (it used to be stored
    %% in the write counters by mistake).
    {reply, {error, wedged}, State#state{trims = {T + 1, Err + 1}}};

handle_call({trim, Offset, Size, _TriggerGC}, _From,
            State = #state{data_filehandle=FHd,
                           ops = Ops,
                           trims = {T, Err},
                           csum_table = CsumTable}) ->

    case machi_csum_table:all_trimmed(CsumTable, Offset, Offset+Size) of
        true ->
            %% NOTE(review): this path replies ok yet bumps the error
            %% count and not the total; kept as found - confirm intent.
            NewState = State#state{ops=Ops+1, trims={T, Err+1}},
            %% All bytes of that range was already trimmed returns ok
            %% here, not {error, trimmed}, which means the whole file
            %% was trimmed
            maybe_gc(ok, NewState);
        false ->
            %% Chunks adjacent to the trimmed range may need their
            %% checksums regenerated before the table is updated.
            LUpdate = maybe_regenerate_checksum(
                        FHd,
                        machi_csum_table:find_leftneighbor(CsumTable, Offset)),
            RUpdate = maybe_regenerate_checksum(
                        FHd,
                        machi_csum_table:find_rightneighbor(CsumTable, Offset+Size)),

            case machi_csum_table:trim(CsumTable, Offset, Size, LUpdate, RUpdate) of
                ok ->
                    {NewEof, infinity} = lists:last(machi_csum_table:calc_unwritten_bytes(CsumTable)),
                    NewState = State#state{ops=Ops+1,
                                           trims={T+1, Err},
                                           eof_position=NewEof},
                    maybe_gc(ok, NewState);
                Error ->
                    {reply, Error, State#state{ops=Ops+1, trims={T, Err+1}}}
            end
    end;

%% APPENDS

handle_call({append, _ClientMeta, _Extra, _Data}, _From,
            State = #state{wedged = true,
                           appends = {T, Err}
                          }) ->
    {reply, {error, wedged}, State#state{appends = {T+1, Err+1}}};

handle_call({append, ClientMeta, Extra, Data}, _From,
            State = #state{eof_position = EofP,
                           filename = F,
                           appends = {T, Err},
                           data_filehandle = FHd,
                           csum_table = CsumTable
                          }) ->

    ClientCsumTag = proplists:get_value(client_csum_tag, ClientMeta, ?CSUM_TAG_NONE),
    ClientCsum = proplists:get_value(client_csum, ClientMeta, <<>>),

    {Resp, NewErr} =
        case check_or_make_tagged_csum(ClientCsumTag, ClientCsum, Data) of
            {error, {bad_csum, Bad}} ->
                lager:error("Bad checksum; client sent ~p, we computed ~p",
                            [ClientCsum, Bad]),
                {{error, bad_checksum}, Err + 1};
            {error, Reason} ->
                %% Any other error (e.g. an unknown checksum tag) must not
                %% fall through and be stored as if it were a checksum.
                lager:error("Cannot verify checksum on append: ~p", [Reason]),
                {{error, bad_checksum}, Err + 1};
            TaggedCsum ->
                case handle_write(FHd, CsumTable, F, TaggedCsum, EofP, Data) of
                    ok ->
                        {{ok, F, EofP}, Err};
                    Error ->
                        {Error, Err + 1}
                end
        end,
    %% NOTE(review): EOF advances by the data size plus Extra even when
    %% the append failed; kept as found - confirm this is intended.
    NewEof = EofP + byte_size(Data) + Extra,
    lager:debug("appended ~p bytes at ~p file ~p. NewEofP = ~p",
                [iolist_size(Data), EofP, F, NewEof]),
    {reply, Resp, State#state{appends = {T+1, NewErr},
                              eof_position = NewEof}};

handle_call({checksum_list}, _From, State = #state{csum_table=T}) ->
    All = machi_csum_table:all(T),
    {reply, {ok, All}, State};

handle_call(Req, _From, State) ->
    lager:warning("Unknown call: ~p", [Req]),
    {reply, whoaaaaaaaaaaaa, State}.
|
|
||||||
|
|
||||||
% @private
% No casts are part of this server's protocol; log and ignore them.
handle_cast(Msg, St) ->
    lager:warning("Unknown cast: ~p", [Msg]),
    {noreply, St}.
|
|
||||||
|
|
||||||
% @private
% Handles the periodic `tick' message (scheduled every ?TICK ms) and logs
% anything else. On each tick, in clause order:
%   1. EOF has reached the maximum file size: close files, request a
%      rollover from machi_flu_metadata_mgr and stop.
%   2. The proxy is wedged: stop with reason `wedged'.
%   3. More than 100 ops and the read/write/append error percentage is
%      above ?TOO_MANY_ERRORS_RATIO: stop with reason `too_many_errors'.
%   4. No new operations since the previous tick and the idle counter has
%      reached ?TICK_THRESHOLD: stop normally.
%   5. No new operations since the previous tick: bump the idle counter.
%   6. Otherwise there was activity: remember the new op count and reset
%      the idle counter, so that only consecutive idle ticks lead to
%      shutdown.
handle_info(tick, State = #state{fluname = FluName,
                                 filename = F,
                                 eof_position = Eof,
                                 max_file_size = MaxFileSize}) when Eof >= MaxFileSize ->
    %% Older code halted here with {stop, file_rollover, State}.
    %% However, there may be other requests in our mailbox already
    %% and/or not yet delivered but in a race with the
    %% machi_flu_metadata_mgr. So we close our eleveldb instance (to
    %% avoid double-open attempt by a new file proxy proc), tell
    %% machi_flu_metadata_mgr that we request a rollover, then stop.
    %% terminate() will take care of forwarding messages that are
    %% caught in the race.
    lager:notice("Eof ~s position ~p >= max file size ~p. Shutting down.",
                 [F, Eof, MaxFileSize]),
    State2 = close_files(State),
    machi_flu_metadata_mgr:stop_proxy_pid_rollover(FluName, {file, F}),
    {stop, normal, State2#state{rollover = true}};

%% XXX Is this a good idea? Need to think this through a bit.
handle_info(tick, State = #state{wedged = true}) ->
    {stop, wedged, State};

%% I dunno. This may not be a good idea, but it seems like if we're throwing lots of
%% errors, we ought to shut down and give up our file descriptors.
handle_info(tick, State = #state{
                             ops = Ops,
                             reads = {RT, RE},
                             writes = {WT, WE},
                             appends = {AT, AE}
                            }) when Ops > 100 andalso
                                    trunc(((RE+WE+AE) / (RT+WT+AT)) * 100) > ?TOO_MANY_ERRORS_RATIO ->
    Errors = RE + WE + AE,
    lager:notice("Got ~p errors. Shutting down.", [Errors]),
    {stop, too_many_errors, State};

handle_info(tick, State = #state{
                             ticks = Ticks,
                             ops = Ops,
                             reads = {RT, _RE},
                             writes = {WT, _WE},
                             appends = {AT, _AE}}) when Ops == RT + WT + AT, Ticks == ?TICK_THRESHOLD ->
    %% Report the configured threshold, not a hard-coded number.
    lager:debug("Got ~p ticks with no new activity. Shutting down.",
                [?TICK_THRESHOLD]),
    {stop, normal, State};

handle_info(tick, State = #state{
                             ticks = Ticks,
                             ops = Ops,
                             reads = {RT, _RE},
                             writes = {WT, _WE},
                             appends = {AT, _AE}}) when Ops == RT + WT + AT ->
    lager:debug("No new activity since last tick. Incrementing tick counter."),
    Tref = schedule_tick(),
    {noreply, State#state{tref = Tref, ticks = Ticks + 1}};

handle_info(tick, State = #state{
                             reads = {RT, _RE},
                             writes = {WT, _WE},
                             appends = {AT, _AE}
                            }) ->
    Ops = RT + WT + AT,
    lager:debug("Setting ops counter to ~p", [Ops]),
    Tref = schedule_tick(),
    %% Activity was seen, so the quiescent-tick count starts over.
    {noreply, State#state{tref = Tref, ops = Ops, ticks = 0}};

%handle_info({wedged, EpochId} State = #state{epoch = E}) when E /= EpochId ->
%    lager:notice("Wedge epoch ~p but ignoring because our epoch id is ~p", [EpochId, E]),
%    {noreply, State};

%handle_info({wedged, EpochId}, State = #state{epoch = E}) when E == EpochId ->
%    lager:notice("Wedge epoch ~p same as our epoch id ~p; we are wedged. Bummer.", [EpochId, E]),
%    {noreply, State#state{wedged = true}};

% flu1.erl:
% ProxyPid = get_proxy_pid(Filename),
% Are we wedged? if not
% machi_file_proxy:read(Pid, Offset, Length)
% otherwise -> error,wedged
%
% get_proxy_pid(Filename) ->
%   Pid = lookup_pid(Filename)
%   is_pid_alive(Pid)
%   Pid
%   if not alive then start one

handle_info(Req, State) ->
    lager:warning("Unknown info message: ~p", [Req]),
    {noreply, State}.
|
|
||||||
|
|
||||||
% @private
% Log the shutdown reason and the per-operation totals/errors, close the
% files, and - when stopping because of a rollover - forward messages
% that arrived too late to be handled by this process.
terminate(Reason, State) ->
    #state{fluname = FluName,
           filename = F,
           rollover = Rollover_p,
           reads = {ReadTotal, ReadErrs},
           writes = {WriteTotal, WriteErrs},
           appends = {AppendTotal, AppendErrs}} = State,
    lager:info("Shutting down proxy for file ~p because ~p", [F, Reason]),
    lager:info(" Op Tot/Error", []),
    lager:info(" Reads: ~p/~p", [ReadTotal, ReadErrs]),
    lager:info(" Writes: ~p/~p", [WriteTotal, WriteErrs]),
    lager:info("Appends: ~p/~p", [AppendTotal, AppendErrs]),
    close_files(State),
    case Rollover_p of
        true ->
            forward_late_messages(FluName, F, 500);
        _ ->
            ok
    end,
    ok.
|
|
||||||
|
|
||||||
% @private
% Hot code upgrade hook: the state is carried over unchanged.
code_change(_FromVsn, St, _Extra) ->
    {ok, St}.
|
|
||||||
|
|
||||||
%% Private functions
|
|
||||||
|
|
||||||
%% Arrange for a `tick' message to be sent to this process after ?TICK
%% milliseconds; returns the timer reference.
-spec schedule_tick() -> reference().
schedule_tick() ->
    Self = self(),
    erlang:send_after(?TICK, Self, tick).
|
|
||||||
|
|
||||||
%% Produce the tagged checksum to store or compare for `Data'.
%%   * ?CSUM_TAG_NONE: no checksum was supplied, so one is computed and
%%     tagged `server_sha'.
%%   * ?CSUM_TAG_CLIENT_SHA / ?CSUM_TAG_SERVER_SHA: the supplied checksum
%%     is verified against `Data'; on success it is tagged `server_sha'.
%%   * ?CSUM_TAG_SERVER_REGEN_SHA: verified the same way; on success it is
%%     tagged `server_regen_sha'.
%%   * any other tag: logged and rejected.
%% Every failure is reported as `{error, {bad_csum, Computed}}', where
%% `Computed' is the checksum calculated from `Data'. Callers match that
%% shape, and the unknown-tag case used to return a different one.
-spec check_or_make_tagged_csum(Type     :: non_neg_integer(),
                                Checksum :: binary(),
                                Data     :: binary() ) -> binary() |
                                                          {error, {bad_csum, Bad :: binary()}}.
check_or_make_tagged_csum(?CSUM_TAG_NONE, _Csum, Data) ->
    %% We are making a checksum here
    Csum = machi_util:checksum_chunk(Data),
    machi_util:make_tagged_csum(server_sha, Csum);
check_or_make_tagged_csum(Tag, InCsum, Data) when Tag == ?CSUM_TAG_CLIENT_SHA;
                                                  Tag == ?CSUM_TAG_SERVER_SHA ->
    verify_tagged_csum(server_sha, InCsum, Data);
check_or_make_tagged_csum(?CSUM_TAG_SERVER_REGEN_SHA,
                          InCsum, Data) ->
    verify_tagged_csum(server_regen_sha, InCsum, Data);
check_or_make_tagged_csum(OtherTag, _ClientCsum, Data) ->
    lager:warning("Unknown checksum tag ~p", [OtherTag]),
    {error, {bad_csum, machi_util:checksum_chunk(Data)}}.

%% Recompute the checksum of `Data' and compare it with `InCsum'; on a
%% match return it tagged with `OutTag', otherwise report the mismatch.
verify_tagged_csum(OutTag, InCsum, Data) ->
    case machi_util:checksum_chunk(Data) of
        InCsum ->
            machi_util:make_tagged_csum(OutTag, InCsum);
        Csum ->
            {error, {bad_csum, Csum}}
    end.
|
|
||||||
|
|
||||||
-spec do_read(FHd        :: file:io_device(),
              Filename   :: string(),
              CsumTable  :: machi_csum_table:table(),
              Offset     :: non_neg_integer(),
              Size       :: non_neg_integer(),
              NoChunk    :: boolean(),
              NoChecksum :: boolean()
             ) -> {ok, {Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum :: binary()}],
                        Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]}} |
                  {error, bad_checksum} |
                  {error, partial_read} |
                  {error, file:posix()} |
                  {error, Other :: term() }.
% @private Attempt a read operation on the given offset and length.
% The checksum table is asked for every entry overlapping the requested
% range, and each entry is then read from the data file by
% read_all_ranges/5, whose result is returned unchanged.
% The NoChunk and NoChecksum arguments are accepted but currently unused.
do_read(FHd, Filename, CsumTable, Offset, Size, _NoChunk, _NoChecksum) ->
    %% find/3 returns only overlapping entries; their borders are not
    %% aligned to the requested Offset and Size.
    Ranges = machi_csum_table:find(CsumTable, Offset, Size),
    read_all_ranges(FHd, Filename, Ranges, [], []).
|
|
||||||
|
|
||||||
%% Read every `{Offset, Size, CsumOrTrimmed}' entry from the data file,
%% accumulating chunks that were read and ranges that were trimmed (both
%% returned in input order).
%%   * `trimmed' entries are recorded as `{Filename, Offset, Size}'.
%%   * Entries whose stored checksum is the atom `none' are returned with
%%     a checksum tagged `none'.
%%   * Other entries are verified against their stored tagged checksum; a
%%     mismatch aborts with `{error, bad_checksum}'.
%%   * A short read aborts with `{error, partial_read}'; `eof' from the
%%     file skips the entry; any other result aborts with `{error, Other}'.
-spec read_all_ranges(file:io_device(), string(),
                      [{non_neg_integer(),non_neg_integer(),trimmed|binary()}],
                      Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum::binary()}],
                      Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]) ->
                             {ok, {
                                Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum::binary()}],
                                Trimmed :: [{string(), Offset::non_neg_integer(), Size::non_neg_integer()}]}} |
                             {error, term()|partial_read}.
read_all_ranges(_, _, [], ReadChunks, TrimmedChunks) ->
    %% Accumulators were built in reverse; restore input order.
    {ok, {lists:reverse(ReadChunks), lists:reverse(TrimmedChunks)}};

read_all_ranges(FHd, Filename, [{Offset, Size, trimmed}|T], ReadChunks, TrimmedChunks) ->
    read_all_ranges(FHd, Filename, T, ReadChunks, [{Filename, Offset, Size}|TrimmedChunks]);

read_all_ranges(FHd, Filename, [{Offset, Size, TaggedCsum}|T], ReadChunks, TrimmedChunks) ->
    case file:pread(FHd, Offset, Size) of
        eof ->
            %% Nothing on disk for this entry; skip it.
            read_all_ranges(FHd, Filename, T, ReadChunks, TrimmedChunks);
        {ok, Bytes} when byte_size(Bytes) == Size, TaggedCsum =:= none ->
            read_all_ranges(FHd, Filename, T,
                            [{Filename, Offset, Bytes,
                              machi_util:make_tagged_csum(none, <<>>)}|ReadChunks],
                            TrimmedChunks);
        {ok, Bytes} when byte_size(Bytes) == Size ->
            {Tag, Ck} = machi_util:unmake_tagged_csum(TaggedCsum),
            case check_or_make_tagged_csum(Tag, Ck, Bytes) of
                {error, Bad} ->
                    lager:error("Bad checksum; got ~p, expected ~p",
                                [Bad, Ck]),
                    {error, bad_checksum};
                %% TaggedCsum is already bound: this clause matches only
                %% when the recomputed value equals the stored one.
                TaggedCsum ->
                    read_all_ranges(FHd, Filename, T,
                                    [{Filename, Offset, Bytes, TaggedCsum}|ReadChunks],
                                    TrimmedChunks);
                OtherCsum when Tag =:= ?CSUM_TAG_NONE ->
                    %% XXX FIXME: Should we return something other than
                    %% {ok, ....} in this case?
                    read_all_ranges(FHd, Filename, T,
                                    [{Filename, Offset, Bytes, OtherCsum}|ReadChunks],
                                    TrimmedChunks)
            end;
        {ok, Partial} ->
            lager:error("In file ~p, offset ~p, wanted to read ~p bytes, but got ~p",
                        [Filename, Offset, Size, byte_size(Partial)]),
            {error, partial_read};
        Other ->
            lager:error("While reading file ~p, offset ~p, length ~p, got ~p",
                        [Filename, Offset, Size, Other]),
            {error, Other}
    end.
|
|
||||||
|
|
||||||
-spec handle_write( FHd :: file:io_device(),
                    CsumTable :: machi_csum_table:table(),
                    Filename :: string(),
                    TaggedCsum :: binary(),
                    Offset :: non_neg_integer(),
                    Data :: binary()
                  ) -> ok |
                       {error, written} |
                       {error, Reason :: term()}.
%% @private Implements the write and append operations.
%%
%% The checksum table is consulted first to learn whether anything is
%% already recorded for the target byte range:
%% <ul>
%% <li> Nothing recorded: the write is allowed to proceed via
%%      `do_write/7'.  Any `error'-class exception raised there is
%%      converted into `{error, Reason}'.</li>
%% <li> Exactly one entry with the same offset, size and checksum: the
%%      data is read back from disk and, if it is identical to `Data',
%%      a "fake" `ok' is returned as if the write had been performed.
%%      </li>
%% <li> Exactly one entry with the same offset and size but a different
%%      checksum: `{error, written}'.</li>
%% <li> Anything else: `{error, trimmed}' if the checksum table reports
%%      any trimmed byte in the range, otherwise `{error, written}'.
%%      </li>
%% </ul>
handle_write(FHd, CsumTable, Filename, TaggedCsum, Offset, Data) ->
    Size = iolist_size(Data),
    Existing = machi_csum_table:find(CsumTable, Offset, Size),
    case Existing of
        [] ->
            %% Nothing should be there, so go ahead and write.
            try
                do_write(FHd, CsumTable, Filename, TaggedCsum, Offset, Size, Data)
            catch
                %% XXX FIXME: be more specific about the badmatch that
                %% can occur in do_write/7 when the checksum table entry
                %% is written for the data blob just put on the disk.
                error:Reason ->
                    {error, Reason}
            end;
        [{Offset, Size, TaggedCsum}] ->
            %% Same range and same checksum: possibly a repeated write.
            ReadBack = do_read(FHd, Filename, CsumTable, Offset, Size, false, false),
            case ReadBack of
                {ok, {[{_, Offset, Data, TaggedCsum}], _}} ->
                    %% TODO: what if a different checksum comes back
                    %% from do_read()?
                    ok;
                {ok, _Other} ->
                    %% TODO: leave some debug/warning message here?
                    {error, written};
                {error, _} = ReadError ->
                    lager:warning("This should never happen: got ~p while reading"
                                  " at offset ~p in file ~p that's supposedly written",
                                  [ReadError, Offset, Filename]),
                    {error, server_insanity}
            end;
        [{Offset, Size, OtherCsum}] ->
            %% A checksum is recorded, but it is not the data block's.
            lager:error("During a potential write at offset ~p in file ~p,"
                        " a check for unwritten bytes gave us checksum ~p"
                        " but the data we were trying to write has checksum ~p",
                        [Offset, Filename, OtherCsum, TaggedCsum]),
            {error, written};
        _Chunks ->
            %% TODO: Do we try to read all continuous chunks to see
            %% whether their total checksum matches the client-provided
            %% checksum?
            AnyTrimmed = machi_csum_table:any_trimmed(CsumTable, Offset, Size),
            case AnyTrimmed of
                false ->
                    %% No byte is trimmed, but at least one is written.
                    {error, written};
                true ->
                    %% At least part of the range is trimmed.  The exact
                    %% written bytes are not reported here; clients must
                    %% issue read_chunk() with the needs_trimmed option
                    %% set to true to find out.
                    {error, trimmed}
            end
    end.
|
|
||||||
|
|
||||||
%% @private Performs the disk write shared by the write and append
%% operations, then records the chunk in the checksum table.
%%
%% After a successful `file:pwrite/3' the neighbouring checksum table
%% entries on either side of the new chunk are looked up and passed
%% through `maybe_regenerate_checksum/2'; the results are handed to
%% `machi_csum_table:write/6' together with the new entry.  A failed
%% `pwrite' is logged and returned as `{error, Other}'.  A checksum
%% table write that does not return `ok' raises a badmatch.
-spec do_write( FHd :: file:io_device(),
                CsumTable :: machi_csum_table:table(),
                Filename :: string(),
                TaggedCsum :: binary(),
                Offset :: non_neg_integer(),
                Size :: non_neg_integer(),
                Data :: binary()
              ) -> ok | {error, Reason :: term()}.
do_write(FHd, CsumTable, Filename, TaggedCsum, Offset, Size, Data) ->
    WriteResult = file:pwrite(FHd, Offset, Data),
    case WriteResult of
        ok ->
            lager:debug("Successful write in file ~p at offset ~p, length ~p",
                        [Filename, Offset, Size]),

            %% Neighbouring chunks get a server-regenerated checksum
            %% computed from the bytes currently on disk.
            LeftNeighbor = machi_csum_table:find_leftneighbor(CsumTable, Offset),
            LUpdate = maybe_regenerate_checksum(FHd, LeftNeighbor),
            RightNeighbor = machi_csum_table:find_rightneighbor(CsumTable, Offset+Size),
            RUpdate = maybe_regenerate_checksum(FHd, RightNeighbor),
            ok = machi_csum_table:write(CsumTable, Offset, Size,
                                        TaggedCsum, LUpdate, RUpdate),
            lager:debug("Successful write to checksum file for ~p",
                        [Filename]),
            ok;
        Other ->
            lager:error("Got ~p during write to file ~p at offset ~p, length ~p",
                        [Other, Filename, Offset, Size]),
            {error, Other}
    end.
|
|
||||||
|
|
||||||
%% @doc Trim the left border of the first chunk and the right border of
%% the last chunk so that the chunk list fits into the given range
%% [LeftPos, RightPos].  A chunk that gets sliced receives a new
%% server-regenerated checksum computed over the bytes that remain
%% after slicing.  TODO: write unit tests for this function.
%%
%% NOTE(review): Dialyzer has reported the `[]' clause below as
%% 'can never match'; it is kept so an empty chunk list returns `[]'.
slice_both_side([], _, _) ->
    [];
slice_both_side([{F, Offset, Chunk, _Csum}|L], LeftPos, RightPos)
  when Offset < LeftPos andalso LeftPos < RightPos ->
    %% The first chunk starts before LeftPos: drop its leading bytes.
    TrashLen = (LeftPos - Offset),
    <<_:TrashLen/binary, NewChunk/binary>> = Chunk,
    %% Checksum the bytes actually returned, not the unsliced chunk.
    NewChecksum = machi_util:make_tagged_csum(?CSUM_TAG_SERVER_REGEN_SHA_ATOM,
                                              machi_util:checksum_chunk(NewChunk)),
    NewH = {F, LeftPos, NewChunk, NewChecksum},
    %% The new head starts exactly at LeftPos, so the recursive call
    %% falls through to the right-border clause.
    slice_both_side([NewH|L], LeftPos, RightPos);
slice_both_side(Chunks, LeftPos, RightPos) when LeftPos =< RightPos ->
    %% TODO: optimize
    [{F, Offset, Chunk, _Csum}|L] = lists:reverse(Chunks),
    Size = iolist_size(Chunk),
    if RightPos < Offset + Size ->
            %% The last chunk extends past RightPos: drop its tail.
            NewSize = RightPos - Offset,
            <<NewChunk:NewSize/binary, _/binary>> = Chunk,
            NewChecksum = machi_util:make_tagged_csum(?CSUM_TAG_SERVER_REGEN_SHA_ATOM,
                                                      machi_util:checksum_chunk(NewChunk)),
            lists:reverse([{F, Offset, NewChunk, NewChecksum}|L]);
       true ->
            Chunks
    end.
|
|
||||||
|
|
||||||
%% @private Recompute the checksum of a neighbouring chunk from the
%% bytes currently on disk.
%%
%% `undefined' (no neighbour) and `trimmed' entries are passed through
%% unchanged.  For any other `{Offset, Size, _Csum}' the bytes are read
%% back and a `server_regen_sha' tagged checksum is returned in place
%% of the old one.
%%
%% Every read failure raises an `error'-class exception so that a
%% caller's `catch error:Reason' sees it:
%% `{eof, Offset, Size}', `{partial_read, Offset, Size, BytesRead}' or
%% `{PosixReason, Offset, Size}'.
maybe_regenerate_checksum(_, undefined) ->
    undefined;
maybe_regenerate_checksum(_, {_, _, trimmed} = Change) ->
    Change;
maybe_regenerate_checksum(FHd, {Offset, Size, _Csum}) ->
    case file:pread(FHd, Offset, Size) of
        eof ->
            error({eof, Offset, Size});
        {ok, Bytes} when byte_size(Bytes) =:= Size ->
            TaggedCsum = machi_util:make_tagged_csum(server_regen_sha,
                                                     machi_util:checksum_chunk(Bytes)),
            {Offset, Size, TaggedCsum};
        {ok, Short} ->
            %% Report only the length: the bytes themselves may be large.
            error({partial_read, Offset, Size, byte_size(Short)});
        {error, Reason} ->
            error({Reason, Offset, Size})
    end.
|
|
||||||
|
|
||||||
%% @private Garbage-collect the data file once it is fully trimmed.
%%
%% While the end-of-file position is below the maximum file size no GC
%% is attempted.  Otherwise the checksum table is asked whether every
%% chunk between the minimum offset and Eof is trimmed; if so the file
%% is reported as trimmed to the metadata manager, closed and deleted,
%% the checksum table is deleted, and the server is asked to stop.
-spec maybe_gc(term(), #state{}) ->
                      {reply, term(), #state{}} | {stop, normal, term(), #state{}}.
maybe_gc(Reply, S = #state{eof_position = Eof,
                           max_file_size = MaxFileSize}) when Eof < MaxFileSize ->
    lager:debug("The file is still small; not trying GC (Eof, MaxFileSize) = (~p, ~p)~n",
                [Eof, MaxFileSize]),
    {reply, Reply, S};
maybe_gc(Reply, S = #state{fluname = FluName,
                           data_filehandle = FHd,
                           data_dir = DataDir,
                           filename = Filename,
                           eof_position = Eof,
                           csum_table = CsumTable}) ->
    FullyTrimmed = machi_csum_table:all_trimmed(CsumTable, ?MINIMUM_OFFSET, Eof),
    case FullyTrimmed of
        false ->
            {reply, Reply, S};
        true ->
            lager:debug("GC? Let's do it: ~p.~n", [Filename]),
            %% The metadata manager must learn that the file is trimmed
            %% *before* the file proxy unlinks it, so the filename is
            %% never reused or resurrected.  If this process crashes in
            %% between, garbage may remain; that should be recovered at
            %% filename manager startup.
            ok = machi_flu_metadata_mgr:trim_file(FluName, {file, Filename}),
            ok = file:close(FHd),
            {_, DPath} = machi_util:make_data_filename(DataDir, Filename),
            ok = file:delete(DPath),
            machi_csum_table:delete(CsumTable),
            {stop, normal, Reply,
             S#state{data_filehandle = undefined,
                     csum_table = undefined}}
    end.
|
|
||||||
|
|
||||||
%% @private Sync and close the data file and close the checksum table,
%% skipping whichever of the two is already `undefined' (file deleted).
%% Returns the state with both handles cleared.
close_files(State = #state{data_filehandle = FHd,
                           csum_table = T}) ->
    if FHd =:= undefined ->
            noop; %% file deleted
       true ->
            ok = file:sync(FHd),
            ok = file:close(FHd)
    end,
    if T =:= undefined ->
            noop; %% file deleted
       true ->
            ok = machi_csum_table:close(T)
    end,
    State#state{data_filehandle = undefined, csum_table = undefined}.
|
|
||||||
|
|
||||||
%% @private Drain this process's mailbox, forwarding every message to
%% the proxy pid that the metadata manager returns for file `F'.
%% Returns `ok' once no message has arrived for `Timeout' milliseconds.
%% If the manager reports the file as trimmed the message is logged
%% and dropped.
forward_late_messages(FluName, F, Timeout) ->
    receive
        Msg ->
            Started = machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, F}),
            case Started of
                {ok, Pid} ->
                    Pid ! Msg;
                {error, trimmed} ->
                    lager:error("TODO: FLU ~p file ~p reports trimmed status "
                                "when forwarding ~P\n",
                                [FluName, F, Msg, 20])
            end,
            forward_late_messages(FluName, F, Timeout)
    after Timeout ->
            ok
    end.
|
|
|
@ -1,57 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc This is the main supervisor for the file proxies.
|
|
||||||
-module(machi_file_proxy_sup).
|
|
||||||
-behaviour(supervisor).
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
-export([
|
|
||||||
child_spec/1,
|
|
||||||
start_link/1,
|
|
||||||
start_proxy/3
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% supervisor callback
|
|
||||||
-export([
|
|
||||||
init/1
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% @doc Child specification for running this supervisor under a parent
%% supervisor: permanent, 5000 ms shutdown, identified by the
%% per-FLU name from `make_proxy_name/1'.
child_spec(FluName) ->
    {make_proxy_name(FluName),
     {?MODULE, start_link, [FluName]},
     permanent, 5000, supervisor, [?MODULE]}.
|
|
||||||
|
|
||||||
%% @doc Start the file proxy supervisor for `FluName', registered
%% locally under the name from `make_proxy_name/1'.
start_link(FluName) ->
    SupName = {local, make_proxy_name(FluName)},
    supervisor:start_link(SupName, ?MODULE, []).
|
|
||||||
|
|
||||||
%% @doc Start a file proxy child under the supervisor for `FluName'.
%% Note that the child receives its arguments in the order
%% `[FluName, Filename, DataDir]', which differs from this function's
%% own parameter order.
start_proxy(FluName, DataDir, Filename) ->
    Sup = make_proxy_name(FluName),
    ChildArgs = [FluName, Filename, DataDir],
    supervisor:start_child(Sup, ChildArgs).
|
|
||||||
|
|
||||||
%% @private Supervisor callback: a `simple_one_for_one' supervisor
%% (restart intensity 1000 within 10 seconds) whose children are
%% temporary `machi_file_proxy' workers with a 2000 ms shutdown.
init([]) ->
    {ok, {{simple_one_for_one, 1000, 10},
          [{unused, {machi_file_proxy, start_link, []},
            temporary, 2000, worker, [machi_file_proxy]}]}}.
|
|
||||||
|
|
||||||
%% @private Registered name of the file proxy supervisor for a FLU:
%% the FLU name with `_file_proxy_sup' appended.
make_proxy_name(FluName) when is_atom(FluName) ->
    list_to_atom(lists:concat([FluName, "_file_proxy_sup"])).
|
|
|
@ -1,473 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_fitness).
|
|
||||||
|
|
||||||
-behaviour(gen_server).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-endif. % TEST
|
|
||||||
|
|
||||||
-define(LWWREG, riak_dt_lwwreg).
|
|
||||||
-define(MAP, riak_dt_map).
|
|
||||||
|
|
||||||
-define(DELAY_TIME_MS, 300). % TODO make configurable!
|
|
||||||
|
|
||||||
%% API
|
|
||||||
-export([start_link/1,
|
|
||||||
get_unfit_list/1, update_local_down_list/3,
|
|
||||||
add_admin_down/3, delete_admin_down/2,
|
|
||||||
send_fitness_update_spam/3,
|
|
||||||
send_spam_to_everyone/1,
|
|
||||||
trigger_early_adjustment/2]).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
|
|
||||||
terminate/2, code_change/3, format_status/2]).
|
|
||||||
|
|
||||||
%% Server state of the fitness gen_server.
-record(state, {
|
|
||||||
my_flu_name :: atom() | binary(), % name of the local FLU, taken from the init/1 argument
|
|
||||||
reg_name :: atom(), % name this process registers itself under in init/1
|
|
||||||
local_down=[] :: list(), % down list last passed to update_local_down_list/3
|
|
||||||
admin_down=[] :: list({term(),term()}), % {DownFLU, DownProps} pairs set via add_admin_down/3
|
|
||||||
members_dict=orddict:new() :: orddict:orddict(), % current members; spam is sent to each of its entries
|
|
||||||
proxies_dict=orddict:new() :: orddict:orddict(), % result of machi_proxy_flu1_client:start_proxies/1 for members_dict
|
|
||||||
active_unfit=[] :: list(), % list returned by get_unfit_list/1
|
|
||||||
pending_map=?MAP:new() :: ?MAP:riak_dt_map(), % per-server {Time, Down, AdminDownServers, Props} reports
|
|
||||||
partition_simulator_p :: boolean() % value of the use_partition_simulator option (default false)
|
|
||||||
}).
|
|
||||||
|
|
||||||
%% @doc Start the fitness server, passing `Args' to `init/1' with no
%% gen_server options.
start_link(Args) ->
    GenServerOpts = [],
    gen_server:start_link(?MODULE, Args, GenServerOpts).
|
|
||||||
|
|
||||||
%% @doc Fetch the server's current active-unfit list.  Waits
%% indefinitely for the reply.
get_unfit_list(PidSpec) ->
    Request = {get_unfit_list},
    gen_server:call(PidSpec, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Report the locally observed down list together with the current
%% members dictionary.  Waits indefinitely for the reply.
update_local_down_list(PidSpec, Down, MembersDict) ->
    Request = {update_local_down_list, Down, MembersDict},
    gen_server:call(PidSpec, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Mark `DownFLU' as administratively down, attaching `DownProps'.
%% Waits indefinitely for the reply.
add_admin_down(PidSpec, DownFLU, DownProps) ->
    Request = {add_admin_down, DownFLU, DownProps},
    gen_server:call(PidSpec, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Remove `DownFLU' from the administratively-down list.  Waits
%% indefinitely for the reply.
delete_admin_down(PidSpec, DownFLU) ->
    Request = {delete_admin_down, DownFLU},
    gen_server:call(PidSpec, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Deliver a fitness update from `FromName' to the server as an
%% `incoming_spam' request.  Waits indefinitely for the reply.
send_fitness_update_spam(Pid, FromName, Dict) ->
    Request = {incoming_spam, FromName, Dict},
    gen_server:call(Pid, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Ask the server to send its pending map to the other members.
%% Waits indefinitely for the reply.
send_spam_to_everyone(Pid) ->
    Request = {send_spam_to_everyone},
    gen_server:call(Pid, Request, infinity).
|
|
||||||
|
|
||||||
%% @doc Testing aid: send the `adjust_down_list' message for `FLU'
%% immediately, so a test does not have to wait for wall-clock time to
%% elapse before the fitness server makes its down->up status decision.
trigger_early_adjustment(Pid, FLU) ->
    Msg = {adjust_down_list, FLU},
    Pid ! Msg.
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
%% @private gen_server callback.  Registers the process under the
%% fitness name for `MyFluName', starts a 5 second `debug_dump' timer
%% and reads the `use_partition_simulator' option (default `false').
%% The initial local down list is deliberately bogus so that the first
%% real update differs from it and triggers spam.
init([{MyFluName}|Args]) ->
    RegName = machi_flu_psup:make_fitness_regname(MyFluName),
    register(RegName, self()),
    {ok, _} = timer:send_interval(5000, debug_dump),
    UseSimulatorP = proplists:get_value(use_partition_simulator, Args, false),
    InitialState = #state{my_flu_name = MyFluName,
                          reg_name = RegName,
                          partition_simulator_p = UseSimulatorP,
                          local_down = [complete_bogus_here_to_trigger_initial_spam]},
    {ok, InitialState}.
|
|
||||||
|
|
||||||
%% @private gen_server callback for the synchronous API requests.
handle_call({get_unfit_list}, _From, #state{active_unfit=ActiveUnfit}=S) ->
    {reply, ActiveUnfit, S};
handle_call({update_local_down_list, Down, MembersDict}, _From,
            #state{my_flu_name=MyFluName, pending_map=OldMap,
                   local_down=OldDown, members_dict=OldMembersDict,
                   admin_down=AdminDown}=S) ->
    verbose("FITNESS: ~w has down suspect ~w\n", [MyFluName, Down]),
    NewMap = store_in_map(OldMap, MyFluName, erlang:now(), Down,
                          AdminDown, [props_yo]),
    %% Skip the map change only when both the down list and the members
    %% dictionary are unchanged; a changing members_dict alone is
    %% sufficient reason to spam.
    Unchanged = (Down == OldDown) andalso (MembersDict == OldMembersDict),
    S2 = case Unchanged of
             true ->
                 S;
             false ->
                 do_map_change(NewMap, [MyFluName], MembersDict, S)
         end,
    {reply, ok, S2#state{local_down=Down}};
handle_call({add_admin_down, DownFLU, DownProps}, _From,
            #state{my_flu_name=MyFluName,
                   local_down=OldDown, admin_down=AdminDown}=S) ->
    verbose("FITNESS: ~w add admin down ~w\n", [MyFluName, DownFLU]),
    %% Replace any existing entry for this FLU.
    WithoutFLU = lists:keydelete(DownFLU, 1, AdminDown),
    NewAdminDown = [{DownFLU,DownProps}|WithoutFLU],
    S3 = finish_admin_down(erlang:now(), OldDown, NewAdminDown,
                           [props_yo], S),
    {reply, ok, S3};
handle_call({delete_admin_down, DownFLU}, _From,
            #state{my_flu_name=MyFluName,
                   local_down=OldDown, admin_down=AdminDown}=S) ->
    verbose("FITNESS: ~w delete admin down ~w\n", [MyFluName, DownFLU]),
    NewAdminDown = lists:keydelete(DownFLU, 1, AdminDown),
    S3 = finish_admin_down(erlang:now(), OldDown, NewAdminDown,
                           [props_yo], S),
    {reply, ok, S3};
handle_call({incoming_spam, Author, Dict}, _From, S) ->
    {Res, S2} = do_incoming_spam(Author, Dict, S),
    {reply, Res, S2};
handle_call({send_spam_to_everyone}, _From, S) ->
    {Res, S2} = do_send_spam_to_everyone(S),
    {reply, Res, S2};
handle_call(_Request, _From, S) ->
    %% Unknown request: answer with a recognisably bogus atom.
    {reply, whhhhhhhhhhhhhhaaaaaaaaaaaaaaa, S}.
|
|
||||||
|
|
||||||
%% @private gen_server callback.  Casts are not used; every cast is
%% ignored and the state is left unchanged.
handle_cast(_Ignored, State) ->
    {noreply, State}.
|
|
||||||
|
|
||||||
%% @private gen_server callback for raw messages.
%%
%% `{adjust_down_list, FLU}' recomputes the unfit list and adds `FLU'
%% to, or removes it from, the active unfit list depending on how the
%% recomputed list differs from the active one.
handle_info({adjust_down_list, FLU}, #state{my_flu_name=MyFluName,
                                            active_unfit=ActiveUnfit}=S) ->
    NewUnfit = make_unfit_list(S),
    Added_to_new = NewUnfit -- ActiveUnfit,
    Dropped_from_new = ActiveUnfit -- NewUnfit,
    %% Schedule another round of adjustment messages for everything
    %% that differs.  They may be redundant, but here is a case where
    %% the current code needs them:
    %%
    %%   SET partitions = [{a,c},{b,c},{c,b}] (11 of 26) at {23,37,44}
    %%   Stable spam/gossip is:
    %%     [{a,problem_with,b},{b,problem_with,c},
    %%      {c,problem_with,a},{c,problem_with,b}]
    %%   So everyone agrees unfit=[c].
    %%
    %%   SET partitions = [{c,a}] (12 of 26) at {23,37,48}
    %%   Stable spam/gossip is:
    %%     [{a,problem_with,c},{c,problem_with,a}]
    %%   So everyone *ought* to agree that unfit=[a].
    %%
    %% When the partition list changes to [{c,a}], spam gossip tells us
    %% that the reports by B & C change.  However calc_unfit(), via
    %% make_unfit_list(), decides that *a* is the bad guy and must go
    %% into our active_unfit list, and the only way to get added is via
    %% an {adjust_down_list,...} message.  The usual place that
    %% generates them isn't wise enough because it doesn't call
    %% make_unfit_list().
    %%
    %% The cost is that there will be (at least) a 2x delay on top of
    %% the ?DELAY_TIME_MS waiting period to detect all partitions.
    %%
    %% Aside: there may also be a corner case hiding where this extra
    %% round of messages is needed to *remove* a FLU from the
    %% active_unfit list.
    _ = schedule_adjust_messages(lists:usort(Added_to_new ++ Dropped_from_new)),
    WasAdded = lists:member(FLU, Added_to_new),
    WasDropped = lists:member(FLU, Dropped_from_new),
    case {WasAdded, WasDropped} of
        {false, false} ->
            {noreply, S};
        {true, false} ->
            NewActive = wrap_active(MyFluName, lists:usort(ActiveUnfit++[FLU])),
            {noreply, S#state{active_unfit=NewActive}};
        {false, true} ->
            NewActive = wrap_active(MyFluName, ActiveUnfit--[FLU]),
            {noreply, S#state{active_unfit=NewActive}};
        {true, true} ->
            error({bad, ?MODULE, ?LINE, FLU, ActiveUnfit, NewUnfit})
    end;
handle_info(debug_dump, #state{}=S) ->
    %% Periodic timer message from init/1; currently a no-op hook for
    %% debugging output.
    {noreply, S};
handle_info(_Info, S) ->
    {noreply, S}.
|
|
||||||
|
|
||||||
%% @private gen_server callback.  Nothing to clean up.
terminate(_WhyStopping, _State) ->
    ok.
|
|
||||||
|
|
||||||
%% @private gen_server callback.  Presents the state record as a list
%% of `{FieldName, Value}' pairs.
format_status(_Opt, [_PDict, Status]) ->
    [_RecordTag | FieldValues] = tuple_to_list(Status),
    lists:zip(record_info(fields, state), FieldValues).
|
|
||||||
|
|
||||||
%% @private gen_server callback.  The state needs no conversion.
code_change(_FromVsn, State, _ExtraArgs) ->
    {ok, State}.
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
%% @private Compute the sorted, de-duplicated list of unfit servers
%% from the pending map.
%%
%% Reports older than 900 seconds are ignored.  Every remaining report
%% contributes one `{Server, problem_with, D}' annotation per entry of
%% its down list, plus its administratively-down servers.  The
%% annotations are evaluated by `calc_unfit/2' against the keys of the
%% members dictionary, and the admin-down servers are appended.
make_unfit_list(#state{members_dict=MembersDict}=S) ->
    Now = erlang:now(),
    Collect =
        fun({Server, {UpdateTime, DownList, AdminDown, _Props}},
            {ProblemAcc, AdminAcc}) ->
                AgeSecs = timer:now_diff(Now, UpdateTime) div (1000*1000),
                if AgeSecs > 900 -> % TODO make configurable
                        {ProblemAcc, AdminAcc};
                   true ->
                        Probs = [{Server,problem_with,D} || D <- DownList],
                        {[Probs|ProblemAcc], AdminDown++AdminAcc}
                end
        end,
    {NestedProblems, AdminDown} = map_fold(Collect, {[], []}, S#state.pending_map),
    Problems = lists:flatten(NestedProblems),
    AllMembers = [K || {K,_V} <- orddict:to_list(MembersDict)],
    Unfit = calc_unfit(AllMembers, Problems),
    lists:usort(Unfit ++ AdminDown).
|
|
||||||
|
|
||||||
%% @private Store the report of server `Name' in the map under key
%% `Name', with `Name' also acting as the updating actor.  The stored
%% value is `{Now, Down, AdminDownServers, Props ++ AdminDownProps}',
%% where the admin-down `{Server, Props}' pairs are split into a
%% server list and a property list flattened by one level.
store_in_map(Map, Name, Now, Down, AdminDown, Props) ->
    {AdminDownServers, NestedProps} = lists:unzip(AdminDown),
    AllProps = Props ++ lists:append(NestedProps),
    map_set(Name, Map, Name, {Now, Down, AdminDownServers, AllProps}).
|
|
||||||
|
|
||||||
%% @private Send `NewMap' to every member of `MembersDict' that is not
%% in `DontSendList'.
%%
%% The map travels in the `dbg' field of a projection that carries the
%% special spam epoch number; the remaining projection fields are
%% filler to keep the protocol buffers encoding happy.  Each send runs
%% in its own spawned process and is best effort: failures are not
%% reported.  Always returns `ok'.
send_spam(NewMap, DontSendList, MembersDict, #state{my_flu_name=MyFluName}=S) ->
    SendTo =
        fun(FLU, #p_srvr{address=Host, port=TcpPort}) ->
                SpamProj = machi_projection:update_checksum(
                             #projection_v1{epoch_number=?SPAM_PROJ_EPOCH,
                                            author_server=MyFluName,
                                            dbg=[NewMap],
                                            %% stuff only to make PB happy
                                            all_members=[],
                                            witnesses=[],
                                            creation_time={1,2,3},
                                            mode=ap_mode,
                                            upi=[], repairing=[], down=[],
                                            dbg2=[],
                                            members_dict=[] }),
                %% Best effort, don't care about failure.
                spawn(fun() ->
                              send_projection(FLU, Host, TcpPort, SpamProj, S)
                      end)
        end,
    MaybeSend =
        fun(FLU, P_srvr, Sent) ->
                case lists:member(FLU, DontSendList) of
                    false ->
                        SendTo(FLU, P_srvr),
                        [FLU|Sent];
                    true ->
                        Sent
                end
        end,
    _Sent = orddict:fold(MaybeSend, [], MembersDict),
    ok.
|
|
||||||
|
|
||||||
%% @private Deliver one spam projection to `FLU'.
%%
%% For now this is an utterly temporary hack that tunnels the message
%% through the write_projection API; eventually the PB API should be
%% expanded to accommodate the fitness service.  Delivery is best
%% effort only, so `catch' is used to ignore failures.
%%
%% When the partition simulator is enabled the call is routed through
%% `machi_chain_manager1:perhaps_call/5' with the simulated partitions.
send_projection(FLU, _Host, _TcpPort, SpamProj,
                #state{my_flu_name=MyFluName, members_dict=MembersDict,
                       partition_simulator_p=SimulatorP}=S) ->
    ProxyPid = (catch proxy_pid(FLU, S)),
    %% The argument is ignored: the proxy pid is captured above.
    DoIt = fun(_ArgIgnored) ->
                   machi_proxy_flu1_client:write_projection(ProxyPid,
                                                            public, SpamProj)
           end,
    ProxyPidPlaceholder = proxy_pid_unused,
    case SimulatorP of
        true ->
            AllMembers = [K || {K,_V} <- orddict:to_list(MembersDict)],
            {Partitions, _Islands} = machi_partition_simulator:get(AllMembers),
            machi_chain_manager1:init_remember_down_list(),
            (catch machi_chain_manager1:perhaps_call(ProxyPidPlaceholder,
                                                     MyFluName,
                                                     Partitions, FLU, DoIt));
        _ ->
            (catch DoIt(ProxyPidPlaceholder))
    end.
|
|
||||||
|
|
||||||
%% @private Look up the proxy stored for member `Name'.  Raises if the
%% proxies dictionary has no entry for `Name'.
proxy_pid(Name, #state{proxies_dict=Proxies}) ->
    orddict:fetch(Name, Proxies).
|
|
||||||
|
|
||||||
%% @private Decide which servers are unfit.  Builds a directed graph
%% with one vertex per member and one edge `V1 -> V2' for every
%% `{V1, problem_with, V2}' annotation, then evaluates the vertices in
%% sorted order with `calc_unfit2/2' (which also deletes the graph).
calc_unfit(All_list, HosedAnnotations) ->
    G = digraph:new(),
    lists:foreach(fun(V) -> digraph:add_vertex(G, V) end, All_list),
    lists:foreach(fun({V1, problem_with, V2}) -> digraph:add_edge(G, V1, V2);
                     (_NotAnAnnotation) -> ok
                  end, HosedAnnotations),
    Vertices = lists:sort(digraph:vertices(G)),
    calc_unfit2(Vertices, G).
|
|
||||||
|
|
||||||
%% @private Walk the vertices and collect the unfit ones, keyed on how
%% many servers report a problem with each (its in-degree):
%%   0  - nobody complains about it: fit.
%%   >1 - several servers complain: unfit.
%%   1  - unfit only if the complainer is itself complained about by
%%        exactly one server; that complainer is then removed from the
%%        remaining vertices.
%% The graph is deleted once the vertex list is exhausted.
calc_unfit2([], G) ->
    digraph:delete(G),
    [];
calc_unfit2([V|Rest], G) ->
    InDegree = digraph:in_degree(G, V),
    if InDegree =:= 0 ->
            calc_unfit2(Rest, G);
       InDegree =:= 1 ->
            Accusers = digraph:in_neighbours(G, V),
            SinglyAccused = lists:filter(
                              fun(A) -> digraph:in_degree(G, A) == 1 end,
                              Accusers),
            case SinglyAccused of
                [] ->
                    %% V is "on the end", e.g. 1-2-1, so it's OK.
                    calc_unfit2(Rest, G);
                [Partner|_] ->
                    %% TODO: be smarter here about the choice of which is down.
                    [V|calc_unfit2(Rest -- [Partner], G)]
            end;
       InDegree > 1 ->
            [V|calc_unfit2(Rest, G)]
    end.
|
|
||||||
|
|
||||||
%% @private Handle a map received from another server.  If its value
%% equals that of our pending map nothing happens; otherwise the two
%% maps are merged and the merged map is processed as a map change.
do_incoming_spam(_Author, Map,
                 #state{my_flu_name=MyFluName,pending_map=OldMap,
                        members_dict=MembersDict}=S) ->
    SameValue = map_value(OldMap) == map_value(Map),
    case SameValue of
        true ->
            {ok, S};
        false ->
            Merged = map_merge(OldMap, Map),
            %% Hrm, we may have changes that are interesting to the
            %% Author of this update, so perhaps we shouldn't exclude
            %% Author from our update, right?
            {ok, do_map_change(Merged, [MyFluName], MembersDict, S)}
    end.
|
|
||||||
|
|
||||||
%% @private Send the pending map to every member except ourselves.
%% The state is returned unchanged.
do_send_spam_to_everyone(#state{my_flu_name=Me,
                                pending_map=Pending,
                                members_dict=Members}=S) ->
    _ = send_spam(Pending, [Me], Members, S),
    {ok, S}.
|
|
||||||
|
|
||||||
%% @private Install `NewMap' as the pending map.  Before doing so the
%% new map is spammed to the members (minus `DontSendList'), adjustment
%% messages are scheduled for every server whose down status differs
%% between the old and the new map, and the members/proxies
%% dictionaries are refreshed if the membership changed.
do_map_change(NewMap, DontSendList, MembersDict,
              #state{my_flu_name=MyFluName, pending_map=OldMap}=S) ->
    send_spam(NewMap, DontSendList, MembersDict, S),
    Changed = find_changed_servers(OldMap, NewMap, MyFluName),
    _ = schedule_adjust_messages(Changed),
    Adjusted = perhaps_adjust_members_proxies_dicts(MembersDict, S),
    Adjusted#state{pending_map=NewMap}.
|
|
||||||
|
|
||||||
%% @private If `MembersDict' is exactly the members dictionary already
%% in the state, return the state untouched.  Otherwise stop the old
%% proxies, start proxies for the new membership and store both the
%% new members dictionary and the new proxies dictionary.
perhaps_adjust_members_proxies_dicts(MembersDict,
                                     #state{members_dict=Current,
                                            proxies_dict=OldProxiesDict}=S) ->
    case MembersDict =:= Current of
        true ->
            S;
        false ->
            _ = machi_proxy_flu1_client:stop_proxies(OldProxiesDict),
            ProxiesDict = machi_proxy_flu1_client:start_proxies(MembersDict),
            S#state{members_dict=MembersDict, proxies_dict=ProxiesDict}
    end.
|
|
||||||
|
|
||||||
%% @private Sorted, de-duplicated list of servers whose appearance in
%% the combined down and admin-down lists differs between the two
%% maps.  The third argument is unused.
find_changed_servers(OldMap, NewMap, _MyFluName) ->
    CollectBad = fun({_Who, {_Time, BadList, AdminDown, _Props}}, Acc) ->
                         BadList ++ AdminDown ++ Acc
                 end,
    OldBad = map_fold(CollectBad, [], OldMap),
    NewBad = map_fold(CollectBad, [], NewMap),
    NoLongerBad = OldBad -- NewBad,
    NewlyBad = NewBad -- OldBad,
    lists:usort(NoLongerBad ++ NewlyBad).
|
|
||||||
|
|
||||||
%% @private For each FLU, arrange for `{adjust_down_list, FLU}' to be
%% sent to the calling process after ?DELAY_TIME_MS milliseconds.
%% Returns the list of timer references.
schedule_adjust_messages(FLU_list) ->
    Schedule = fun(FLU) ->
                       erlang:send_after(?DELAY_TIME_MS, self(),
                                         {adjust_down_list, FLU})
               end,
    lists:map(Schedule, FLU_list).
|
|
||||||
|
|
||||||
%% @private Common tail of the add/delete admin-down requests: store
%% our report with the new admin-down list in the map, record the list
%% in the state and process the result as a map change.  `Down' must
%% equal the state's current local down list, otherwise the function
%% head does not match.
finish_admin_down(Time, Down, NewAdminDown, Props,
                  #state{my_flu_name=MyFluName, local_down=Down,
                         pending_map=OldMap, members_dict=MembersDict}=S) ->
    UpdatedMap = store_in_map(OldMap, MyFluName, Time, Down, NewAdminDown, Props),
    WithAdminDown = S#state{admin_down=NewAdminDown},
    do_map_change(UpdatedMap, [MyFluName], MembersDict, WithAdminDown).
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
%% Assign ValTerm (serialised with term_to_binary/1) to the ?LWWREG
%% field named Key, on behalf of Actor.  Returns the updated map and
%% crashes with badmatch if the update is refused.
map_set(Actor, Map, Key, ValTerm) ->
    Op = {update, {Key, ?LWWREG}, {assign, term_to_binary(ValTerm)}},
    {ok, Updated} = ?MAP:update({update, [Op]}, Actor, Map),
    Updated.
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
%% Test helper: look up the ?LWWREG field named Key.  Returns
%% {ok, Term} with the decoded value, or `error' when the field is
%% absent.
map_get(Map, Key) ->
    Field = {Key, ?LWWREG},
    Entries = ?MAP:value(Map),
    case lists:keyfind(Field, 1, Entries) of
        {Field, Encoded} ->
            {ok, binary_to_term(Encoded)};
        false ->
            error
    end.
|
|
||||||
-endif. % TEST
|
|
||||||
|
|
||||||
%% Left-fold Fun over the {Key, Value} pairs of Map in sorted order.
%% map_value/1 already returns a sorted list; the extra sort here is
%% kept as-is.
map_fold(Fun, Acc, Map) ->
    Sorted = lists:sort(map_value(Map)),
    lists:foldl(Fun, Acc, Sorted).
|
|
||||||
|
|
||||||
%% Return the map's contents as a sorted list of {Key, Term} pairs,
%% dropping the field type from each key and decoding each stored
%% binary with binary_to_term/1.
map_value(Map) ->
    Decoded = [{Key, binary_to_term(Bin)} ||
                  {{Key, _FieldType}, Bin} <- ?MAP:value(Map)],
    lists:sort(Decoded).
|
|
||||||
|
|
||||||
%% Merge two maps using the ?MAP module's merge/2.
map_merge(Left, Right) ->
    ?MAP:merge(Left, Right).
|
|
||||||
|
|
||||||
%% Report (through verbose/2) that MyFluName has a new down list, then
%% return that list unchanged.
wrap_active(MyFluName, DownList) ->
    verbose("FITNESS: ~w has new down list ~w\n", [MyFluName, DownList]),
    DownList.
|
|
||||||
|
|
||||||
%% Emit an info message through error_logger, but only when the
%% `machi' application environment has fitness_verbose set to `true'.
%% Any other setting (or none) makes this a no-op returning `ok'.
verbose(Fmt, Args) ->
    Enabled = application:get_env(machi, fitness_verbose) =:= {ok, true},
    if
        Enabled ->
            error_logger:info_msg(Fmt, Args);
        true ->
            ok
    end.
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
%% Sanity check of our understanding of the CRDT map: two maps that
%% were updated independently (different actors, different fields) must
%% merge to the same result regardless of argument order.
dt_understanding_test() ->
    F1 = {'X', riak_dt_lwwreg},
    F2 = {'Y', riak_dt_lwwreg},
    {ok, Map1} = ?MAP:update({update, [{update, F1, {assign, <<"A">>}}]}, a, ?MAP:new()),
    {ok, Map2} = ?MAP:update({update, [{update, F2, {assign, <<"B2">>}}]}, b, ?MAP:new()),
    Merged12 = ?MAP:merge(Map1, Map2),
    Merged21 = ?MAP:merge(Map2, Map1),
    %% EUnit ignores a test function's return value, so the comparison
    %% must be asserted with a match for this test to be able to fail.
    true = (Merged12 == Merged21),
    ok.
|
|
||||||
|
|
||||||
%% Round-trip a few keys through map_set/4 and map_get/2, then check
%% that map_fold/3 visits the keys in ascending order (so consing
%% yields them in descending order).
smoke_test() ->
    Empty = ?MAP:new(),
    WithK1 = map_set(a, Empty, k1, val1),
    WithK2 = map_set(a, WithK1, k2, val2),
    {ok, val1} = map_get(WithK2, k1),
    {ok, val2} = map_get(WithK2, k2),
    error = map_get(WithK2, does_not_exist),
    WithK3 = map_set(a, WithK2, k3, val3),

    Tally = fun({Key, _Val}, Seen) -> [{Key, 1} | Seen] end,
    [{k3,1},{k2,1},{k1,1}] = map_fold(Tally, [], WithK3),
    ok.
|
|
||||||
|
|
||||||
-endif. % TEST
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,193 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc Machi FLU1 append serialization server process
|
|
||||||
|
|
||||||
-module(machi_flu1_append_server).
|
|
||||||
|
|
||||||
-behavior(gen_server).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-endif. % TEST
|
|
||||||
|
|
||||||
-export([start_link/4]).
|
|
||||||
|
|
||||||
-export([init/1]).
|
|
||||||
-export([handle_call/3, handle_cast/2, handle_info/2,
|
|
||||||
terminate/2, code_change/3]).
|
|
||||||
-export([int_update_wedge_state/3, int_wedge_myself/2]).
|
|
||||||
-export([current_state/1, format_state/1]).
|
|
||||||
|
|
||||||
-record(state, {
|
|
||||||
flu_name :: atom(),
|
|
||||||
witness = false :: boolean(),
|
|
||||||
wedged = true :: boolean(),
|
|
||||||
etstab :: ets:tid(),
|
|
||||||
epoch_id :: 'undefined' | machi_dt:epoch_id()
|
|
||||||
}).
|
|
||||||
|
|
||||||
-define(INIT_TIMEOUT, 60*1000).
|
|
||||||
-define(CALL_TIMEOUT, 60*1000).
|
|
||||||
|
|
||||||
%% @doc Start the append server, locally registered under the FLU's
%% "main" name (i.e. the name with no suffix).

-spec start_link(pv1_server(), boolean(), boolean(),
                 undefined | machi_dt:epoch_id()) -> {ok, pid()}.
start_link(Fluname, Witness_p, Wedged_p, EpochId) ->
    InitArgs = [Fluname, Witness_p, Wedged_p, EpochId],
    StartOpts = [{timeout, ?INIT_TIMEOUT}],
    gen_server:start_link({local, Fluname}, ?MODULE, InitArgs, StartOpts).
|
|
||||||
|
|
||||||
%% @doc Fetch the server's state record with a synchronous call that
%% waits at most ?CALL_TIMEOUT milliseconds.

-spec current_state(atom() | pid()) -> term().
current_state(Server) ->
    gen_server:call(Server, current_state, ?CALL_TIMEOUT).
|
|
||||||
|
|
||||||
%% @doc Turn a #state{} record into a list of {FieldName, Value} pairs,
%% in record-definition order.
format_state(State) ->
    [_RecordTag | FieldValues] = tuple_to_list(State),
    lists:zip(record_info(fields, state), FieldValues).
|
|
||||||
|
|
||||||
%% @doc Asynchronously tell the server to adopt a new wedge flag and
%% epoch ID.  Only accepts a boolean flag and a tuple epoch ID.
int_update_wedge_state(Server, Wedged, EpochId)
  when is_boolean(Wedged), is_tuple(EpochId) ->
    Msg = {wedge_state_change, Wedged, EpochId},
    gen_server:cast(Server, Msg).
|
|
||||||
|
|
||||||
%% @doc Asynchronously ask the server to wedge itself for the given
%% epoch ID, which must be a tuple.
int_wedge_myself(Server, EpochId)
  when is_tuple(EpochId) ->
    Msg = {wedge_myself, EpochId},
    gen_server:cast(Server, Msg).
|
|
||||||
|
|
||||||
%% gen_server init: create the FLU's named, protected ETS table, seed
%% it with the initial {Wedged, EpochId} pair under the `epoch' key,
%% and build the initial state.
init([Fluname, Witness_p, Wedged_p, EpochId]) ->
    TableOpts = [set, protected, named_table, {read_concurrency, true}],
    TID = ets:new(machi_flu1:ets_table_name(Fluname), TableOpts),
    ets:insert(TID, {epoch, {Wedged_p, EpochId}}),
    InitialState = #state{flu_name=Fluname,
                          witness=Witness_p,
                          wedged=Wedged_p,
                          etstab=TID,
                          epoch_id=EpochId},
    {ok, InitialState}.
|
|
||||||
|
|
||||||
%% gen_server call handler.
%%
%% `seq_append' requests are answered with `witness' when this FLU is
%% a witness and with `wedged' when it is wedged.  Otherwise the
%% client's EpochID must equal the epoch ID held in our state: on a
%% match the append is handed to a spawned worker that replies to the
%% caller itself via gen_server:reply/2; on a mismatch the caller gets
%% {error, bad_epoch}.
%%
%% `wedge_status' and `current_state' report on the server's state.
%% Any other request is logged and answered with
%% {error, unknown_call}, so that the caller is not left blocked until
%% its own call timeout expires.
handle_call({seq_append, _From2, _NSInfo, _EpochID, _Prefix, _Chunk, _TCSum, _Opts},
            _From, #state{witness=true}=S) ->
    %% The FLU's machi_flu1_net_server process ought to filter all
    %% witness states, but we'll keep this clause for extra
    %% paranoia.
    {reply, witness, S};
handle_call({seq_append, _From2, _NSInfo, _EpochID, _Prefix, _Chunk, _TCSum, _Opts},
            _From, #state{wedged=true}=S) ->
    {reply, wedged, S};
handle_call({seq_append, _From2, NSInfo, EpochID,
             Prefix, Chunk, TCSum, Opts},
            From, #state{flu_name=FluName, epoch_id=OldEpochId}=S) ->
    %% Old is the one from our state, plain old 'EpochID' comes
    %% from the client.
    case OldEpochId of
        EpochID ->
            %% NOTE(review): the worker is neither linked nor monitored;
            %% if it crashes before replying, the caller only finds out
            %% through its own call timeout.
            spawn(fun() ->
                          append_server_dispatch(From, NSInfo,
                                                 Prefix, Chunk, TCSum, Opts,
                                                 FluName, EpochID)
                  end),
            {noreply, S};
        _ ->
            {reply, {error, bad_epoch}, S}
    end;
%% TODO: Who sends this message?
handle_call(wedge_status, _From,
            #state{wedged=Wedged_p, epoch_id=EpochId} = S) ->
    {reply, {wedge_status_reply, Wedged_p, EpochId}, S};
handle_call(current_state, _From, S) ->
    {reply, S, S};
handle_call(Else, From, S) ->
    io:format(user, "~s:handle_call: WHA? from=~w ~w\n", [?MODULE, From, Else]),
    {reply, {error, unknown_call}, S}.
|
|
||||||
|
|
||||||
%% gen_server cast handler.
%%
%% `wedge_myself': when we are not yet wedged and the requested epoch
%% ID equals our current one, mark ourselves wedged (in the ETS table
%% and in the state) and ask our chain manager, from a throwaway
%% process, to react.  Otherwise the request is ignored.
%%
%% `wedge_state_change': adopt the new wedge flag and epoch ID unless
%% the new epoch number is lower than the current one (an undefined
%% current epoch ID counts as -1).
%%
%% Anything else is logged and ignored.
handle_cast({wedge_myself, WedgeEpochId},
            #state{flu_name=FluName, wedged=Wedged_p,
                   epoch_id=CurrentEpochId}=S)
  when not Wedged_p andalso WedgeEpochId == CurrentEpochId ->
    true = ets:insert(S#state.etstab, {epoch, {true, CurrentEpochId}}),
    %% Tell my chain manager that it might want to react to
    %% this new world.
    Chmgr = machi_chain_manager1:make_chmgr_regname(FluName),
    spawn(fun() ->
                  catch machi_chain_manager1:trigger_react_to_env(Chmgr)
          end),
    {noreply, S#state{wedged=true}};
handle_cast({wedge_myself, _WedgeEpochId}, #state{}=S) ->
    {noreply, S};
handle_cast({wedge_state_change, Boolean, {NewEpoch, _}=NewEpochId},
            #state{epoch_id=CurrentEpochId}=S) ->
    CurrentEpoch = case CurrentEpochId of
                       undefined -> -1;
                       {Epoch, _} -> Epoch
                   end,
    case NewEpoch >= CurrentEpoch of
        true ->
            true = ets:insert(S#state.etstab,
                              {epoch, {Boolean, NewEpochId}}),
            {noreply, S#state{wedged=Boolean, epoch_id=NewEpochId}};
        false ->
            {noreply, S}
    end;
handle_cast(Else, S) ->
    io:format(user, "~s:handle_cast: WHA? ~p\n", [?MODULE, Else]),
    {noreply, S}.
|
|
||||||
|
|
||||||
%% No raw messages are expected: log whatever arrives and carry on
%% with the state unchanged.
handle_info(Unexpected, State) ->
    io:format(user, "~s:handle_info: WHA? ~p\n", [?MODULE, Unexpected]),
    {noreply, State}.
|
|
||||||
|
|
||||||
%% Log a warning for every shutdown reason other than `normal'.
terminate(Reason, _State) ->
    case Reason of
        normal ->
            ok;
        _ ->
            lager:warning("~s:terminate: ~w", [?MODULE, Reason]),
            ok
    end.
|
|
||||||
|
|
||||||
%% No state migration is needed on code upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
|
|
||||||
|
|
||||||
%% Worker body for an accepted append: perform the append and send the
%% outcome back to the original caller with gen_server:reply/2.  A
%% successful {ok, File, Offset} is reported as
%% {assignment, Offset, File}; any other result is passed through
%% unchanged.
append_server_dispatch(From, NSInfo,
                       Prefix, Chunk, TCSum, Opts, FluName, EpochId) ->
    Outcome = handle_append(NSInfo,
                            Prefix, Chunk, TCSum, Opts, FluName, EpochId),
    Reply = case Outcome of
                {ok, File, Offset} -> {assignment, Offset, File};
                _ -> Outcome
            end,
    _ = gen_server:reply(From, Reply),
    ok.
|
|
||||||
|
|
||||||
%% Resolve Prefix to a file through the filename manager, obtain that
%% file's proxy pid from the metadata manager, and append Chunk through
%% the file proxy, passing the client's checksum tag/value as metadata
%% and the chunk_extra field of Opts.
%%
%% Returns whatever machi_file_proxy:append/4 returns,
%% {error, trimmed} when the proxy lookup reports it, or the filename
%% manager's result unchanged when that is not a {file, _} tuple.
handle_append(NSInfo,
              Prefix, Chunk, TCSum, Opts, FluName, EpochId) ->
    case machi_flu_filename_mgr:find_or_make_filename_from_prefix(
           FluName, EpochId, {prefix, Prefix}, NSInfo) of
        {file, _Name} = FileSpec ->
            case machi_flu_metadata_mgr:start_proxy_pid(FluName, FileSpec) of
                {error, trimmed} = Trimmed ->
                    Trimmed;
                {ok, ProxyPid} ->
                    {Tag, CS} = machi_util:unmake_tagged_csum(TCSum),
                    ChunkExtra = Opts#append_opts.chunk_extra,
                    machi_file_proxy:append(
                      ProxyPid,
                      [{client_csum_tag, Tag}, {client_csum, CS}],
                      ChunkExtra, Chunk)
            end;
        NotAFile ->
            NotAFile
    end.
|
|
|
@ -38,71 +38,6 @@
|
||||||
%% TODO This EDoc was written first, and the EDoc and also `-type' and
|
%% TODO This EDoc was written first, and the EDoc and also `-type' and
|
||||||
%% `-spec' definitions for {@link machi_proxy_flu1_client} and {@link
|
%% `-spec' definitions for {@link machi_proxy_flu1_client} and {@link
|
||||||
%% machi_cr_client} must be improved.
|
%% machi_cr_client} must be improved.
|
||||||
%%
|
|
||||||
%% == Client API implementation notes ==
|
|
||||||
%%
|
|
||||||
%% At the moment, there are several modules that implement various
|
|
||||||
%% subsets of the Machi API. The table below attempts to show how and
|
|
||||||
%% why they differ.
|
|
||||||
%%
|
|
||||||
%% ```
|
|
||||||
%% |--------------------------+-------+-----+------+------+-------+----------------|
|
|
||||||
%% | | PB | | # | | Conn | Epoch & NS |
|
|
||||||
%% | Module name | Level | CR? | FLUS | Impl | Life? | version aware? |
|
|
||||||
%% |--------------------------+-------+-----+------+------+-------+----------------|
|
|
||||||
%% | machi_pb_high_api_client | high | yes | many | proc | long | no |
|
|
||||||
%% | machi_cr_client | low | yes | many | proc | long | no |
|
|
||||||
%% | machi_proxy_flu1_client | low | no | 1 | proc | long | yes |
|
|
||||||
%% | machi_flu1_client | low | no | 1 | lib | short | yes |
|
|
||||||
%% |--------------------------+-------+-----+------+------+-------+----------------|
|
|
||||||
%% '''
|
|
||||||
%%
|
|
||||||
%% In terms of use and API layering, the table rows are in highest`->'lowest
|
|
||||||
%% order: each level calls the layer immediately below it.
|
|
||||||
%%
|
|
||||||
%% <dl>
|
|
||||||
%% <dt> <b> PB Level</b> </dt>
|
|
||||||
%% <dd> The Protocol Buffers API is divided logically into two levels,
|
|
||||||
%% "low" and "high". The low-level protocol is used for intra-chain
|
|
||||||
%% communication. The high-level protocol is used for clients outside
|
|
||||||
%% of a Machi chain or Machi cluster of chains.
|
|
||||||
%% </dd>
|
|
||||||
%% <dt> <b> CR?</b> </dt>
|
|
||||||
%% <dd> Does this API support (directly or indirectly) Chain
|
|
||||||
%% Replication? If `no', then the API has no awareness of multiple
|
|
||||||
%% replicas of any file or file chunk; unaware clients can only
|
|
||||||
%% perform operations at a single Machi FLU's file service or
|
|
||||||
%% projection store service.
|
|
||||||
%% </dd>
|
|
||||||
%% <dt> <b> # FLUs</b> </dt>
|
|
||||||
%% <dd> How many FLUs does this API layer communicate with
|
|
||||||
%% simultaneously? Note that there is a one-to-one correspondence
|
|
||||||
%% between this value and the "CR?" column's value.
|
|
||||||
%% </dd>
|
|
||||||
%% <dt> <b> Impl</b> </dt>
|
|
||||||
%% <dd> Implementation: library-only or an Erlang process,
|
|
||||||
%% e.g., `gen_server'.
|
|
||||||
%% </dd>
|
|
||||||
%% <dt> <b> Conn Life?</b> </dt>
|
|
||||||
%% <dd> Expected TCP session connection life: short or long. At the
|
|
||||||
%% lowest level, the {@link machi_flu1_client} API implementation takes
|
|
||||||
%% no effort to reconnect to a remote FLU when its single TCP session
|
|
||||||
%% is broken. For long-lived connection life APIs, the server side will
|
|
||||||
%% automatically attempt to reconnect to remote FLUs when a TCP session
|
|
||||||
%% is broken.
|
|
||||||
%% </dd>
|
|
||||||
%% <dt> <b> Epoch & NS version aware?</b> </dt>
|
|
||||||
%% <dd> Are clients of this API responsible for knowing a chain's EpochID
|
|
||||||
%% and namespace version numbers? If `no', then the server side of the
|
|
||||||
%% API will automatically attempt to discover/re-discover the EpochID and
|
|
||||||
%% namespace version numbers whenever they change.
|
|
||||||
%% </dd>
|
|
||||||
%% </dl>
|
|
||||||
%%
|
|
||||||
%% The only protocol that we expect to be used by entities outside of
|
|
||||||
%% a single Machi chain or a multi-chain cluster is the "high"
|
|
||||||
%% Protocol Buffers API.  The {@link machi_pb_high_api_client} module
|
|
||||||
%% is an Erlang reference implementation of this PB API.
|
|
||||||
|
|
||||||
-module(machi_flu1_client).
|
-module(machi_flu1_client).
|
||||||
|
|
||||||
|
@ -115,15 +50,14 @@
|
||||||
-include_lib("pulse_otp/include/pulse_otp.hrl").
|
-include_lib("pulse_otp/include/pulse_otp.hrl").
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-define(SHORT_TIMEOUT, 2500).
|
-define(HARD_TIMEOUT, 2500).
|
||||||
-define(LONG_TIMEOUT, (60*1000)).
|
|
||||||
|
|
||||||
-export([
|
-export([
|
||||||
%% File API
|
%% File API
|
||||||
append_chunk/6, append_chunk/7,
|
append_chunk/4, append_chunk/5,
|
||||||
append_chunk/8, append_chunk/9,
|
append_chunk_extra/5, append_chunk_extra/6,
|
||||||
read_chunk/7, read_chunk/8,
|
read_chunk/5, read_chunk/6,
|
||||||
checksum_list/2, checksum_list/3,
|
checksum_list/3, checksum_list/4,
|
||||||
list_files/2, list_files/3,
|
list_files/2, list_files/3,
|
||||||
wedge_status/1, wedge_status/2,
|
wedge_status/1, wedge_status/2,
|
||||||
|
|
||||||
|
@ -145,113 +79,103 @@
|
||||||
]).
|
]).
|
||||||
%% For "internal" replication only.
|
%% For "internal" replication only.
|
||||||
-export([
|
-export([
|
||||||
write_chunk/7, write_chunk/8,
|
write_chunk/5, write_chunk/6,
|
||||||
trim_chunk/6,
|
|
||||||
delete_migration/3, delete_migration/4,
|
delete_migration/3, delete_migration/4,
|
||||||
trunc_hack/3, trunc_hack/4
|
trunc_hack/3, trunc_hack/4
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-type port_wrap() :: {w,atom(),term()}.
|
-type port_wrap() :: {w,atom(),term()}.
|
||||||
|
|
||||||
-spec append_chunk(port_wrap(),
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(),
|
%% with `Prefix'.
|
||||||
machi_dt:file_prefix(), machi_dt:chunk(),
|
|
||||||
machi_dt:chunk_csum()) ->
|
-spec append_chunk(port_wrap(), machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk()) ->
|
||||||
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
append_chunk(Sock, NSInfo, EpochID, Prefix, Chunk, CSum) ->
|
append_chunk(Sock, EpochID, Prefix, Chunk) ->
|
||||||
append_chunk(Sock, NSInfo, EpochID, Prefix, Chunk, CSum,
|
append_chunk2(Sock, EpochID, Prefix, Chunk, 0).
|
||||||
#append_opts{}, ?LONG_TIMEOUT).
|
|
||||||
|
|
||||||
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
%% with `Prefix' and also request an additional `Extra' bytes.
|
%% with `Prefix'.
|
||||||
%%
|
|
||||||
%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then
|
|
||||||
%% the file offsets that follow `Chunk''s position for the following 4K will
|
|
||||||
%% be reserved by the file sequencer for later write(s) by the
|
|
||||||
%% `write_chunk()' API.
|
|
||||||
|
|
||||||
-spec append_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
|
-spec append_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
|
||||||
'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(),
|
machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk()) ->
|
||||||
machi_dt:file_prefix(), machi_dt:chunk(),
|
|
||||||
machi_dt:chunk_csum()) ->
|
|
||||||
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
append_chunk(Host, TcpPort, NSInfo, EpochID, Prefix, Chunk, CSum) ->
|
append_chunk(Host, TcpPort, EpochID, Prefix, Chunk) ->
|
||||||
append_chunk(Host, TcpPort, NSInfo, EpochID, Prefix, Chunk, CSum,
|
|
||||||
#append_opts{}, ?LONG_TIMEOUT).
|
|
||||||
|
|
||||||
-spec append_chunk(port_wrap(),
|
|
||||||
'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(),
|
|
||||||
machi_dt:file_prefix(), machi_dt:chunk(),
|
|
||||||
machi_dt:chunk_csum(), machi_dt:append_opts(), timeout()) ->
|
|
||||||
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
|
||||||
append_chunk(Sock, NSInfo0, EpochID, Prefix, Chunk, CSum, Opts, Timeout) ->
|
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
|
||||||
append_chunk2(Sock, NSInfo, EpochID, Prefix, Chunk, CSum, Opts, Timeout).
|
|
||||||
|
|
||||||
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
|
||||||
%% with `Prefix' and also request an additional `Extra' bytes.
|
|
||||||
%%
|
|
||||||
%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then
|
|
||||||
%% the file offsets that follow `Chunk''s position for the following 4K will
|
|
||||||
%% be reserved by the file sequencer for later write(s) by the
|
|
||||||
%% `write_chunk()' API.
|
|
||||||
|
|
||||||
-spec append_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
|
|
||||||
'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(),
|
|
||||||
machi_dt:file_prefix(), machi_dt:chunk(),
|
|
||||||
machi_dt:chunk_csum(), machi_dt:append_opts(), timeout()) ->
|
|
||||||
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
|
||||||
append_chunk(Host, TcpPort, NSInfo0, EpochID,
|
|
||||||
Prefix, Chunk, CSum, Opts, Timeout) ->
|
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
try
|
try
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
append_chunk2(Sock, EpochID, Prefix, Chunk, 0)
|
||||||
append_chunk2(Sock, NSInfo, EpochID,
|
after
|
||||||
Prefix, Chunk, CSum, Opts, Timeout)
|
disconnect(Sock)
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
|
%% with `Prefix' and also request an additional `Extra' bytes.
|
||||||
|
%%
|
||||||
|
%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then
|
||||||
|
%% the file offsets that follow `Chunk''s position for the following 4K will
|
||||||
|
%% be reserved by the file sequencer for later write(s) by the
|
||||||
|
%% `write_chunk()' API.
|
||||||
|
|
||||||
|
-spec append_chunk_extra(port_wrap(), machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) ->
|
||||||
|
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
|
append_chunk_extra(Sock, EpochID, Prefix, Chunk, ChunkExtra)
|
||||||
|
when is_integer(ChunkExtra), ChunkExtra >= 0 ->
|
||||||
|
append_chunk2(Sock, EpochID, Prefix, Chunk, ChunkExtra).
|
||||||
|
|
||||||
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
|
%% with `Prefix' and also request an additional `Extra' bytes.
|
||||||
|
%%
|
||||||
|
%% For example, if the `Chunk' size is 1 KByte and `Extra' is 4K Bytes, then
|
||||||
|
%% the file offsets that follow `Chunk''s position for the following 4K will
|
||||||
|
%% be reserved by the file sequencer for later write(s) by the
|
||||||
|
%% `write_chunk()' API.
|
||||||
|
|
||||||
|
-spec append_chunk_extra(machi_dt:inet_host(), machi_dt:inet_port(),
|
||||||
|
machi_dt:epoch_id(), machi_dt:file_prefix(), machi_dt:chunk(), machi_dt:chunk_size()) ->
|
||||||
|
{ok, machi_dt:chunk_pos()} | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
|
append_chunk_extra(Host, TcpPort, EpochID, Prefix, Chunk, ChunkExtra)
|
||||||
|
when is_integer(ChunkExtra), ChunkExtra >= 0 ->
|
||||||
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
|
try
|
||||||
|
append_chunk2(Sock, EpochID, Prefix, Chunk, ChunkExtra)
|
||||||
after
|
after
|
||||||
disconnect(Sock)
|
disconnect(Sock)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
||||||
|
|
||||||
-spec read_chunk(port_wrap(), 'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size(),
|
-spec read_chunk(port_wrap(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size()) ->
|
||||||
machi_dt:read_opts_x()) ->
|
{ok, machi_dt:chunk_s()} |
|
||||||
{ok, {[machi_dt:chunk_summary()], [machi_dt:chunk_pos()]}} |
|
|
||||||
{error, machi_dt:error_general() | 'not_written' | 'partial_read'} |
|
{error, machi_dt:error_general() | 'not_written' | 'partial_read'} |
|
||||||
{error, term()}.
|
{error, term()}.
|
||||||
read_chunk(Sock, NSInfo0, EpochID, File, Offset, Size, Opts0)
|
read_chunk(Sock, EpochID, File, Offset, Size)
|
||||||
when Offset >= ?MINIMUM_OFFSET, Size >= 0 ->
|
when Offset >= ?MINIMUM_OFFSET, Size >= 0 ->
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
read_chunk2(Sock, EpochID, File, Offset, Size).
|
||||||
Opts = machi_util:read_opts_default(Opts0),
|
|
||||||
read_chunk2(Sock, NSInfo, EpochID, File, Offset, Size, Opts).
|
|
||||||
|
|
||||||
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
||||||
|
|
||||||
-spec read_chunk(machi_dt:inet_host(), machi_dt:inet_port(), 'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(),
|
-spec read_chunk(machi_dt:inet_host(), machi_dt:inet_port(), machi_dt:epoch_id(),
|
||||||
machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size(),
|
machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size()) ->
|
||||||
machi_dt:read_opts_x()) ->
|
{ok, machi_dt:chunk_s()} |
|
||||||
{ok, [machi_dt:chunk_summary()]} |
|
|
||||||
{error, machi_dt:error_general() | 'not_written' | 'partial_read'} |
|
{error, machi_dt:error_general() | 'not_written' | 'partial_read'} |
|
||||||
{error, term()}.
|
{error, term()}.
|
||||||
read_chunk(Host, TcpPort, NSInfo0, EpochID, File, Offset, Size, Opts0)
|
read_chunk(Host, TcpPort, EpochID, File, Offset, Size)
|
||||||
when Offset >= ?MINIMUM_OFFSET, Size >= 0 ->
|
when Offset >= ?MINIMUM_OFFSET, Size >= 0 ->
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
|
||||||
Opts = machi_util:read_opts_default(Opts0),
|
|
||||||
try
|
try
|
||||||
read_chunk2(Sock, NSInfo, EpochID, File, Offset, Size, Opts)
|
read_chunk2(Sock, EpochID, File, Offset, Size)
|
||||||
after
|
after
|
||||||
disconnect(Sock)
|
disconnect(Sock)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
|
|
||||||
-spec checksum_list(port_wrap(), machi_dt:file_name()) ->
|
-spec checksum_list(port_wrap(), machi_dt:epoch_id(), machi_dt:file_name()) ->
|
||||||
{ok, binary()} |
|
{ok, binary()} |
|
||||||
{error, machi_dt:error_general() | 'no_such_file' | 'partial_read'} |
|
{error, machi_dt:error_general() | 'no_such_file' | 'partial_read'} |
|
||||||
{error, term()}.
|
{error, term()}.
|
||||||
checksum_list(Sock, File) ->
|
checksum_list(Sock, EpochID, File) ->
|
||||||
checksum_list2(Sock, File).
|
checksum_list2(Sock, EpochID, File).
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
%%
|
%%
|
||||||
|
@ -275,13 +199,13 @@ checksum_list(Sock, File) ->
|
||||||
%% Details of the encoding used inside the `binary()' blob can be found
|
%% Details of the encoding used inside the `binary()' blob can be found
|
||||||
%% in the EDoc comments for {@link machi_flu1:decode_csum_file_entry/1}.
|
%% in the EDoc comments for {@link machi_flu1:decode_csum_file_entry/1}.
|
||||||
|
|
||||||
-spec checksum_list(machi_dt:inet_host(), machi_dt:inet_port(), machi_dt:file_name()) ->
|
-spec checksum_list(machi_dt:inet_host(), machi_dt:inet_port(), machi_dt:epoch_id(), machi_dt:file_name()) ->
|
||||||
{ok, binary()} |
|
{ok, binary()} |
|
||||||
{error, machi_dt:error_general() | 'no_such_file'} | {error, term()}.
|
{error, machi_dt:error_general() | 'no_such_file'} | {error, term()}.
|
||||||
checksum_list(Host, TcpPort, File) when is_integer(TcpPort) ->
|
checksum_list(Host, TcpPort, EpochID, File) when is_integer(TcpPort) ->
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
try
|
try
|
||||||
checksum_list2(Sock, File)
|
checksum_list2(Sock, EpochID, File)
|
||||||
after
|
after
|
||||||
disconnect(Sock)
|
disconnect(Sock)
|
||||||
end.
|
end.
|
||||||
|
@ -308,7 +232,7 @@ list_files(Host, TcpPort, EpochID) when is_integer(TcpPort) ->
|
||||||
%% @doc Fetch the wedge status from the remote FLU.
|
%% @doc Fetch the wedge status from the remote FLU.
|
||||||
|
|
||||||
-spec wedge_status(port_wrap()) ->
|
-spec wedge_status(port_wrap()) ->
|
||||||
{ok, {boolean(), machi_dt:epoch_id(), machi_dt:namespace_version(),machi_dt:namespace()}} | {error, term()}.
|
{ok, {boolean(), machi_dt:epoch_id()}} | {error, term()}.
|
||||||
|
|
||||||
wedge_status(Sock) ->
|
wedge_status(Sock) ->
|
||||||
wedge_status2(Sock).
|
wedge_status2(Sock).
|
||||||
|
@ -316,7 +240,7 @@ wedge_status(Sock) ->
|
||||||
%% @doc Fetch the wedge status from the remote FLU.
|
%% @doc Fetch the wedge status from the remote FLU.
|
||||||
|
|
||||||
-spec wedge_status(machi_dt:inet_host(), machi_dt:inet_port()) ->
|
-spec wedge_status(machi_dt:inet_host(), machi_dt:inet_port()) ->
|
||||||
{ok, {boolean(), machi_dt:epoch_id(), machi_dt:namespace_version(),machi_dt:namespace()}} | {error, term()}.
|
{ok, {boolean(), machi_dt:epoch_id()}} | {error, term()}.
|
||||||
wedge_status(Host, TcpPort) when is_integer(TcpPort) ->
|
wedge_status(Host, TcpPort) when is_integer(TcpPort) ->
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
try
|
try
|
||||||
|
@ -527,46 +451,27 @@ disconnect(_) ->
|
||||||
%% @doc Restricted API: Write a chunk of already-sequenced data to
|
%% @doc Restricted API: Write a chunk of already-sequenced data to
|
||||||
%% `File' at `Offset'.
|
%% `File' at `Offset'.
|
||||||
|
|
||||||
-spec write_chunk(port_wrap(), 'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk(), machi_dt:chunk_csum()) ->
|
-spec write_chunk(port_wrap(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk()) ->
|
||||||
ok | {error, machi_dt:error_general()} | {error, term()}.
|
ok | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
write_chunk(Sock, NSInfo0, EpochID, File, Offset, Chunk, CSum)
|
write_chunk(Sock, EpochID, File, Offset, Chunk)
|
||||||
when Offset >= ?MINIMUM_OFFSET ->
|
when Offset >= ?MINIMUM_OFFSET ->
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
write_chunk2(Sock, EpochID, File, Offset, Chunk).
|
||||||
write_chunk2(Sock, NSInfo, EpochID, File, Offset, Chunk, CSum).
|
|
||||||
|
|
||||||
%% @doc Restricted API: Write a chunk of already-sequenced data to
|
%% @doc Restricted API: Write a chunk of already-sequenced data to
|
||||||
%% `File' at `Offset'.
|
%% `File' at `Offset'.
|
||||||
|
|
||||||
-spec write_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
|
-spec write_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
|
||||||
'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk(), machi_dt:chunk_csum()) ->
|
machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk()) ->
|
||||||
ok | {error, machi_dt:error_general()} | {error, term()}.
|
ok | {error, machi_dt:error_general()} | {error, term()}.
|
||||||
write_chunk(Host, TcpPort, NSInfo0, EpochID, File, Offset, Chunk, CSum)
|
write_chunk(Host, TcpPort, EpochID, File, Offset, Chunk)
|
||||||
when Offset >= ?MINIMUM_OFFSET ->
|
when Offset >= ?MINIMUM_OFFSET ->
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),
|
||||||
try
|
try
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
write_chunk2(Sock, EpochID, File, Offset, Chunk)
|
||||||
write_chunk2(Sock, NSInfo, EpochID, File, Offset, Chunk, CSum)
|
|
||||||
after
|
after
|
||||||
disconnect(Sock)
|
disconnect(Sock)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Restricted API: Trim a chunk of `Size' bytes from
|
|
||||||
%% `File' at `Offset'.
|
|
||||||
|
|
||||||
-spec trim_chunk(port_wrap(), 'undefined' | machi_dt:ns_info(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size()) ->
|
|
||||||
ok | {error, machi_dt:error_general()} | {error, term()}.
|
|
||||||
trim_chunk(Sock, NSInfo0, EpochID, File0, Offset, Size)
|
|
||||||
when Offset >= ?MINIMUM_OFFSET ->
|
|
||||||
ReqID = <<"id">>,
|
|
||||||
NSInfo = machi_util:ns_info_default(NSInfo0),
|
|
||||||
#ns_info{version=NSVersion, name=NS} = NSInfo,
|
|
||||||
File = machi_util:make_binary(File0),
|
|
||||||
true = (Offset >= ?MINIMUM_OFFSET),
|
|
||||||
Req = machi_pb_translate:to_pb_request(
|
|
||||||
ReqID,
|
|
||||||
{low_trim_chunk, NSVersion, NS, EpochID, File, Offset, Size, 0}),
|
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
|
||||||
|
|
||||||
%% @doc Restricted API: Delete a file after it has been successfully
|
%% @doc Restricted API: Delete a file after it has been successfully
|
||||||
%% migrated.
|
%% migrated.
|
||||||
|
|
||||||
|
@ -611,88 +516,83 @@ trunc_hack(Host, TcpPort, EpochID, File) when is_integer(TcpPort) ->
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
read_chunk2(Sock, NSInfo, EpochID, File0, Offset, Size, Opts) ->
|
read_chunk2(Sock, EpochID, File0, Offset, Size) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
#ns_info{version=NSVersion, name=NS} = NSInfo,
|
|
||||||
File = machi_util:make_binary(File0),
|
File = machi_util:make_binary(File0),
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID,
|
ReqID,
|
||||||
{low_read_chunk, NSVersion, NS, EpochID, File, Offset, Size, Opts}),
|
{low_read_chunk, EpochID, File, Offset, Size, []}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
append_chunk2(Sock, NSInfo, EpochID,
|
append_chunk2(Sock, EpochID, Prefix0, Chunk0, ChunkExtra) ->
|
||||||
Prefix0, Chunk, CSum0, Opts, Timeout) ->
|
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Prefix = machi_util:make_binary(Prefix0),
|
{Chunk, CSum_tag, CSum} =
|
||||||
{CSum_tag, CSum} = case CSum0 of
|
case Chunk0 of
|
||||||
<<>> ->
|
X when is_binary(X) ->
|
||||||
{?CSUM_TAG_NONE, <<>>};
|
{Chunk0, ?CSUM_TAG_NONE, <<>>};
|
||||||
{_Tag, _CS} ->
|
{ChunkCSum, Chk} ->
|
||||||
CSum0;
|
{Tag, CS} = machi_util:unmake_tagged_csum(ChunkCSum),
|
||||||
B when is_binary(B) ->
|
{Chk, Tag, CS}
|
||||||
machi_util:unmake_tagged_csum(CSum0)
|
|
||||||
end,
|
end,
|
||||||
#ns_info{version=NSVersion, name=NS, locator=NSLocator} = NSInfo,
|
PKey = <<>>, % TODO
|
||||||
%% NOTE: The tuple position of NSLocator is a bit odd, because EpochID
|
Prefix = machi_util:make_binary(Prefix0),
|
||||||
%% _must_ be in the 4th position (as NSV & NS must be in 2nd & 3rd).
|
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID,
|
ReqID,
|
||||||
{low_append_chunk, NSVersion, NS, EpochID, NSLocator,
|
{low_append_chunk, EpochID, PKey, Prefix, Chunk, CSum_tag, CSum,
|
||||||
Prefix, Chunk, CSum_tag, CSum, Opts}),
|
ChunkExtra}),
|
||||||
do_pb_request_common(Sock, ReqID, Req, true, Timeout).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
write_chunk2(Sock, NSInfo, EpochID, File0, Offset, Chunk, CSum0) ->
|
write_chunk2(Sock, EpochID, File0, Offset, Chunk0) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
#ns_info{version=NSVersion, name=NS} = NSInfo,
|
|
||||||
File = machi_util:make_binary(File0),
|
File = machi_util:make_binary(File0),
|
||||||
true = (Offset >= ?MINIMUM_OFFSET),
|
true = (Offset >= ?MINIMUM_OFFSET),
|
||||||
{CSum_tag, CSum} = case CSum0 of
|
{Chunk, CSum_tag, CSum} =
|
||||||
<<>> ->
|
case Chunk0 of
|
||||||
{?CSUM_TAG_NONE, <<>>};
|
X when is_binary(X) ->
|
||||||
{_Tag, _CS} ->
|
{Chunk0, ?CSUM_TAG_NONE, <<>>};
|
||||||
CSum0;
|
{ChunkCSum, Chk} ->
|
||||||
B when is_binary(B) ->
|
{Tag, CS} = machi_util:unmake_tagged_csum(ChunkCSum),
|
||||||
machi_util:unmake_tagged_csum(CSum0)
|
{Chk, Tag, CS}
|
||||||
end,
|
end,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID,
|
ReqID,
|
||||||
{low_write_chunk, NSVersion, NS, EpochID, File, Offset, Chunk, CSum_tag, CSum}),
|
{low_write_chunk, EpochID, File, Offset, Chunk, CSum_tag, CSum}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
list2(Sock, EpochID) ->
|
list2(Sock, EpochID) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_list_files, EpochID}}),
|
ReqID, {low_list_files, EpochID}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
wedge_status2(Sock) ->
|
wedge_status2(Sock) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_wedge_status}}),
|
ReqID, {low_wedge_status, undefined}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
echo2(Sock, Message) ->
|
echo2(Sock, Message) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_echo, Message}}),
|
ReqID, {low_echo, undefined, Message}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
checksum_list2(Sock, File) ->
|
checksum_list2(Sock, EpochID, File) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_checksum_list, File}}),
|
ReqID, {low_checksum_list, EpochID, File}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
delete_migration2(Sock, EpochID, File) ->
|
delete_migration2(Sock, EpochID, File) ->
|
||||||
ReqID = <<"id">>,
|
ReqID = <<"id">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_delete_migration, EpochID, File}}),
|
ReqID, {low_delete_migration, EpochID, File}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
trunc_hack2(Sock, EpochID, File) ->
|
trunc_hack2(Sock, EpochID, File) ->
|
||||||
ReqID = <<"id-trunc">>,
|
ReqID = <<"id-trunc">>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_skip_wedge, {low_trunc_hack, EpochID, File}}),
|
ReqID, {low_trunc_hack, EpochID, File}),
|
||||||
do_pb_request_common(Sock, ReqID, Req).
|
do_pb_request_common(Sock, ReqID, Req).
|
||||||
|
|
||||||
get_latest_epochid2(Sock, ProjType) ->
|
get_latest_epochid2(Sock, ProjType) ->
|
||||||
|
@ -735,18 +635,18 @@ kick_projection_reaction2(Sock, _Options) ->
|
||||||
ReqID = <<42>>,
|
ReqID = <<42>>,
|
||||||
Req = machi_pb_translate:to_pb_request(
|
Req = machi_pb_translate:to_pb_request(
|
||||||
ReqID, {low_proj, {kick_projection_reaction}}),
|
ReqID, {low_proj, {kick_projection_reaction}}),
|
||||||
do_pb_request_common(Sock, ReqID, Req, false, ?LONG_TIMEOUT).
|
do_pb_request_common(Sock, ReqID, Req, false).
|
||||||
|
|
||||||
do_pb_request_common(Sock, ReqID, Req) ->
|
do_pb_request_common(Sock, ReqID, Req) ->
|
||||||
do_pb_request_common(Sock, ReqID, Req, true, ?LONG_TIMEOUT).
|
do_pb_request_common(Sock, ReqID, Req, true).
|
||||||
|
|
||||||
do_pb_request_common(Sock, ReqID, Req, GetReply_p, Timeout) ->
|
do_pb_request_common(Sock, ReqID, Req, GetReply_p) ->
|
||||||
erase(bad_sock),
|
erase(bad_sock),
|
||||||
try
|
try
|
||||||
ReqBin = list_to_binary(machi_pb:encode_mpb_ll_request(Req)),
|
ReqBin = list_to_binary(machi_pb:encode_mpb_ll_request(Req)),
|
||||||
ok = w_send(Sock, ReqBin),
|
ok = w_send(Sock, ReqBin),
|
||||||
if GetReply_p ->
|
if GetReply_p ->
|
||||||
case w_recv(Sock, 0, Timeout) of
|
case w_recv(Sock, 0) of
|
||||||
{ok, RespBin} ->
|
{ok, RespBin} ->
|
||||||
Resp = machi_pb:decode_mpb_ll_response(RespBin),
|
Resp = machi_pb:decode_mpb_ll_response(RespBin),
|
||||||
{ReqID2, Reply} = machi_pb_translate:from_pb_response(Resp),
|
{ReqID2, Reply} = machi_pb_translate:from_pb_response(Resp),
|
||||||
|
@ -767,17 +667,7 @@ do_pb_request_common(Sock, ReqID, Req, GetReply_p, Timeout) ->
|
||||||
{error, {badmatch, Noo, erlang:get_stacktrace()}};
|
{error, {badmatch, Noo, erlang:get_stacktrace()}};
|
||||||
error:{badmatch,_}=BadMatch ->
|
error:{badmatch,_}=BadMatch ->
|
||||||
put(bad_sock, Sock),
|
put(bad_sock, Sock),
|
||||||
{error, {badmatch, BadMatch, erlang:get_stacktrace()}};
|
{error, {badmatch, BadMatch, erlang:get_stacktrace()}}
|
||||||
error:Whoa ->
|
|
||||||
put(bad_sock, Sock),
|
|
||||||
%% TODO: The machi_chain_manager1_converge_demo:t() test can
|
|
||||||
%% create a large number of these errors when moving from
|
|
||||||
%% no partitions to many partitions:
|
|
||||||
%% Whoa undefined: function_clause
|
|
||||||
%% In theory this is harmless, because the client will retry
|
|
||||||
%% with a new socket. But, fix it anyway.
|
|
||||||
io:format(user, "DBG Whoa ~w: ~w at ~w ~P\n", [Sock, Whoa, time(), erlang:get_stacktrace(), 25]), timer:sleep(250),
|
|
||||||
{error, {whoa, Whoa, erlang:get_stacktrace()}}
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
filter_sock_error_result({error, closed}) ->
|
filter_sock_error_result({error, closed}) ->
|
||||||
|
@ -787,13 +677,11 @@ filter_sock_error_result(Error) ->
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
w_connect(#p_srvr{proto_mod=?MODULE, address=Host, port=Port, props=Props}=_P)->
|
w_connect(#p_srvr{proto_mod=?MODULE, address=Host, port=Port, props=Props})->
|
||||||
try
|
try
|
||||||
case proplists:get_value(session_proto, Props, tcp) of
|
case proplists:get_value(session_proto, Props, tcp) of
|
||||||
tcp ->
|
tcp ->
|
||||||
put(xxx, goofus),
|
Sock = machi_util:connect(Host, Port, ?HARD_TIMEOUT),
|
||||||
Sock = machi_util:connect(Host, Port, ?SHORT_TIMEOUT),
|
|
||||||
put(xxx, Sock),
|
|
||||||
ok = inet:setopts(Sock, ?PB_PACKET_OPTS),
|
ok = inet:setopts(Sock, ?PB_PACKET_OPTS),
|
||||||
{w,tcp,Sock};
|
{w,tcp,Sock};
|
||||||
%% sctp ->
|
%% sctp ->
|
||||||
|
@ -807,8 +695,7 @@ put(xxx, Sock),
|
||||||
{w,ssl,SslSock}
|
{w,ssl,SslSock}
|
||||||
end
|
end
|
||||||
catch
|
catch
|
||||||
_X:_Y ->
|
_:_ ->
|
||||||
%% io:format(user, "DBG Whoa ~w w_connect port ~w sock ~w err ~w ~w\n", [time(), Port, get(xxx), _X, _Y]),
|
|
||||||
undefined
|
undefined
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -816,8 +703,8 @@ w_close({w,tcp,Sock}) ->
|
||||||
catch gen_tcp:close(Sock),
|
catch gen_tcp:close(Sock),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
w_recv({w,tcp,Sock}, Amt, Timeout) ->
|
w_recv({w,tcp,Sock}, Amt) ->
|
||||||
gen_tcp:recv(Sock, Amt, Timeout).
|
gen_tcp:recv(Sock, Amt, ?HARD_TIMEOUT).
|
||||||
|
|
||||||
w_send({w,tcp,Sock}, IoData) ->
|
w_send({w,tcp,Sock}, IoData) ->
|
||||||
gen_tcp:send(Sock, IoData).
|
gen_tcp:send(Sock, IoData).
|
||||||
|
|
|
@ -1,634 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc Ranch protocol callback module to handle PB protocol over
|
|
||||||
%% transport, including both high and low modes.
|
|
||||||
|
|
||||||
%% TODO
|
|
||||||
%% - Two modes, high and low should be separated at listener level?
|
|
||||||
|
|
||||||
-module(machi_flu1_net_server).
|
|
||||||
|
|
||||||
-behaviour(gen_server).
|
|
||||||
-behaviour(ranch_protocol).
|
|
||||||
|
|
||||||
-export([start_link/4]).
|
|
||||||
-export([init/1]).
|
|
||||||
-export([handle_call/3, handle_cast/2, handle_info/2,
|
|
||||||
terminate/2, code_change/3]).
|
|
||||||
|
|
||||||
-include_lib("kernel/include/file.hrl").
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_pb.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-endif. % TEST
|
|
||||||
|
|
||||||
-record(state, {
|
|
||||||
%% Ranch's transport management stuff
|
|
||||||
ref :: ranch:ref(),
|
|
||||||
socket :: socket(),
|
|
||||||
transport :: module(),
|
|
||||||
|
|
||||||
%% Machi FLU configurations, common for low and high
|
|
||||||
data_dir :: string(),
|
|
||||||
witness :: boolean(),
|
|
||||||
pb_mode :: undefined | high | low,
|
|
||||||
%% - Used in projection related requests in low mode
|
|
||||||
%% - Used in spawning CR client in high mode
|
|
||||||
proj_store :: pid(),
|
|
||||||
|
|
||||||
%% Low mode only items
|
|
||||||
%% Current best knowledge, used for wedge_self / bad_epoch check
|
|
||||||
epoch_id :: undefined | machi_dt:epoch_id(),
|
|
||||||
%% Used in dispatching append_chunk* reqs to the
|
|
||||||
%% append serializing process
|
|
||||||
flu_name :: pv1_server(),
|
|
||||||
%% Used in server_wedge_status to lookup the table
|
|
||||||
epoch_tab :: ets:tab(),
|
|
||||||
%% Clustering: cluster map version number
|
|
||||||
namespace_version = 0 :: machi_dt:namespace_version(),
|
|
||||||
%% Clustering: my (and my chain's) assignment to a specific namespace
|
|
||||||
namespace = <<>> :: machi_dt:namespace(),
|
|
||||||
|
|
||||||
%% High mode only
|
|
||||||
high_clnt :: pid(),
|
|
||||||
|
|
||||||
%% anything you want
|
|
||||||
props = [] :: proplists:proplist()
|
|
||||||
}).
|
|
||||||
|
|
||||||
-type socket() :: any().
|
|
||||||
-type state() :: #state{}.
|
|
||||||
|
|
||||||
%% @doc Ranch protocol callback: start one connection-handler process.
%%
%% The fourth argument is the protocol option list
%% `[FluName, Witness, DataDir, EpochTab, ProjStore, Props]'.  An
%% optional `namespace' entry in `Props' is copied into the handler
%% state; it defaults to `<<>>' and must be a binary.
-spec start_link(ranch:ref(), socket(), module(), [term()]) -> {ok, pid()}.
start_link(Ref, Socket, Transport, [FluName, Witness, DataDir, EpochTab, ProjStore, Props]) ->
    Namespace = proplists:get_value(namespace, Props, <<>>),
    %% Crash early on a misconfigured (non-binary) namespace.
    true = is_binary(Namespace),
    InitialState = #state{ref=Ref,
                          socket=Socket,
                          transport=Transport,
                          flu_name=FluName,
                          witness=Witness,
                          data_dir=DataDir,
                          epoch_tab=EpochTab,
                          proj_store=ProjStore,
                          namespace=Namespace,
                          props=Props},
    proc_lib:start_link(?MODULE, init, [InitialState]).
|
|
||||||
|
|
||||||
%% @doc Process body started via proc_lib by start_link/4.
%%
%% Acknowledges the start to the parent, waits for Ranch to hand over
%% the socket, records the epoch ID currently stored in the epoch
%% table, arms the socket for one message and then enters the
%% gen_server loop.
-spec init(state()) -> no_return().
init(#state{ref=Ref, socket=Socket, transport=Transport}=State0) ->
    ok = proc_lib:init_ack({ok, self()}),
    ok = ranch:accept_ack(Ref),
    {_Wedged_p, EpochID} = lookup_epoch(State0),
    SockOpts = [{active, once}|?PB_PACKET_OPTS],
    ok = Transport:setopts(Socket, SockOpts),
    State1 = State0#state{epoch_id=EpochID},
    gen_server:enter_loop(?MODULE, [], State1).
|
|
||||||
|
|
||||||
%% No synchronous calls are supported: log the request and answer with
%% an `unknown_message' error.
handle_call(Request, _From, State) ->
    lager:warning("~s:handle_call UNKNOWN message: ~w", [?MODULE, Request]),
    {reply, {error, {unknown_message, Request}}, State}.
|
|
||||||
|
|
||||||
%% No casts are supported: log the message and carry on unchanged.
handle_cast(Msg, State) ->
    lager:warning("~s:handle_cast UNKNOWN message: ~w", [?MODULE, Msg]),
    {noreply, State}.
|
|
||||||
|
|
||||||
%% TODO: Other transport support needed?? TLS/SSL, SCTP
|
|
||||||
%% Socket events for the connection owned by this process are routed
%% to the transport_* helpers; anything else is logged and ignored.
%% Only messages whose socket matches the one in the state are treated
%% as socket events.
handle_info({tcp, Socket, Data}=Event, #state{socket=Socket}=State) ->
    lager:debug("~s:handle_info: ~w", [?MODULE, Event]),
    transport_received(Socket, Data, State);
handle_info({tcp_closed, Socket}=Event, #state{socket=Socket}=State) ->
    lager:debug("~s:handle_info: ~w", [?MODULE, Event]),
    transport_closed(Socket, State);
handle_info({tcp_error, Socket, Reason}, #state{socket=Socket}=State) ->
    lager:warning("~s:handle_info (socket=~w) tcp_error: ~w", [?MODULE, Socket, Reason]),
    transport_error(Socket, Reason, State);
handle_info(Other, State) ->
    lager:warning("~s:handle_info UNKNOWN message: ~w", [?MODULE, Other]),
    {noreply, State}.
|
|
||||||
|
|
||||||
%% Shutdown hook: close the socket if there is one, and log a warning
%% for every reason other than `normal'.  Always returns `ok'.
terminate(Reason, #state{socket=Socket}=_State) ->
    case {Reason, Socket} of
        {normal, undefined} ->
            ok;
        {_, undefined} ->
            lager:warning("~s:terminate (socket=undefined): ~w", [?MODULE, Reason]),
            ok;
        {normal, _} ->
            (catch gen_tcp:close(Socket)),
            ok;
        {_, _} ->
            lager:warning("~s:terminate (socket=Socket): ~w", [?MODULE, Reason]),
            (catch gen_tcp:close(Socket)),
            ok
    end.
|
|
||||||
|
|
||||||
%% Hot code upgrade: the state is carried over unchanged.
code_change(_FromVsn, State, _UpgradeExtra) ->
    {ok, State}.
|
|
||||||
|
|
||||||
%% -- private
|
|
||||||
|
|
||||||
%%%% Common transport handling
|
|
||||||
|
|
||||||
%% @doc Handle one framed message received from the socket.
%%
%% The literal packet `<<"QUIT\n">>' stops the process normally.  Any
%% other packet is first decoded as a low-level request (accepted when
%% its `do_not_alter' field is 2); otherwise it is decoded as a
%% high-level request, whose `do_not_alter' field must be 1.  The
%% encoded response, if any, is sent back and the socket is re-armed
%% for the next message.  A send failure is handed to
%% transport_error/3, which does not return.
-spec transport_received(socket(), machi_dt:chunk(), state()) ->
          {noreply, state()} | {stop, normal, state()}.
transport_received(Socket, <<"QUIT\n">>, #state{socket=Socket}=S) ->
    {stop, normal, S};
transport_received(Socket, Bin, #state{transport=Transport}=S) ->
    {RespBin, S2} =
        case machi_pb:decode_mpb_ll_request(Bin) of
            LL_req when LL_req#mpb_ll_request.do_not_alter == 2 ->
                {R, NewS} = do_pb_ll_request(LL_req, S),
                {maybe_encode_response(R), set_mode(low, NewS)};
            _ ->
                HL_req = machi_pb:decode_mpb_request(Bin),
                1 = HL_req#mpb_request.do_not_alter,
                {R, NewS} = do_pb_hl_request(HL_req, make_high_clnt(S)),
                {machi_pb:encode_mpb_response(R), set_mode(high, NewS)}
        end,
    %% `async_no_response' means the command sends nothing back, so
    %% there is nothing to transmit before re-arming the socket.
    SendResult = case RespBin of
                     async_no_response -> ok;
                     _                 -> Transport:send(Socket, RespBin)
                 end,
    case SendResult of
        ok ->
            Transport:setopts(Socket, [{active, once}]),
            {noreply, S2};
        {error, Reason} ->
            transport_error(Socket, Reason, S2)
    end.
|
|
||||||
|
|
||||||
%% @doc The peer closed the connection: stop this handler normally.
-spec transport_closed(socket(), state()) -> {stop, term(), state()}.
transport_closed(_ClosedSocket, State) ->
    {stop, normal, State}.
|
|
||||||
|
|
||||||
%% @doc Give up on a broken connection: close the socket, log the
%% failure, and exit this process with reason `normal'.
%%
%% The local and peer addresses are captured *before* the socket is
%% closed so that the log line can show them; querying them after the
%% close cannot return the endpoints.  Both lookups and the close are
%% best-effort (wrapped in `catch') because the socket is already
%% known to be in an error state.
%%
%% No error response is sent to the peer.
%% TODO: earlier attempts to send one here sometimes made OTP's SASL
%% log a `badarg' from `erlang:port_command' that neither `catch' nor
%% try/catch could suppress; it is also an open question whether that
%% send was behind intermittent PULSE deadlock errors.
%% TODO: better to exit with `Reason' without logging?
-spec transport_error(socket(), term(), state()) -> no_return().
transport_error(Socket, Reason, #state{transport=Transport}=_S) ->
    LocalName = (catch Transport:sockname(Socket)),
    PeerName = (catch Transport:peername(Socket)),
    (catch Transport:close(Socket)),
    _ = lager:warning("Socket error (~w -> ~w): ~w",
                      [LocalName, PeerName, Reason]),
    exit(normal).
|
|
||||||
|
|
||||||
%% Encode a low-level response record, passing the `async_no_response'
%% marker through untouched.
maybe_encode_response(Resp) ->
    case Resp of
        async_no_response ->
            async_no_response;
        _ ->
            machi_pb:encode_mpb_ll_response(Resp)
    end.
|
|
||||||
|
|
||||||
%% Record the protocol mode on first use; once `pb_mode' is set it is
%% never changed by this function.
set_mode(Mode, State) ->
    case State of
        #state{pb_mode=undefined} ->
            State#state{pb_mode=Mode};
        _ ->
            State
    end.
|
|
||||||
|
|
||||||
%%%% Low PB mode %%%%
|
|
||||||
|
|
||||||
%% Serve one low-level protocol request and return
%% {EncodableResponse, NewState}.
%%
%% A connection already in high mode gets an error response instead.
%% `low_skip_wedge' wrappers and `low_proj' commands bypass the
%% epoch/namespace verification; every other command must carry
%% NSVersion, NS and EpochID in tuple positions 2, 3 and 4.
do_pb_ll_request(#mpb_ll_request{req_id=ReqID}, #state{pb_mode=high}=S) ->
    Result = {high_error, 41, "Low protocol request while in high mode"},
    {machi_pb_translate:to_pb_response(ReqID, unused, Result), S};
do_pb_ll_request(PB_request, S) ->
    case machi_pb_translate:from_pb_request(PB_request) of
        {ReqID, Cmd} ->
            {Result, S2} =
                case Cmd of
                    {low_skip_wedge, LowSubCmd} ->
                        %% Unprivileged command: no wedge check.
                        do_pb_ll_request3(LowSubCmd, S);
                    {low_proj, _LowSubCmd} ->
                        do_pb_ll_request3(Cmd, S);
                    _ ->
                        do_pb_ll_request2(element(2, Cmd), element(3, Cmd),
                                          element(4, Cmd), Cmd, S)
                end,
            {machi_pb_translate:to_pb_response(ReqID, Cmd, Result), S2}
    end.
|
|
||||||
|
|
||||||
%% do_pb_ll_request2(): Verification of epoch details & namespace details.
|
|
||||||
|
|
||||||
%% Gatekeeper in front of do_pb_ll_request3/2.  In order:
%%  - a malformed EpochID (not a 2-tuple) exits the process;
%%  - a wedged server answers {error, wedged};
%%  - a request epoch older than ours answers {error, bad_epoch};
%%    any other epoch mismatch wedges this server and answers
%%    {error, wedged};
%%  - a namespace version mismatch answers {error, bad_epoch} and a
%%    namespace name mismatch answers {error, bad_arg};
%%  - otherwise the command is executed.
do_pb_ll_request2(NSVersion, NS, EpochID, CMD, S) ->
    {Wedged_p, CurrentEpochID} = lookup_epoch(S),
    WellFormed = is_tuple(EpochID) andalso tuple_size(EpochID) == 2,
    case WellFormed of
        false ->
            exit({bad_epoch_id, EpochID, for, CMD});
        true when Wedged_p == true ->
            {{error, wedged}, S#state{epoch_id=CurrentEpochID}};
        true when EpochID /= CurrentEpochID ->
            {Epoch, _} = EpochID,
            {CurrentEpoch, _} = CurrentEpochID,
            case Epoch < CurrentEpoch of
                true ->
                    {{error, bad_epoch}, S};
                false ->
                    _ = machi_flu1:wedge_myself(S#state.flu_name, CurrentEpochID),
                    {{error, wedged}, S#state{epoch_id=CurrentEpochID}}
            end;
        true ->
            #state{namespace_version=MyNSVersion, namespace=MyNS} = S,
            case {NSVersion == MyNSVersion, NS == MyNS} of
                {false, _} ->
                    {{error, bad_epoch}, S};
                {true, false} ->
                    {{error, bad_arg}, S};
                {true, true} ->
                    do_pb_ll_request3(CMD, S)
            end
    end.
|
|
||||||
|
|
||||||
%% Fetch element 2 of the `epoch' entry in the epoch ETS table.
%% Callers in this module destructure it as {Wedged_p, EpochID}.
%% TODO: race in shutdown to access ets table after owner dies
lookup_epoch(#state{epoch_tab=EpochTab}) ->
    ets:lookup_element(EpochTab, epoch, 2).
|
|
||||||
|
|
||||||
%% Witness status does not matter below.
|
|
||||||
%% Execute one decoded low-level command; returns {Result, State}.
%% The first four clauses apply whether or not this server is a
%% witness.
do_pb_ll_request3({low_echo, Msg}, St) ->
    {Msg, St};
do_pb_ll_request3({low_auth, _User, _Pass}, St) ->
    {-6, St};
do_pb_ll_request3({low_wedge_status}, St) ->
    {do_server_wedge_status(St), St};
do_pb_ll_request3({low_proj, ProjCmd}, St) ->
    {do_server_proj_request(ProjCmd, St), St};

%% Witness status *matters* below: the data-path commands are served
%% only when `witness' is false.
do_pb_ll_request3({low_append_chunk, NSVersion, NS, EpochID, NSLocator,
                   Prefix, Chunk, CSum_tag, CSum, Opts},
                  #state{witness=false}=St) ->
    NSInfo = #ns_info{version=NSVersion, name=NS, locator=NSLocator},
    Reply = do_server_append_chunk(NSInfo, EpochID,
                                   Prefix, Chunk, CSum_tag, CSum,
                                   Opts, St),
    {Reply, St};
do_pb_ll_request3({low_write_chunk, _NSVersion, _NS, _EpochID,
                   File, Offset, Chunk, CSum_tag, CSum},
                  #state{witness=false}=St) ->
    {do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, St), St};
do_pb_ll_request3({low_read_chunk, _NSVersion, _NS, _EpochID,
                   File, Offset, Size, Opts},
                  #state{witness=false}=St) ->
    {do_server_read_chunk(File, Offset, Size, Opts, St), St};
do_pb_ll_request3({low_trim_chunk, _NSVersion, _NS, _EpochID,
                   File, Offset, Size, TriggerGC},
                  #state{witness=false}=St) ->
    {do_server_trim_chunk(File, Offset, Size, TriggerGC, St), St};
do_pb_ll_request3({low_checksum_list, File}, #state{witness=false}=St) ->
    {do_server_checksum_listing(File, St), St};
do_pb_ll_request3({low_list_files, _EpochID}, #state{witness=false}=St) ->
    {do_server_list_files(St), St};
do_pb_ll_request3({low_delete_migration, _EpochID, File},
                  #state{witness=false}=St) ->
    {do_server_delete_migration(File, St), St};
do_pb_ll_request3({low_trunc_hack, _EpochID, File}, #state{witness=false}=St) ->
    {do_server_trunc_hack(File, St), St};

%% A witness refuses everything not handled above.
do_pb_ll_request3(_, #state{witness=true}=St) ->
    {{error, bad_arg}, St}. % TODO: new status code??
|
|
||||||
|
|
||||||
%% Serve a projection-store sub-command of `low_proj'.
%%
%% Most clauses delegate directly to machi_projection_store.  A
%% `write_projection' whose epoch number equals ?SPAM_PROJ_EPOCH is
%% not stored: its single `dbg' element is forwarded to the fitness
%% server as a fitness update instead.  `kick_projection_reaction'
%% pokes the chain manager from a separate process and produces no
%% response.
do_server_proj_request({get_latest_epochid, ProjType},
                       #state{proj_store=Store}) ->
    machi_projection_store:get_latest_epochid(Store, ProjType);
do_server_proj_request({read_latest_projection, ProjType},
                       #state{proj_store=Store}) ->
    machi_projection_store:read_latest_projection(Store, ProjType);
do_server_proj_request({read_projection, ProjType, Epoch},
                       #state{proj_store=Store}) ->
    machi_projection_store:read(Store, ProjType, Epoch);
do_server_proj_request({write_projection, ProjType, Proj},
                       #state{flu_name=FluName, proj_store=Store}) ->
    if Proj#projection_v1.epoch_number == ?SPAM_PROJ_EPOCH ->
            FitnessName = machi_flu_psup:make_fitness_regname(FluName),
            [FitnessMap] = Proj#projection_v1.dbg,
            Author = Proj#projection_v1.author_server,
            catch machi_fitness:send_fitness_update_spam(
                    FitnessName, Author, FitnessMap);
       true ->
            catch machi_projection_store:write(Store, ProjType, Proj)
    end;
do_server_proj_request({get_all_projections, ProjType},
                       #state{proj_store=Store}) ->
    machi_projection_store:get_all_projections(Store, ProjType);
do_server_proj_request({list_all_projections, ProjType},
                       #state{proj_store=Store}) ->
    machi_projection_store:list_all_projections(Store, ProjType);
do_server_proj_request({kick_projection_reaction},
                       #state{flu_name=FluName}) ->
    %% Let the local chain manager know that it may want to react to
    %% the changed environment; do it out-of-band so this connection
    %% handler is not blocked.
    ChainMgr = machi_chain_manager1:make_chmgr_regname(FluName),
    Kick = fun() ->
                   catch machi_chain_manager1:trigger_react_to_env(ChainMgr)
           end,
    spawn(Kick),
    async_no_response.
|
|
||||||
|
|
||||||
%% Validate the file prefix, then hand the append over to
%% do_server_append_chunk2/8.  A prefix rejected by sanitize_prefix/1
%% yields {error, bad_arg}.
do_server_append_chunk(NSInfo, EpochID,
                       Prefix, Chunk, CSum_tag, CSum,
                       Opts, S) ->
    case sanitize_prefix(Prefix) =:= ok of
        true ->
            do_server_append_chunk2(NSInfo, EpochID,
                                    Prefix, Chunk, CSum_tag, CSum,
                                    Opts, S);
        false ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Ask the FLU's append server (registered as `FluName') to sequence
%% an append and translate its answer for the client.
%%
%% The function head only matches when the request's EpochID equals
%% the epoch stored in the connection state.  A checksum mismatch
%% detected by check_or_make_tagged_checksum/3 is reported as
%% {error, bad_checksum}; a `badarg' error raised anywhere in the body
%% is logged and reported as {error, bad_arg}.
do_server_append_chunk2(NSInfo, EpochID,
                        Prefix, Chunk, CSum_tag, Client_CSum,
                        Opts, #state{flu_name=FluName,
                                     epoch_id=EpochID}=_S) ->
    try
        Tagged = check_or_make_tagged_checksum(CSum_tag, Client_CSum, Chunk),
        AppendReq = {seq_append, self(), NSInfo, EpochID,
                     Prefix, Chunk, Tagged, Opts},
        case gen_server:call(FluName, AppendReq, 10*1000) of
            {assignment, Offset, File} ->
                {ok, {Offset, iolist_size(Chunk), File}};
            witness ->
                {error, bad_arg};
            wedged ->
                {error, wedged};
            {error, timeout} ->
                {error, partition}
        end
    catch
        throw:{bad_csum, _CS} ->
            {error, bad_checksum};
        error:badarg ->
            lager:error("badarg at ~w:do_server_append_chunk2:~w ~w",
                        [?MODULE, ?LINE, erlang:get_stacktrace()]),
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Write `Chunk' at `Offset' of `File' through the file's proxy
%% process, passing the client's checksum tag and checksum along as
%% metadata.  An unacceptable file name yields {error, bad_arg}; a
%% trimmed file yields {error, trimmed}.
do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, #state{flu_name=FluName}) ->
    case sanitize_file_string(File) of
        ok ->
            case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
                {ok, ProxyPid} ->
                    ClientMeta = [{client_csum_tag, CSum_tag},
                                  {client_csum, CSum}],
                    machi_file_proxy:write(ProxyPid, Offset, ClientMeta, Chunk);
                {error, trimmed} ->
                    {error, trimmed}
            end;
        _ ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Read `Size' bytes at `Offset' of `File' through the file's proxy
%% process and return the proxy's answer unchanged.  An unacceptable
%% file name yields {error, bad_arg}; a trimmed file yields
%% {error, trimmed}.
%%
%% XXX FIXME: for now the checksum data is omitted from the reply
%% because it blows up protobufs.
do_server_read_chunk(File, Offset, Size, Opts, #state{flu_name=FluName}) ->
    case sanitize_file_string(File) of
        ok ->
            case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
                {ok, ProxyPid} ->
                    machi_file_proxy:read(ProxyPid, Offset, Size, Opts);
                {error, trimmed} ->
                    {error, trimmed}
            end;
        _ ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Trim `Size' bytes at `Offset' of `File' through the file's proxy
%% process.  An unacceptable file name yields {error, bad_arg}.
do_server_trim_chunk(File, Offset, Size, TriggerGC, #state{flu_name=FluName}) ->
    lager:debug("Hi there! I'm trimming this: ~s, (~p, ~p), ~p~n",
                [File, Offset, Size, TriggerGC]),
    case sanitize_file_string(File) of
        ok ->
            case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
                {ok, ProxyPid} ->
                    machi_file_proxy:trim(ProxyPid, Offset, Size, TriggerGC);
                {error, trimmed} ->
                    %% Passed back as-is so the caller can (maybe)
                    %% trigger repair.
                    {error, trimmed}
            end;
        _ ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Return the checksum list of `File' as an external-term-format
%% binary, {ok, Bin}.  A listing within 1024 bytes of
%% ?PB_MAX_MSG_SIZE (or larger) is refused with {error, bad_arg}.
do_server_checksum_listing(File, #state{flu_name=FluName, data_dir=DataDir}=_S) ->
    case sanitize_file_string(File) of
        ok ->
            case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
                {ok, ProxyPid} ->
                    {ok, CSums} = machi_file_proxy:checksum_list(ProxyPid),
                    Encoded = erlang:term_to_binary(CSums),
                    Oversize = byte_size(Encoded) > (?PB_MAX_MSG_SIZE - 1024),
                    case Oversize of
                        true ->
                            %% TODO: Fix this limitation by streaming
                            %% the binary in multiple smaller PB
                            %% messages.  Also, don't read the file
                            %% all at once. ^_^
                            error_logger:error_msg("~s:~w oversize ~s\n",
                                                   [?MODULE, ?LINE, DataDir]),
                            {error, bad_arg};
                        false ->
                            {ok, Encoded}
                    end;
                {error, trimmed} ->
                    {error, trimmed}
            end;
        _ ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% List every entry of the data directory as {SizeInBytes, Name}
%% pairs, wrapped in {ok, List}.
do_server_list_files(#state{data_dir=DataDir}=_S) ->
    {_, WildPath} = machi_util:make_data_filename(DataDir, ""),
    SizeAndName =
        fun(Name) ->
                {ok, Info} = file:read_file_info(WildPath ++ "/" ++ Name),
                {Info#file_info.size, Name}
        end,
    {ok, lists:map(SizeAndName, filelib:wildcard("*", WildPath))}.
|
|
||||||
|
|
||||||
%% Report {Wedged_p, EpochID, NSVersion, NS} for this server.  While
%% no epoch has been recorded yet, ?DUMMY_PV1_EPOCH is reported in
%% place of `undefined'.
do_server_wedge_status(#state{namespace_version=NSVersion, namespace=NS}=S) ->
    {Wedged_p, StoredEpochID} = lookup_epoch(S),
    ReportedEpochID = case StoredEpochID of
                          undefined -> ?DUMMY_PV1_EPOCH;
                          _         -> StoredEpochID
                      end,
    {Wedged_p, ReportedEpochID, NSVersion, NS}.
|
|
||||||
|
|
||||||
%% Delete `File' from the data directory.  A missing file yields
%% {error, no_such_file}; an unacceptable name or any other delete
%% failure yields {error, bad_arg}.
do_server_delete_migration(File, #state{data_dir=DataDir}=_S) ->
    case sanitize_file_string(File) of
        ok ->
            {_, FullPath} = machi_util:make_data_filename(DataDir, File),
            translate_delete_result(file:delete(FullPath));
        _ ->
            {error, bad_arg}
    end.

%% Map the result of file:delete/1 onto the protocol's status values.
translate_delete_result(ok)              -> ok;
translate_delete_result({error, enoent}) -> {error, no_such_file};
translate_delete_result(_)               -> {error, bad_arg}.
|
|
||||||
|
|
||||||
%% Truncate `File' so that it ends at ?MINIMUM_OFFSET.  A missing file
%% yields {error, no_such_file}; an unacceptable name or any other
%% open failure yields {error, bad_arg}.
do_server_trunc_hack(File, #state{data_dir=DataDir}=_S) ->
    case sanitize_file_string(File) of
        ok ->
            {_, FullPath} = machi_util:make_data_filename(DataDir, File),
            OpenModes = [read, write, binary, raw],
            case file:open(FullPath, OpenModes) of
                {ok, Handle} ->
                    %% The handle is closed whether or not the
                    %% truncation succeeds.
                    try
                        {ok, ?MINIMUM_OFFSET} =
                            file:position(Handle, ?MINIMUM_OFFSET),
                        ok = file:truncate(Handle),
                        ok
                    after
                        file:close(Handle)
                    end;
                {error, enoent} ->
                    {error, no_such_file};
                _ ->
                    {error, bad_arg}
            end;
        _ ->
            {error, bad_arg}
    end.
|
|
||||||
|
|
||||||
%% Return `ok' when `Str' passes both has_no_prohibited_chars/1 and
%% machi_util:is_valid_filename/1, and `error' otherwise.
sanitize_file_string(Str) ->
    Acceptable = has_no_prohibited_chars(Str)
        andalso machi_util:is_valid_filename(Str),
    case Acceptable of
        true  -> ok;
        false -> error
    end.
|
|
||||||
|
|
||||||
%% Return `true' when `Str' (a string, binary or iolist) contains no
%% "/" character, and `false' otherwise.
%%
%% Previously both branches returned `true', so a name containing "/"
%% was never rejected by this check.  The callers use the result to
%% decide whether a client-supplied file name may be used.
has_no_prohibited_chars(Str) ->
    %% `{capture, none}' makes re:run/3 return just match | nomatch.
    re:run(Str, "/", [{capture, none}]) =:= nomatch.
|
|
||||||
|
|
||||||
%% Return `ok' when `Prefix' contains neither "/" nor "^", and `error'
%% otherwise.  "^" is prohibited because it is used as the component
%% delimiter.
sanitize_prefix(Prefix) ->
    case re:run(Prefix, "/|\\^", [{capture, none}]) of
        nomatch -> ok;
        match   -> error
    end.
|
|
||||||
|
|
||||||
%% Produce the tagged checksum to store for `Chunk'.
%%
%% With ?CSUM_TAG_NONE the client supplied no checksum, so one is
%% computed here.  With ?CSUM_TAG_CLIENT_SHA the client's checksum is
%% compared against a locally computed one; a mismatch throws
%% {bad_csum, LocalCSum}.
check_or_make_tagged_checksum(?CSUM_TAG_NONE, _Client_CSum, Chunk) ->
    %% TODO: A client that sends no checksum gets what it deserves
    %% with respect to data integrity, alas.  With client-side Chain
    %% Replication each server calculates the checksum independently,
    %% which isn't exactly what ought to happen for the best data
    %% integrity checking.  With server-side CR the checksum should be
    %% calculated by the head and passed down the chain together with
    %% the value.
    LocalCSum = machi_util:checksum_chunk(Chunk),
    machi_util:make_tagged_csum(server_sha, LocalCSum);
check_or_make_tagged_checksum(?CSUM_TAG_CLIENT_SHA, Client_CSum, Chunk) ->
    LocalCSum = machi_util:checksum_chunk(Chunk),
    case LocalCSum == Client_CSum of
        true ->
            machi_util:make_tagged_csum(server_sha, Client_CSum);
        false ->
            throw({bad_csum, LocalCSum})
    end.
|
|
||||||
|
|
||||||
%%%% High PB mode %%%%
|
|
||||||
|
|
||||||
%% Serve one high-level protocol request and return
%% {ResponseRecord, NewState}.  A connection already in low mode gets
%% an error response instead.
do_pb_hl_request(#mpb_request{req_id=ReqID}, #state{pb_mode=low}=S) ->
    Result = {low_error, 41, "High protocol request while in low mode"},
    {machi_pb_translate:to_pb_response(ReqID, unused, Result), S};
do_pb_hl_request(PB_request, S0) ->
    {ReqID, Cmd} = machi_pb_translate:from_pb_request(PB_request),
    {Result, S1} = do_pb_hl_request2(Cmd, S0),
    Response = machi_pb_translate:to_pb_response(ReqID, Cmd, Result),
    {Response, S1}.
|
|
||||||
|
|
||||||
%% Execute one decoded high-level command; returns {Result, State}.
%% Chunk and file operations are forwarded to the chain-replication
%% client stored in `high_clnt'.
do_pb_hl_request2({high_echo, Msg}, St) ->
    {Msg, St};
do_pb_hl_request2({high_auth, _User, _Pass}, St) ->
    {-77, St};
do_pb_hl_request2({high_append_chunk=Op, NS, Prefix, Chunk, TaggedCSum, Opts},
                  #state{high_clnt=Clnt}=St) ->
    NSInfo = #ns_info{name=NS}, % TODO populate other fields
    todo_perhaps_remind_ns_locator_not_chosen(Op),
    {machi_cr_client:append_chunk(Clnt, NSInfo,
                                  Prefix, Chunk, TaggedCSum, Opts), St};
do_pb_hl_request2({high_write_chunk=Op, File, Offset, Chunk, CSum},
                  #state{high_clnt=Clnt}=St) ->
    todo_perhaps_remind_ns_locator_not_chosen(Op),
    %% No namespace info is available at this level yet.
    {machi_cr_client:write_chunk(Clnt, undefined, File, Offset, Chunk, CSum), St};
do_pb_hl_request2({high_read_chunk=Op, File, Offset, Size, Opts},
                  #state{high_clnt=Clnt}=St) ->
    todo_perhaps_remind_ns_locator_not_chosen(Op),
    {machi_cr_client:read_chunk(Clnt, undefined, File, Offset, Size, Opts), St};
do_pb_hl_request2({high_trim_chunk=Op, File, Offset, Size},
                  #state{high_clnt=Clnt}=St) ->
    todo_perhaps_remind_ns_locator_not_chosen(Op),
    {machi_cr_client:trim_chunk(Clnt, undefined, File, Offset, Size), St};
do_pb_hl_request2({high_checksum_list, File}, #state{high_clnt=Clnt}=St) ->
    {machi_cr_client:checksum_list(Clnt, File), St};
do_pb_hl_request2({high_list_files}, #state{high_clnt=Clnt}=St) ->
    {machi_cr_client:list_files(Clnt), St}.
|
|
||||||
|
|
||||||
%% Ensure the state carries a machi_cr_client process in `high_clnt'.
%% When none is set yet, the latest private projection is read from the
%% projection store, a client is started for the servers listed in its
%% members_dict, and the client pid is stored in the state.  A state that
%% already has a client is returned untouched.
make_high_clnt(#state{high_clnt=undefined, proj_store=Store}=State) ->
    {ok, Projection} =
        machi_projection_store:read_latest_projection(Store, private),
    Members = orddict:to_list(Projection#projection_v1.members_dict),
    Servers = [Server || {_, Server} <- Members],
    {ok, Client} = machi_cr_client:start_link(Servers),
    State#state{high_clnt=Client};
make_high_clnt(State) ->
    State.
|
|
||||||
|
|
||||||
%% Print a one-time TODO reminder per operation name.
%% The process dictionary key {?MODULE, Op} records that the reminder for
%% Op has already been printed by the calling process.
todo_perhaps_remind_ns_locator_not_chosen(Op) ->
    Marker = {?MODULE, Op},
    case get(Marker) =:= undefined of
        true ->
            io:format(user, "TODO op ~w is using default locator value\n",
                      [Op]),
            put(Marker, true);
        false ->
            ok
    end.
|
|
||||||
|
|
|
@ -1,118 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc A supervisor to hold dynamic processes inside a single
|
|
||||||
%% FLU service, ranch listener and append server.
|
|
||||||
|
|
||||||
%% TODO: This supervisor is maybe useless. First introduced for
|
|
||||||
%% workaround to start listener dynamically in flu1 initialization
|
|
||||||
%% phase. Because `machi_flu_psup' is being blocked in flu1
|
|
||||||
%% initialization time, adding a child to the supervisor leads to
|
|
||||||
%% deadlock. If initialization can be done only by static arguments,
|
|
||||||
%% then this supervisor should be removed and added as a direct child
|
|
||||||
%% of `machi_flu_psup'.
|
|
||||||
|
|
||||||
-module(machi_flu1_subsup).
|
|
||||||
-behaviour(supervisor).
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
-export([start_link/1,
|
|
||||||
start_append_server/4,
|
|
||||||
stop_append_server/1,
|
|
||||||
start_listener/7,
|
|
||||||
stop_listener/1,
|
|
||||||
subsup_name/1,
|
|
||||||
listener_name/1]).
|
|
||||||
|
|
||||||
%% supervisor callback
|
|
||||||
-export([init/1]).
|
|
||||||
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
-define(SHUTDOWN, 5000).
|
|
||||||
-define(BACKLOG, 8192).
|
|
||||||
|
|
||||||
%% Start the sub-supervisor, locally registered as subsup_name(FluName).
-spec start_link(pv1_server()) -> {ok, pid()}.
start_link(FluName) ->
    RegName = {local, subsup_name(FluName)},
    supervisor:start_link(RegName, ?MODULE, []).
|
|
||||||
|
|
||||||
%% Add the append server as a child of this FLU's sub-supervisor.
-spec start_append_server(pv1_server(), boolean(), boolean(),
                          undefined | machi_dt:epoch_id()) ->
                                 {ok, pid()}.
start_append_server(FluName, Witness_p, Wedged_p, EpochId) ->
    Sup = subsup_name(FluName),
    ChildSpec = append_server_spec(FluName, Witness_p, Wedged_p, EpochId),
    supervisor:start_child(Sup, ChildSpec).
|
|
||||||
|
|
||||||
%% Terminate the append server child and remove its child spec from this
%% FLU's sub-supervisor.  Crashes with badmatch if either step fails.
-spec stop_append_server(pv1_server()) -> ok.
stop_append_server(FluName) ->
    %% The append server is started by start_append_server/4 under the
    %% supervisor registered as subsup_name(FluName), with child id
    %% FluName, so that same supervisor must be addressed here (the
    %% listener name is a child id, not a supervisor).
    SubSup = subsup_name(FluName),
    ok = supervisor:terminate_child(SubSup, FluName),
    ok = supervisor:delete_child(SubSup, FluName).
|
|
||||||
|
|
||||||
%% Add the ranch TCP listener as a child of this FLU's sub-supervisor.
-spec start_listener(pv1_server(), inet:port_number(), boolean(),
                     string(), ets:tab(), atom() | pid(),
                     proplists:proplist()) -> {ok, pid()}.
start_listener(FluName, TcpPort, Witness, DataDir, EpochTab, ProjStore,
               Props) ->
    Sup = subsup_name(FluName),
    ChildSpec = listener_spec(FluName, TcpPort, Witness, DataDir,
                              EpochTab, ProjStore, Props),
    supervisor:start_child(Sup, ChildSpec).
|
|
||||||
|
|
||||||
%% Terminate the listener child and remove its child spec from this FLU's
%% sub-supervisor.  Crashes with badmatch if either step fails.
%%
%% NOTE(review): the child id used here is listener_name(FluName), while
%% the child spec comes from ranch:child_spec/6; confirm that the id ranch
%% puts in that spec is the bare listener name for the ranch version in use.
-spec stop_listener(pv1_server()) -> ok.
stop_listener(FluName) ->
    Sup = subsup_name(FluName),
    ChildId = listener_name(FluName),
    ok = supervisor:terminate_child(Sup, ChildId),
    ok = supervisor:delete_child(Sup, ChildId).
|
|
||||||
|
|
||||||
%% Registered name of the sub-supervisor for FluName: the FLU name with
%% the suffix "_flu1_subsup".
-spec subsup_name(pv1_server()) -> atom().
subsup_name(FluName) when is_atom(FluName) ->
    Base = atom_to_list(FluName),
    list_to_atom(Base ++ "_flu1_subsup").
|
|
||||||
|
|
||||||
%% Name used for the listener of FluName: the FLU name with the suffix
%% "_listener".
-spec listener_name(pv1_server()) -> atom().
listener_name(FluName) ->
    Base = atom_to_list(FluName),
    list_to_atom(Base ++ "_listener").
|
|
||||||
|
|
||||||
%% Supervisor callback
|
|
||||||
|
|
||||||
%% Start with no children; they are added later through
%% start_append_server/4 and start_listener/7.
init([]) ->
    {ok, {{one_for_all, 1000, 10}, []}}.
|
|
||||||
|
|
||||||
%% private
|
|
||||||
|
|
||||||
%% Build the ranch child spec for the FLU's TCP listener: 10 acceptors on
%% TcpPort with a listen backlog of ?BACKLOG, using machi_flu1_net_server
%% as the protocol module.
-spec listener_spec(pv1_server(), inet:port_number(), boolean(),
                    string(), ets:tab(), atom() | pid(),
                    proplists:proplist()) -> supervisor:child_spec().
listener_spec(FluName, TcpPort, Witness, DataDir, EpochTab, ProjStore, Props) ->
    AcceptorCount = 10,
    ProtocolOpts = [FluName, Witness, DataDir, EpochTab, ProjStore, Props],
    ranch:child_spec(listener_name(FluName), AcceptorCount,
                     ranch_tcp, [{port, TcpPort}, {backlog, ?BACKLOG}],
                     machi_flu1_net_server, ProtocolOpts).
|
|
||||||
|
|
||||||
%% Child spec for the append server: a permanent worker whose child id is
%% the FLU name and whose shutdown timeout is ?SHUTDOWN.
-spec append_server_spec(pv1_server(), boolean(), boolean(),
                         undefined | machi_dt:epoch_id()) -> supervisor:child_spec().
append_server_spec(FluName, Witness_p, Wedged_p, EpochId) ->
    StartArgs = [FluName, Witness_p, Wedged_p, EpochId],
    StartMFA = {machi_flu1_append_server, start_link, StartArgs},
    {FluName, StartMFA, permanent, ?SHUTDOWN, worker, [machi_flu1_append_server]}.
|
|
|
@ -1,232 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% @doc This process is responsible for managing filenames assigned to
|
|
||||||
%% prefixes. It's started out of `machi_flu_psup'.
|
|
||||||
%%
|
|
||||||
%% Supported operations include finding the "current" filename assigned to
|
|
||||||
%% a prefix. Incrementing the sequence number and returning a new file name
|
|
||||||
%% and listing all data files assigned to a given prefix.
|
|
||||||
%%
|
|
||||||
%% All prefixes should have the form of `{prefix, P}'. Single filename
|
|
||||||
%% return values have the form of `{file, F}'.
|
|
||||||
%%
|
|
||||||
%% <h2>Finding the current file associated with a sequence</h2>
|
|
||||||
%% First it looks up the sequence number from the prefix name. If
|
|
||||||
%% no sequence file is found, it uses 0 as the sequence number and searches
|
|
||||||
%% for a matching file with the prefix and 0 as the sequence number.
|
|
||||||
%% If no file is found, then it generates a new filename by incorporating
|
|
||||||
%% the given prefix, a randomly generated (v4) UUID and 0 as the
|
|
||||||
%% sequence number.
|
|
||||||
%%
|
|
||||||
%% If the sequence number is > 0, then the process scans the filesystem
|
|
||||||
%% looking for a filename which matches the prefix and given sequence number and
|
|
||||||
%% returns that.
|
|
||||||
|
|
||||||
-module(machi_flu_filename_mgr).
|
|
||||||
-behavior(gen_server).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-compile(export_all).
|
|
||||||
-endif.
|
|
||||||
|
|
||||||
-export([
|
|
||||||
child_spec/2,
|
|
||||||
start_link/2,
|
|
||||||
find_or_make_filename_from_prefix/4,
|
|
||||||
increment_prefix_sequence/3,
|
|
||||||
list_files_by_prefix/2
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
-export([
|
|
||||||
init/1,
|
|
||||||
handle_cast/2,
|
|
||||||
handle_call/3,
|
|
||||||
handle_info/2,
|
|
||||||
terminate/2,
|
|
||||||
code_change/3
|
|
||||||
]).
|
|
||||||
|
|
||||||
-define(TIMEOUT, 10 * 1000).
|
|
||||||
-include("machi.hrl"). %% included for #ns_info record
|
|
||||||
-include("machi_projection.hrl"). %% included for pv1_epoch type
|
|
||||||
|
|
||||||
-record(state, {fluname :: atom(),
|
|
||||||
tid :: ets:tid(),
|
|
||||||
datadir :: string(),
|
|
||||||
epoch :: pv1_epoch()
|
|
||||||
}).
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
|
|
||||||
%% Supervisor child spec for the filename manager of FluName: a permanent
%% worker with a 5000 ms shutdown timeout.
child_spec(FluName, DataDir) ->
    StartMFA = {?MODULE, start_link, [FluName, DataDir]},
    {make_filename_mgr_name(FluName), StartMFA,
     permanent, 5000, worker, [?MODULE]}.
|
|
||||||
|
|
||||||
%% Start the filename manager, locally registered under
%% make_filename_mgr_name(FluName).
start_link(FluName, DataDir) when is_atom(FluName), is_list(DataDir) ->
    RegName = {local, make_filename_mgr_name(FluName)},
    gen_server:start_link(RegName, ?MODULE, [FluName, DataDir], []).
|
|
||||||
|
|
||||||
-spec find_or_make_filename_from_prefix( FluName :: atom(),
                                         EpochId :: pv1_epoch(),
                                         Prefix :: {prefix, string()},
                                         machi_dt:ns_info()) ->
        {file, Filename :: string()} | {error, Reason :: term() } | timeout.
% @doc Ask the filename manager of `FluName' for the filename currently
% assigned to a prefix, creating one if needed.  The prefix must be a
% tagged tuple `{prefix, P}' and the last argument an `#ns_info{}' record.
% Any other argument shape (including a non-atom `FluName') is logged and
% raises `badarg'.
find_or_make_filename_from_prefix(FluName, EpochId, {prefix, Prefix},
                                  #ns_info{}=NSInfo)
  when is_atom(FluName) ->
    Server = make_filename_mgr_name(FluName),
    Request = {find_filename, FluName, EpochId, NSInfo, Prefix},
    gen_server:call(Server, Request, ?TIMEOUT);
find_or_make_filename_from_prefix(_FluName, _EpochId, Other, Other2) ->
    lager:error("~p is not a valid prefix/locator ~p", [Other, Other2]),
    error(badarg).
|
|
||||||
|
|
||||||
-spec increment_prefix_sequence( FluName :: atom(), NSInfo :: machi_dt:ns_info(), Prefix :: {prefix, string()} ) ->
        ok | {error, Reason :: term() } | timeout.
% @doc Ask the filename manager of `FluName' to bump the sequence counter
% of a prefix.  The prefix must have the form `{prefix, P}'; anything that
% does not match the first clause is logged and raises `badarg'.
increment_prefix_sequence(FluName, #ns_info{}=NSInfo, {prefix, Prefix})
  when is_atom(FluName) ->
    Server = make_filename_mgr_name(FluName),
    gen_server:call(Server, {increment_sequence, NSInfo, Prefix}, ?TIMEOUT);
increment_prefix_sequence(_FluName, _NSInfo, Other) ->
    lager:error("~p is not a valid prefix.", [Other]),
    error(badarg).
|
|
||||||
|
|
||||||
-spec list_files_by_prefix( FluName :: atom(), Prefix :: {prefix, string()} ) ->
    [ file:name() ] | timeout | {error, Reason :: term() }.
% @doc Ask the filename manager of `FluName' for the list of data files
% belonging to a prefix of the form `{prefix, P}'.  Anything that does not
% match the first clause is logged and raises `badarg'.
list_files_by_prefix(FluName, {prefix, Prefix}) when is_atom(FluName) ->
    Server = make_filename_mgr_name(FluName),
    gen_server:call(Server, {list_files, Prefix}, ?TIMEOUT);
list_files_by_prefix(_FluName, Other) ->
    lager:error("~p is not a valid prefix.", [Other]),
    error(badarg).
|
|
||||||
|
|
||||||
%% gen_server API
|
|
||||||
%% Create the named ETS table that caches {NS, NSLocator, Prefix} ->
%% filename entries and start with the dummy epoch.
init([FluName, DataDir]) ->
    TableName = make_filename_mgr_name(FluName),
    Tid = ets:new(TableName, [named_table, {read_concurrency, true}]),
    InitialState = #state{fluname = FluName,
                          epoch = ?DUMMY_PV1_EPOCH,
                          datadir = DataDir,
                          tid = Tid},
    {ok, InitialState}.
|
|
||||||
|
|
||||||
%% No casts are part of this server's protocol; log and ignore.
handle_cast(Msg, S) ->
    lager:warning("Got unknown cast ~p", [Msg]),
    {noreply, S}.
|
|
||||||
|
|
||||||
%% Important assumption: by the time we reach here the EpochId is kosher.
%% The FLU has already validated that the caller's epoch id and the FLU's
%% epoch id are the same.  So we *assume* that remains the case here -
%% that is to say, we are not wedged.
%%
%% find_filename, caller's epoch equals the epoch in our state: business
%% as usual, answer from the cache (or create and cache).
handle_call({find_filename, FluName, EpochId, NSInfo, Prefix}, _From,
            #state{datadir = Dir, epoch = EpochId, tid = Tab} = State) ->
    Filename = handle_find_file(FluName, Tab, NSInfo, Prefix, Dir),
    {reply, {file, Filename}, State};

%% find_filename, epochs differ (otherwise the clause above would have
%% matched): increment the sequence number, generate a filename, cache
%% it, and remember the caller's epoch.
handle_call({find_filename, _FluName, EpochId, NSInfo, Prefix}, _From,
            #state{datadir = Dir, tid = Tab} = State) ->
    Filename = increment_and_cache_filename(Tab, Dir, NSInfo, Prefix),
    {reply, {file, Filename}, State#state{epoch = EpochId}};

%% increment_sequence: only the name and locator of the caller's ns_info
%% are carried over into the record used for the increment.
handle_call({increment_sequence, #ns_info{name=NS, locator=NSLocator}, Prefix},
            _From, #state{datadir = Dir, tid = Tab} = State) ->
    NSInfo = #ns_info{name=NS, locator=NSLocator},
    _Filename = increment_and_cache_filename(Tab, Dir, NSInfo, Prefix),
    {reply, ok, State};

%% list_files: the directory scan runs in a separate process, which sends
%% the reply itself, so this server is not blocked by it.
handle_call({list_files, Prefix}, From, #state{datadir = Dir} = State) ->
    Scan = fun() ->
                   Files = list_files(Dir, Prefix),
                   gen_server:reply(From, Files)
           end,
    spawn(Scan),
    {noreply, State};

handle_call(Req, From, State) ->
    lager:warning("Got unknown call ~p from ~p", [Req, From]),
    {reply, hoge, State}.
|
|
||||||
|
|
||||||
%% No raw messages are expected; log and ignore.
handle_info(Msg, S) ->
    lager:warning("Got unknown info ~p", [Msg]),
    {noreply, S}.
|
|
||||||
|
|
||||||
%% Log the shutdown reason; there is nothing to clean up here.
terminate(Why, _S) ->
    lager:info("Shutting down because ~p", [Why]),
    ok.
|
|
||||||
|
|
||||||
%% No state migration: the state is passed through unchanged.
code_change(_OldVsn, S, _Extra) ->
    {ok, S}.
|
|
||||||
|
|
||||||
%% private
|
|
||||||
|
|
||||||
%% Quoted from https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
%% MIT License
%%
%% Format 16 random bytes as a 36-character UUID string (returned as
%% io_lib chardata).  Only the low 12 bits of the third 16-bit group are
%% printed after the literal version digit "4"; the fourth group keeps
%% its low 14 bits with the top bit set.
generate_uuid_v4_str() ->
    <<TimeLow:32, TimeMid:16, _:4, TimeHi:12, _:2, ClockSeq:14, Node:48>> =
        crypto:strong_rand_bytes(16),
    Fields = [TimeLow, TimeMid, TimeHi, 16#8000 bor ClockSeq, Node],
    io_lib:format("~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b", Fields).
|
|
||||||
|
|
||||||
%% List the data files whose names match the wildcard "*^Prefix^*".
%% machi_util:make_data_filename/2 supplies the file pattern and a path;
%% the search runs in that path's directory.
list_files(DataDir, Prefix) ->
    Glob = "*^" ++ Prefix ++ "^*",
    {PatternBin, FullPath} = machi_util:make_data_filename(DataDir, Glob),
    SearchDir = filename:dirname(FullPath),
    filelib:wildcard(binary_to_list(PatternBin), SearchDir).
|
|
||||||
|
|
||||||
%% Registered name (also the ETS table name) of the filename manager for
%% FluName: the FLU name with the suffix "_filename_mgr".
make_filename_mgr_name(FluName) when is_atom(FluName) ->
    Base = atom_to_list(FluName),
    list_to_atom(Base ++ "_filename_mgr").
|
|
||||||
|
|
||||||
%% Return the filename cached for {NS, NSLocator, Prefix}.  On a cache
%% miss, read the prefix's maximum file number, generate a filename from
%% it, cache that filename and return it.
handle_find_file(_FluName, Tid, #ns_info{name=NS, locator=NSLocator}, Prefix, DataDir) ->
    CacheKey = {NS, NSLocator, Prefix},
    case ets:lookup(Tid, CacheKey) of
        [{_, Cached}] ->
            Cached;
        [] ->
            SeqNum = machi_util:read_max_filenum(DataDir, NS, NSLocator, Prefix),
            Fresh = generate_filename(DataDir, NS, NSLocator, Prefix, SeqNum),
            true = ets:insert(Tid, {CacheKey, Fresh}),
            Fresh
    end.
|
|
||||||
|
|
||||||
%% Build a new data filename (as a string) from the namespace, locator,
%% prefix, a freshly generated UUID and the sequence number N.
generate_filename(DataDir, NS, NSLocator, Prefix, N) ->
    UUID = generate_uuid_v4_str(),
    {NameBin, _FullPath} =
        machi_util:make_data_filename(DataDir, NS, NSLocator, Prefix, UUID, N),
    binary_to_list(NameBin).
|
|
||||||
|
|
||||||
%% Increment the prefix's maximum file number, generate a filename for
%% the new number, overwrite the cache entry with it and return it.
increment_and_cache_filename(Tid, DataDir, #ns_info{name=NS,locator=NSLocator}, Prefix) ->
    ok = machi_util:increment_max_filenum(DataDir, NS, NSLocator, Prefix),
    SeqNum = machi_util:read_max_filenum(DataDir, NS, NSLocator, Prefix),
    Filename = generate_filename(DataDir, NS, NSLocator, Prefix, SeqNum),
    CacheEntry = {{NS, NSLocator, Prefix}, Filename},
    true = ets:insert(Tid, CacheEntry),
    Filename.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
-endif.
|
|
|
@ -1,297 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc This is a metadata service for the machi FLU which currently
|
|
||||||
%% tracks the mappings between filenames and file proxies.
|
|
||||||
%%
|
|
||||||
%% The service takes a given hash space and spreads it out over a
|
|
||||||
%% pool of N processes which are responsible for 1/Nth the hash
|
|
||||||
%% space. When a user requests an operation on a particular file
|
|
||||||
%% the filename is hashed into the hash space and the request
|
|
||||||
%% forwarded to a particular manager responsible for that slice
|
|
||||||
%% of the hash space.
|
|
||||||
%%
|
|
||||||
%% The current hash implementation is `erlang:phash2/1' which has
|
|
||||||
%% a range between 0..2^27-1 or 134,217,727.
|
|
||||||
|
|
||||||
-module(machi_flu_metadata_mgr).
|
|
||||||
-behaviour(gen_server).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
|
|
||||||
-define(MAX_MGRS, 10). %% number of managers to start by default.
|
|
||||||
-define(HASH(X), erlang:phash2(X)). %% hash algorithm to use
|
|
||||||
-define(TIMEOUT, 10 * 1000). %% 10 second timeout
|
|
||||||
|
|
||||||
-define(KNOWN_FILES_LIST_PREFIX, "known_files_").
|
|
||||||
|
|
||||||
-record(state, {fluname :: atom(),
|
|
||||||
datadir :: string(),
|
|
||||||
tid :: ets:tid(),
|
|
||||||
cnt :: non_neg_integer(),
|
|
||||||
trimmed_files :: machi_plist:plist()
|
|
||||||
}).
|
|
||||||
|
|
||||||
%% This record goes in the ets table where filename is the key
|
|
||||||
-record(md, {filename :: string(),
|
|
||||||
proxy_pid :: undefined|pid(),
|
|
||||||
mref :: undefined|reference() %% monitor ref for file proxy
|
|
||||||
}).
|
|
||||||
|
|
||||||
%% public api
|
|
||||||
-export([
|
|
||||||
child_spec/4,
|
|
||||||
start_link/4,
|
|
||||||
lookup_manager_pid/2,
|
|
||||||
lookup_proxy_pid/2,
|
|
||||||
start_proxy_pid/2,
|
|
||||||
stop_proxy_pid/2,
|
|
||||||
stop_proxy_pid_rollover/2,
|
|
||||||
build_metadata_mgr_name/2,
|
|
||||||
trim_file/2
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
-export([
|
|
||||||
init/1,
|
|
||||||
handle_cast/2,
|
|
||||||
handle_call/3,
|
|
||||||
handle_info/2,
|
|
||||||
terminate/2,
|
|
||||||
code_change/3
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% Public API
|
|
||||||
%% Registered name of metadata manager number N for FluName, e.g.
%% `foo_metadata_mgr_3' for (foo, 3).
build_metadata_mgr_name(FluName, N) when is_atom(FluName), is_integer(N) ->
    list_to_atom(lists:concat([FluName, "_metadata_mgr_", N])).
|
|
||||||
|
|
||||||
%% Supervisor child spec for metadata manager number C of FluName: a
%% permanent worker with a 5000 ms shutdown timeout.  N is handed to
%% start_link/4 unchanged.
child_spec(FluName, C, DataDir, N) ->
    MgrName = build_metadata_mgr_name(FluName, C),
    StartMFA = {?MODULE, start_link, [FluName, MgrName, DataDir, N]},
    {MgrName, StartMFA, permanent, 5000, worker, [?MODULE]}.
|
|
||||||
|
|
||||||
%% Start one metadata manager, locally registered as Name.
start_link(FluName, Name, DataDir, Num) when is_atom(Name), is_list(DataDir) ->
    InitArgs = [FluName, Name, DataDir, Num],
    gen_server:start_link({local, Name}, ?MODULE, InitArgs, []).
|
|
||||||
|
|
||||||
%% Pid of the manager process responsible for Filename, as returned by
%% whereis/1 on that manager's registered name.
lookup_manager_pid(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    whereis(MgrName).
|
|
||||||
|
|
||||||
%% Ask the responsible manager for the file proxy pid recorded for
%% Filename.
lookup_proxy_pid(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    gen_server:call(MgrName, {proxy_pid, Filename}, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% Ask the responsible manager to start (or reuse) the file proxy for
%% Filename.
start_proxy_pid(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    gen_server:call(MgrName, {start_proxy_pid, Filename}, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% Ask the responsible manager to stop the file proxy for Filename
%% (rollover flag false).
stop_proxy_pid(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    gen_server:call(MgrName, {stop_proxy_pid, false, Filename}, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% Same request as stop_proxy_pid/2 but with the rollover flag set to
%% true.
stop_proxy_pid_rollover(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    gen_server:call(MgrName, {stop_proxy_pid, true, Filename}, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% Ask the responsible manager to record Filename as trimmed.
trim_file(FluName, {file, Filename}) ->
    MgrName = get_manager_atom(FluName, Filename),
    gen_server:call(MgrName, {trim_file, Filename}, ?TIMEOUT).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
|
||||||
%% Open the persistent list of trimmed files and create the ETS table of
%% #md{} records (keyed on the filename field, element 2).
init([FluName, Name, DataDir, Num]) ->
    %% Important: a persistent store is needed to remember deleted
    %% (trimmed) files, to prevent their resurrection after a FLU
    %% restart followed by an append.
    ListFile = ?KNOWN_FILES_LIST_PREFIX ++ atom_to_list(FluName),
    {ok, Trimmed} = machi_plist:open(filename:join([DataDir, ListFile]), []),
    %% TODO: make sure none of the listed files still exists; any that
    %% remain can simply be deleted, since they are in the list
    %% *because* they were trimmed.

    TableOpts = [{keypos, 2}, {read_concurrency, true}, {write_concurrency, true}],
    Tid = ets:new(Name, TableOpts),
    {ok, #state{fluname = FluName, datadir = DataDir, tid = Tid, cnt = Num,
                trimmed_files = Trimmed}}.
|
|
||||||
|
|
||||||
%% No casts are part of this server's protocol; log and ignore.
handle_cast(Msg, S) ->
    lager:warning("Got unknown cast ~p", [Msg]),
    {noreply, S}.
|
|
||||||
|
|
||||||
%% proxy_pid: reply with the recorded proxy pid, or `undefined' when the
%% file has no record.
handle_call({proxy_pid, Filename}, _From, #state{tid = Tid} = State) ->
    Found = lookup_md(Tid, Filename),
    Reply = case Found of
                not_found -> undefined;
                _ -> Found#md.proxy_pid
            end,
    {reply, Reply, State};

%% start_proxy_pid: refuse files in the trimmed list; otherwise start a
%% proxy unless the record already has one, store the record and reply
%% with {ok, Pid}.
handle_call({start_proxy_pid, Filename}, _From,
            #state{fluname = FluName, tid = Tid, datadir = DataDir,
                   trimmed_files = Trimmed} = State) ->
    case machi_plist:find(Trimmed, Filename) of
        true ->
            {reply, {error, trimmed}, State};
        false ->
            Record = case lookup_md(Tid, Filename) of
                         not_found ->
                             start_file_proxy(FluName, DataDir, Filename);
                         #md{proxy_pid = undefined} = Idle ->
                             start_file_proxy(FluName, DataDir, Idle);
                         #md{proxy_pid = _Running} = Active ->
                             Active
                     end,
            update_ets(Tid, Record),
            {reply, {ok, Record#md.proxy_pid}, State}
    end;

%% stop_proxy_pid: nothing to do without a running proxy.  Otherwise the
%% monitor is dropped first; then either the rollover path is taken or
%% the proxy is stopped and its pid/monitor cleared from the record.
handle_call({stop_proxy_pid, Rollover_p, Filename}, _From,
            #state{tid = Tid} = State) ->
    case lookup_md(Tid, Filename) of
        not_found ->
            ok;
        #md{proxy_pid = undefined} ->
            ok;
        #md{proxy_pid = ProxyPid, mref = MonRef} = Record ->
            demonitor(MonRef, [flush]),
            case Rollover_p of
                true ->
                    do_rollover(Filename, State);
                _ ->
                    machi_file_proxy:stop(ProxyPid),
                    update_ets(Tid, Record#md{proxy_pid = undefined,
                                              mref = undefined})
            end
    end,
    {reply, ok, State};

%% trim_file: add the file to the persistent trimmed list; any result
%% other than {ok, NewList} is passed back to the caller as is.
handle_call({trim_file, Filename}, _, #state{trimmed_files = Trimmed} = State) ->
    case machi_plist:add(Trimmed, Filename) of
        {ok, Trimmed2} ->
            {reply, ok, State#state{trimmed_files = Trimmed2}};
        Error ->
            {reply, Error, State}
    end;

handle_call(Req, From, State) ->
    lager:warning("Got unknown call ~p from ~p", [Req, From]),
    {reply, hoge, State}.
|
|
||||||
|
|
||||||
%% Monitor notifications from file proxies.  For every exit reason except
%% `trimmed' the proxy pid and monitor reference are cleared from the
%% proxy's record; a `trimmed' exit is only logged.
handle_info({'DOWN', Mref, process, Pid, Reason}, #state{tid = Tid} = State) ->
    case Reason of
        normal ->
            lager:debug("file proxy ~p shutdown normally", [Pid]),
            clear_ets(Tid, Mref);
        wedged ->
            lager:error("file proxy ~p shutdown because it's wedged", [Pid]),
            clear_ets(Tid, Mref);
        trimmed ->
            lager:debug("file proxy ~p shutdown because the file was trimmed", [Pid]);
        _ ->
            lager:error("file proxy ~p shutdown because ~p", [Pid, Reason]),
            clear_ets(Tid, Mref)
    end,
    {noreply, State};

handle_info(Info, State) ->
    lager:warning("Got unknown info ~p", [Info]),
    {noreply, State}.
|
|
||||||
|
|
||||||
%% Log the shutdown reason and close the persistent trimmed-files list.
terminate(Why, #state{trimmed_files = Trimmed}) ->
    lager:info("Shutting down because ~p", [Why]),
    machi_plist:close(Trimmed),
    ok.
|
|
||||||
|
|
||||||
%% No state migration: the state is passed through unchanged.
code_change(_OldVsn, S, _Extra) ->
    {ok, S}.
|
|
||||||
|
|
||||||
%% Private functions
|
|
||||||
|
|
||||||
%% Hash a term with the module's configured hash macro.
compute_hash(Term) ->
    ?HASH(Term).
|
|
||||||
|
|
||||||
%% Map a hash to a manager number in 1..Count, where Count is the
%% `metadata_manager_count' application setting (default ?MAX_MGRS).
compute_worker(Hash) ->
    Count = get_env(metadata_manager_count, ?MAX_MGRS),
    1 + (Hash rem Count).
|
|
||||||
%% TODO MARK: Hrm, the intermittent failures of both of the tests
|
|
||||||
%% in machi_cr_client_test.erl were due to this func returning 0.
|
|
||||||
%% But machi_flu_metadata_mgr_sup:init() doesn't make a child with N=0.
|
|
||||||
%%
|
|
||||||
%% The remaining puzzle for me, which I'm now punting to you, sorry,
|
|
||||||
%% is why those two tests would sometimes pass, which implies to me that
|
|
||||||
%% sometimes the code path used by those tests never chooses worker 0
|
|
||||||
%% and occasionally it does choose worker 0.
|
|
||||||
|
|
||||||
%% Registered name of the manager responsible for Data: hash the term,
%% map the hash to a manager number, build that manager's name.
get_manager_atom(FluName, Data) ->
    WorkerNum = compute_worker(compute_hash(Data)),
    build_metadata_mgr_name(FluName, WorkerNum).
|
|
||||||
|
|
||||||
%% Fetch the single object stored under Key, or `not_found' when the
%% table has none.
lookup_md(Tid, Key) ->
    case ets:lookup(Tid, Key) of
        [Record] -> Record;
        [] -> not_found
    end.
|
|
||||||
|
|
||||||
%% Start a file proxy and monitor it.  The third argument is either an
%% existing #md{} record or a bare filename (wrapped in a fresh record).
%% Returns the record with proxy_pid and mref filled in.
start_file_proxy(FluName, DataDir, #md{filename = Filename} = Record) ->
    {ok, ProxyPid} = machi_file_proxy_sup:start_proxy(FluName, DataDir, Filename),
    MonRef = monitor(process, ProxyPid),
    Record#md{proxy_pid = ProxyPid, mref = MonRef};

start_file_proxy(FluName, DataDir, Filename) ->
    start_file_proxy(FluName, DataDir, #md{filename = Filename}).
|
|
||||||
|
|
||||||
%% Insert (or overwrite) a record in the table.
update_ets(Tid, Record) ->
    ets:insert(Tid, Record).
|
|
||||||
|
|
||||||
%% Find the record holding monitor reference Mref and store it back with
%% its proxy pid and monitor reference reset to `undefined'.
clear_ets(Tid, Mref) ->
    Record = get_md_record_by_mref(Tid, Mref),
    Cleared = Record#md{proxy_pid = undefined, mref = undefined},
    update_ets(Tid, Cleared).
|
|
||||||
|
|
||||||
%% Remove exactly this record from the table.
purge_ets(Tid, Record) ->
    true = ets:delete_object(Tid, Record).
|
|
||||||
|
|
||||||
%% Return the one #md{} record whose mref field equals Mref.  Crashes
%% with badmatch unless exactly one record matches.
get_md_record_by_mref(Tid, Mref) ->
    Pattern = #md{filename = '_', proxy_pid = '_', mref = Mref},
    [Record] = ets:match_object(Tid, Pattern),
    Record.
|
|
||||||
|
|
||||||
%% Return the record stored under Filename.  Crashes with badmatch when
%% the lookup does not yield exactly one object.
get_md_record_by_filename(Tid, Filename) ->
    [Record] = ets:lookup(Tid, Filename),
    Record.
|
|
||||||
|
|
||||||
%% Read a `machi' application environment setting, falling back to
%% Default when it is not set.
get_env(Setting, Default) ->
    case application:get_env(machi, Setting) of
        {ok, Value} -> Value;
        undefined -> Default
    end.
|
|
||||||
|
|
||||||
%% Handle rollover of Filename: bump the sequence counter of the file's
%% prefix in the filename manager and drop the file's record from this
%% manager's table.  Returns ok.
do_rollover(Filename, #state{fluname = FluName, tid = Tid}) ->
    Record = get_md_record_by_filename(Tid, Filename),
    lager:info("file ~p proxy ~p shutdown because of file rollover",
               [Filename, Record#md.proxy_pid]),
    {Prefix, NS, NSLocator, _, _} =
        machi_util:parse_filename(Record#md.filename),

    %% Only the counter is incremented here.  The filename itself is
    %% generated on the next append request for that prefix; because it
    %% will carry a new sequence number it will probably belong to a
    %% different metadata manager.  That is why no new filename is
    %% generated right away and used to start a new file proxy.
    NSInfo = #ns_info{name=NS, locator=NSLocator},
    lager:warning("INCR: ~p ~p\n", [FluName, Prefix]),
    ok = machi_flu_filename_mgr:increment_prefix_sequence(FluName, NSInfo,
                                                          {prefix, Prefix}),

    %% Remove the entry from our table completely, since the new filename
    %% (whenever it arrives) is likely to be handled by another manager.
    purge_ets(Tid, Record),
    ok.
|
|
|
@ -1,59 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc This is the supervisor for the collection of metadata
|
|
||||||
%% managers. It's started out of `machi_flu_psup'. It reads an
|
|
||||||
%% application environment variable named `metadata_manager_count'
|
|
||||||
%% with a default of 10 if it is not set.
|
|
||||||
|
|
||||||
-module(machi_flu_metadata_mgr_sup).
|
|
||||||
-behaviour(supervisor).
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
-export([
|
|
||||||
child_spec/3,
|
|
||||||
start_link/3
|
|
||||||
]).
|
|
||||||
|
|
||||||
%% supervisor callback
|
|
||||||
-export([init/1]).
|
|
||||||
|
|
||||||
%% @doc Build the child specification a parent supervisor uses to run
%% this supervisor for the FLU named `FluName'. The child is `permanent',
%% of type `supervisor', with a shutdown value of 5000.
child_spec(FluName, DataDir, N) ->
    Id = make_sup_name(FluName),
    StartMFA = {?MODULE, start_link, [FluName, DataDir, N]},
    {Id, StartMFA, permanent, 5000, supervisor, [?MODULE]}.
|
|
||||||
|
|
||||||
%% @doc Start this supervisor, locally registered under the name derived
%% from `FluName' by `make_sup_name/1'.
start_link(FluName, DataDir, N) ->
    RegName = {local, make_sup_name(FluName)},
    supervisor:start_link(RegName, ?MODULE, [FluName, DataDir, N]).
|
|
||||||
|
|
||||||
%% @private
%% Supervisor callback. Uses a `one_for_one' strategy allowing at most
%% 1000 restarts within 3600 seconds, and supervises one metadata manager
%% child per integer in `1..N'.
init([FluName, DataDir, N]) ->
    SupFlags = {one_for_one, 1000, 3600},
    MakeSpec = fun(C) ->
                   machi_flu_metadata_mgr:child_spec(FluName, C, DataDir, N)
               end,
    {ok, {SupFlags, lists:map(MakeSpec, lists:seq(1, N))}}.
|
|
||||||
|
|
||||||
%% @private
%% Derive this supervisor's name from the FLU name by appending
%% "_metadata_mgr_sup". Only atoms are accepted.
make_sup_name(FluName) when is_atom(FluName) ->
    list_to_atom(lists:concat([FluName, "_metadata_mgr_sup"])).
|
|
||||||
|
|
|
@ -61,7 +61,6 @@
|
||||||
|
|
||||||
-behaviour(supervisor).
|
-behaviour(supervisor).
|
||||||
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
-include("machi_verbose.hrl").
|
-include("machi_verbose.hrl").
|
||||||
|
|
||||||
-ifdef(PULSE).
|
-ifdef(PULSE).
|
||||||
|
@ -73,32 +72,19 @@
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
%% External API
|
%% External API
|
||||||
-export([make_package_spec/1, make_package_spec/4,
|
-export([make_package_spec/4, start_flu_package/4, stop_flu_package/1]).
|
||||||
start_flu_package/1, start_flu_package/4, stop_flu_package/1]).
|
|
||||||
%% Internal API
|
%% Internal API
|
||||||
-export([start_link/4,
|
-export([start_link/4,
|
||||||
make_flu_regname/1, make_p_regname/1, make_mgr_supname/1,
|
make_p_regname/1, make_mgr_supname/1, make_proj_supname/1]).
|
||||||
make_proj_supname/1, make_fitness_regname/1]).
|
|
||||||
|
|
||||||
%% Supervisor callbacks
|
%% Supervisor callbacks
|
||||||
-export([init/1]).
|
-export([init/1]).
|
||||||
|
|
||||||
%% @doc Build a FLU package child spec from either a `#p_srvr{}' record
%% or a `{FluName, TcpPort, Props}' triple. The data directory is the
%% `flu_data_dir' application setting joined with the FLU's name.
%%
%% NOTE(review): unlike `start_flu_package/1', a `data_dir' entry in
%% `Props' is not consulted here -- confirm that this is intended.
make_package_spec(#p_srvr{name=FluName, port=TcpPort, props=Props})
  when is_list(Props) ->
    make_package_spec({FluName, TcpPort, Props});
make_package_spec({FluName, TcpPort, Props}) when is_list(Props) ->
    %% The default is not a usable path: if `flu_data_dir' is unset the
    %% join below is expected to fail rather than silently pick a place.
    BaseDir = get_env(flu_data_dir, undefined_is_invalid),
    FluDir = filename:join(BaseDir, atom_to_list(FluName)),
    make_package_spec(FluName, TcpPort, FluDir, Props).
|
|
||||||
|
|
||||||
make_package_spec(FluName, TcpPort, DataDir, Props) ->
|
make_package_spec(FluName, TcpPort, DataDir, Props) ->
|
||||||
{FluName, {machi_flu_psup, start_link,
|
{FluName, {machi_flu_psup, start_link,
|
||||||
[FluName, TcpPort, DataDir, Props]},
|
[FluName, TcpPort, DataDir, Props]},
|
||||||
permanent, ?SHUTDOWN, supervisor, []}.
|
permanent, ?SHUTDOWN, supervisor, []}.
|
||||||
|
|
||||||
%% @doc Start a FLU package described by a `#p_srvr{}' record. The data
%% directory is resolved by `get_data_dir/2' from the record's props.
start_flu_package(#p_srvr{name=FluName, port=TcpPort, props=Props}) ->
    start_flu_package(FluName, TcpPort, get_data_dir(FluName, Props), Props).
|
|
||||||
|
|
||||||
start_flu_package(FluName, TcpPort, DataDir, Props) ->
|
start_flu_package(FluName, TcpPort, DataDir, Props) ->
|
||||||
Spec = make_package_spec(FluName, TcpPort, DataDir, Props),
|
Spec = make_package_spec(FluName, TcpPort, DataDir, Props),
|
||||||
{ok, _SupPid} = supervisor:start_child(machi_flu_sup, Spec).
|
{ok, _SupPid} = supervisor:start_child(machi_flu_sup, Spec).
|
||||||
|
@ -128,39 +114,15 @@ init([FluName, TcpPort, DataDir, Props0]) ->
|
||||||
{machi_projection_store, start_link,
|
{machi_projection_store, start_link,
|
||||||
[ProjRegName, DataDir, FluName]},
|
[ProjRegName, DataDir, FluName]},
|
||||||
permanent, ?SHUTDOWN, worker, []},
|
permanent, ?SHUTDOWN, worker, []},
|
||||||
FitnessRegName = make_fitness_regname(FluName),
|
|
||||||
FitnessSpec = {FitnessRegName,
|
|
||||||
{machi_fitness, start_link,
|
|
||||||
[ [{FluName}|Props] ]},
|
|
||||||
permanent, ?SHUTDOWN, worker, []},
|
|
||||||
MgrSpec = {make_mgr_supname(FluName),
|
MgrSpec = {make_mgr_supname(FluName),
|
||||||
{machi_chain_manager1, start_link,
|
{machi_chain_manager1, start_link,
|
||||||
[FluName, [], Props]},
|
[FluName, [], Props]},
|
||||||
permanent, ?SHUTDOWN, worker, []},
|
permanent, ?SHUTDOWN, worker, []},
|
||||||
|
|
||||||
FNameMgrSpec = machi_flu_filename_mgr:child_spec(FluName, DataDir),
|
|
||||||
|
|
||||||
MetaMgrCnt = get_env(metadata_manager_count, 10),
|
|
||||||
MetaSupSpec = machi_flu_metadata_mgr_sup:child_spec(FluName, DataDir, MetaMgrCnt),
|
|
||||||
|
|
||||||
FProxySupSpec = machi_file_proxy_sup:child_spec(FluName),
|
|
||||||
|
|
||||||
Flu1SubSupSpec = {machi_flu1_subsup:subsup_name(FluName),
|
|
||||||
{machi_flu1_subsup, start_link, [FluName]},
|
|
||||||
permanent, ?SHUTDOWN, supervisor, []},
|
|
||||||
|
|
||||||
FluSpec = {FluName,
|
FluSpec = {FluName,
|
||||||
{machi_flu1, start_link,
|
{machi_flu1, start_link,
|
||||||
[ [{FluName, TcpPort, DataDir}|Props] ]},
|
[ [{FluName, TcpPort, DataDir}|Props] ]},
|
||||||
permanent, ?SHUTDOWN, worker, []},
|
permanent, ?SHUTDOWN, worker, []},
|
||||||
|
{ok, {SupFlags, [ProjSpec, MgrSpec, FluSpec]}}.
|
||||||
{ok, {SupFlags, [
|
|
||||||
ProjSpec, FitnessSpec, MgrSpec,
|
|
||||||
FProxySupSpec, FNameMgrSpec, MetaSupSpec,
|
|
||||||
Flu1SubSupSpec, FluSpec]}}.
|
|
||||||
|
|
||||||
%% @doc Return the FLU name unchanged. Only atoms are accepted.
make_flu_regname(FluName) when is_atom(FluName) -> FluName.
|
|
||||||
|
|
||||||
make_p_regname(FluName) when is_atom(FluName) ->
|
make_p_regname(FluName) when is_atom(FluName) ->
|
||||||
list_to_atom("flusup_" ++ atom_to_list(FluName)).
|
list_to_atom("flusup_" ++ atom_to_list(FluName)).
|
||||||
|
@ -170,21 +132,3 @@ make_mgr_supname(MgrName) when is_atom(MgrName) ->
|
||||||
|
|
||||||
make_proj_supname(ProjName) when is_atom(ProjName) ->
|
make_proj_supname(ProjName) when is_atom(ProjName) ->
|
||||||
list_to_atom(atom_to_list(ProjName) ++ "_pstore").
|
list_to_atom(atom_to_list(ProjName) ++ "_pstore").
|
||||||
|
|
||||||
%% @doc Derive the fitness server's name from the FLU name by appending
%% "_fitness". Only atoms are accepted.
make_fitness_regname(FluName) when is_atom(FluName) ->
    list_to_atom(lists:concat([FluName, "_fitness"])).
|
|
||||||
|
|
||||||
%% @private
%% Read `Setting' from the `machi' application environment, returning
%% `Default' when the setting is not configured.
get_env(Setting, Default) ->
    application:get_env(machi, Setting, Default).
|
|
||||||
|
|
||||||
%% @private
%% Resolve the data directory of FLU `FluName'.
%%
%% A `data_dir' entry in `Props' whose value is a list (string) wins.
%% Otherwise the directory is the `flu_data_dir' setting of the `machi'
%% application joined with the FLU name; the function crashes with
%% `badmatch' if that setting is not configured.
%%
%% The default path is assembled with `filename:join/2' (as
%% `make_package_spec/1' does) so that a trailing "/" in `flu_data_dir'
%% does not produce a doubled separator.
get_data_dir(FluName, Props) ->
    case proplists:get_value(data_dir, Props) of
        Path when is_list(Path) ->
            Path;
        undefined ->
            {ok, Dir} = application:get_env(machi, flu_data_dir),
            filename:join(Dir, atom_to_list(FluName))
    end.
|
|
||||||
|
|
|
@ -21,9 +21,6 @@
|
||||||
%% @doc Supervisor for Machi FLU servers and their related support
|
%% @doc Supervisor for Machi FLU servers and their related support
|
||||||
%% servers.
|
%% servers.
|
||||||
%%
|
%%
|
||||||
%% Responsibility for managing FLU and chain lifecycle after the initial
|
|
||||||
%% application startup is delegated to {@link machi_lifecycle_mgr}.
|
|
||||||
%%
|
|
||||||
%% See {@link machi_flu_psup} for an illustration of the entire Machi
|
%% See {@link machi_flu_psup} for an illustration of the entire Machi
|
||||||
%% application process structure.
|
%% application process structure.
|
||||||
|
|
||||||
|
@ -31,12 +28,8 @@
|
||||||
|
|
||||||
-behaviour(supervisor).
|
-behaviour(supervisor).
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
-include("machi_verbose.hrl").
|
-include("machi_verbose.hrl").
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-compile(export_all).
|
|
||||||
-ifdef(PULSE).
|
-ifdef(PULSE).
|
||||||
-compile({parse_transform, pulse_instrument}).
|
-compile({parse_transform, pulse_instrument}).
|
||||||
-include_lib("pulse_otp/include/pulse_otp.hrl").
|
-include_lib("pulse_otp/include/pulse_otp.hrl").
|
||||||
|
@ -44,12 +37,9 @@
|
||||||
-else.
|
-else.
|
||||||
-define(SHUTDOWN, 5000).
|
-define(SHUTDOWN, 5000).
|
||||||
-endif.
|
-endif.
|
||||||
-endif. %TEST
|
|
||||||
|
|
||||||
%% API
|
%% API
|
||||||
-export([start_link/0,
|
-export([start_link/0]).
|
||||||
get_initial_flus/0, load_rc_d_files_from_dir/1,
|
|
||||||
sanitize_p_srvr_records/1]).
|
|
||||||
|
|
||||||
%% Supervisor callbacks
|
%% Supervisor callbacks
|
||||||
-export([init/1]).
|
-export([init/1]).
|
||||||
|
@ -65,11 +55,10 @@ init([]) ->
|
||||||
MaxSecondsBetweenRestarts = 3600,
|
MaxSecondsBetweenRestarts = 3600,
|
||||||
SupFlags = {RestartStrategy, MaxRestarts, MaxSecondsBetweenRestarts},
|
SupFlags = {RestartStrategy, MaxRestarts, MaxSecondsBetweenRestarts},
|
||||||
|
|
||||||
_Tab = ets:new(?TEST_ETS_TABLE, [named_table, public, ordered_set,
|
|
||||||
{read_concurrency,true}]),
|
|
||||||
|
|
||||||
Ps = get_initial_flus(),
|
Ps = get_initial_flus(),
|
||||||
FLU_specs = [machi_flu_psup:make_package_spec(P) || P <- Ps],
|
FLU_specs = [machi_flu_psup:make_package_spec(FluName, TcpPort,
|
||||||
|
DataDir, Props) ||
|
||||||
|
{FluName, TcpPort, DataDir, Props} <- Ps],
|
||||||
|
|
||||||
{ok, {SupFlags, FLU_specs}}.
|
{ok, {SupFlags, FLU_specs}}.
|
||||||
|
|
||||||
|
@ -78,66 +67,5 @@ get_initial_flus() ->
|
||||||
[].
|
[].
|
||||||
-else. % PULSE
|
-else. % PULSE
|
||||||
get_initial_flus() ->
|
get_initial_flus() ->
|
||||||
DoesNotExist = "/tmp/does/not/exist",
|
application:get_env(machi, initial_flus, []).
|
||||||
ConfigDir = case application:get_env(machi, flu_config_dir, DoesNotExist) of
|
|
||||||
DoesNotExist ->
|
|
||||||
DoesNotExist;
|
|
||||||
Dir ->
|
|
||||||
Dir
|
|
||||||
end,
|
|
||||||
Ps = [P || {_File, P} <- load_rc_d_files_from_dir(ConfigDir)],
|
|
||||||
sanitize_p_srvr_records(Ps).
|
|
||||||
-endif. % PULSE
|
-endif. % PULSE
|
||||||
|
|
||||||
%% @doc Consult every file directly under `Dir' and return a list of
%% `{File, Term}' pairs. A file that does not consult to exactly one
%% term is logged and reported as `{File, []}'.
load_rc_d_files_from_dir(Dir) ->
    Consult =
        fun(File) ->
            case file:consult(File) of
                {ok, [Term]} ->
                    {File, Term};
                _ ->
                    lager:warning("Error parsing file '~s', ignoring",
                                  [File]),
                    {File, []}
            end
        end,
    lists:map(Consult, filelib:wildcard(Dir ++ "/*")).
|
|
||||||
|
|
||||||
%% @doc Filter `Ps' down to the entries accepted by
%% `sanitize_p_srvr_rec/2'. Accepted entries are accumulated by consing,
%% so they come back in the reverse of their input order.
sanitize_p_srvr_records(Ps) ->
    Start = {[], dict:new()},
    {Sane, _Seen} = lists:foldl(fun sanitize_p_srvr_rec/2, Start, Ps),
    Sane.
|
|
||||||
|
|
||||||
%% @private
%% Fold step for `sanitize_p_srvr_records/1'. `Acc' holds the accepted
%% records and `D' is a dict of the names and ports already seen.
%%
%% `Whole' is accepted when it is a `#p_srvr{}' whose name is an atom not
%% seen before, whose protocol module is an atom that is loaded or can be
%% loaded, whose address is a list or a tuple, whose port is an integer
%% in 1024..65534 not seen before, and whose props are a list. Any
%% failure is logged and the record is skipped.
sanitize_p_srvr_rec(Whole, {Acc, D}) ->
    try
        #p_srvr{name=Name, proto_mod=PMod, address=Address,
                port=Port, props=Props} = Whole,
        NameK = {name, Name},
        PortK = {port, Port},

        %% Name: an atom, and not a duplicate.
        true = is_atom(Name),
        error = dict:find(NameK, D),

        %% Protocol module: an atom naming a module we can get loaded.
        true = is_atom(PMod),
        case code:is_loaded(PMod) of
            {file, _} ->
                ok;
            _ ->
                {module, _} = code:load_file(PMod),
                ok
        end,

        %% Address: a list, or a tuple (Erlang-style IPv4 or IPv6).
        true = is_list(Address) orelse is_tuple(Address),

        %% Port: within range, and not a duplicate.
        true = is_integer(Port) andalso Port >= 1024 andalso Port =< 65534,
        error = dict:find(PortK, D),

        true = is_list(Props),

        %% All is sane enough: remember the name and port, keep the record.
        Seen = dict:store(NameK, Name, dict:store(PortK, Port, D)),
        {[Whole|Acc], Seen}
    catch _:_ ->
        _ = lager:log(error, self(),
                      "~s: Bad (or duplicate name/port) p_srvr record, "
                      "skipping: ~P\n",
                      [?MODULE, Whole, 15]),
        {Acc, D}
    end.
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,156 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% @doc Creates a Merkle tree per file based on the checksum data for
|
|
||||||
%% a given data file.
|
|
||||||
%%
|
|
||||||
%% The `naive' implementation representation is:
|
|
||||||
%%
|
|
||||||
%% `<<Offset:64, Size:32, 0>>' for unwritten bytes
|
|
||||||
%% `<<Offset:64, Size:32, 1>>' for trimmed bytes
|
|
||||||
%% `<<Offset:64, Size:32, Csum/binary>>' for written bytes
|
|
||||||
%%
|
|
||||||
%% The tree feeds these leaf nodes into hashes representing chunks of a minimum
|
|
||||||
%% size of at least 1024 KB (1 MB), but if the file size is larger, we will try
|
|
||||||
%% to get about 100 chunks for the first rollup "Level 1." We aim for around 10
|
|
||||||
%% hashes at level 2, and then 2 hashes at level 3 and finally the root.
|
|
||||||
|
|
||||||
-module(machi_merkle_tree).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_merkle_tree.hrl").
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-compile(export_all).
|
|
||||||
-else.
|
|
||||||
-export([
|
|
||||||
open/2,
|
|
||||||
open/3,
|
|
||||||
tree/1,
|
|
||||||
filename/1,
|
|
||||||
diff/2
|
|
||||||
]).
|
|
||||||
-endif.
|
|
||||||
|
|
||||||
-define(TRIMMED, <<1>>).
|
|
||||||
-define(UNWRITTEN, <<0>>).
|
|
||||||
-define(NAIVE_ENCODE(Offset, Size, Data), <<Offset:64/unsigned-big, Size:32/unsigned-big, Data/binary>>).
|
|
||||||
|
|
||||||
-define(MINIMUM_CHUNK, 1048576). %% 1024 * 1024
|
|
||||||
-define(LEVEL_SIZE, 10).
|
|
||||||
-define(H, sha).
|
|
||||||
|
|
||||||
%% public API
|
|
||||||
|
|
||||||
%% @doc Open the merkle tree for `Filename' using the `naive' backend.
open(Filename, DataDir) ->
    open(Filename, DataDir, naive).

%% @doc Load the tree for `Filename' with backend `Type' and wrap it in
%% an `#mt{}' record, returned as `{ok, MT}'.
open(Filename, DataDir, Type) ->
    Loaded = load_filename(Filename, DataDir, Type),
    MT = #mt{ filename = Filename, tree = Loaded, backend = Type },
    {ok, MT}.
|
|
||||||
|
|
||||||
%% @doc Return the `naive' tree held by an `#mt{}'. When the tree is
%% flagged with `recalc = true' it is rebuilt first; otherwise the
%% stored tree is returned as is.
tree(#mt{ backend = naive, tree = Naive }) ->
    case Naive#naive.recalc of
        false -> Naive;
        true  -> build_tree(Naive)
    end.
|
|
||||||
|
|
||||||
%% @doc Return the filename stored in an `#mt{}' record.
filename(MT = #mt{}) ->
    MT#mt.filename.
|
|
||||||
|
|
||||||
%% @doc Compare two `naive' merkle trees. Returns `same' when their roots
%% compare equal, otherwise the result of `naive_diff/2'. Any other pair
%% of arguments raises `badarg'.
diff(#mt{backend = naive, tree = T1}, #mt{backend = naive, tree = T2}) ->
    Root1 = T1#naive.root,
    Root2 = T2#naive.root,
    if
        Root1 == Root2 -> same;
        true           -> naive_diff(T1, T2)
    end;
diff(_, _) ->
    error(badarg).
|
|
||||||
|
|
||||||
%% private
|
|
||||||
|
|
||||||
% @private
|
|
||||||
%% Load the checksum data of `Filename' into a `#naive{}' tree and build
%% it. The chunk size is the larger of ?MINIMUM_CHUNK and one hundredth
%% of the end offset reported by the fold over the checksum table.
load_filename(Filename, DataDir, naive) ->
    {EndOffset, RevLeaves} =
        do_load(Filename, DataDir, fun insert_csum_naive/2, []),
    build_tree(#naive{ leaves = lists:reverse(RevLeaves),
                       chunk_size = max(?MINIMUM_CHUNK, EndOffset div 100),
                       recalc = true }).
|
|
||||||
|
|
||||||
%% @private
%% Open the checksum table that belongs to `Filename' under `DataDir',
%% fold `FoldFun' over its chunks starting from `{0, AccInit}', and
%% return the fold result.
%%
%% The table is closed in an `after' section so the handle is released
%% even when `FoldFun' (or the fold itself) raises; a failing close still
%% crashes with `badmatch', as before.
do_load(Filename, DataDir, FoldFun, AccInit) ->
    CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
    {ok, T} = machi_csum_table:open(CsumFile, []),
    try
        machi_csum_table:foldl_chunks(FoldFun, {0, AccInit}, T)
    after
        ok = machi_csum_table:close(T)
    end.
|
|
||||||
|
|
||||||
% @private
|
|
||||||
%% Fold step over checksum entries. `Last' is the offset at which the
%% previous entry ended. When the next entry does not start exactly at
%% `Last', an `unwritten' leaf covering the difference is recorded
%% first. Returns the new end offset and the extended leaf list.
insert_csum_naive({Offset, Size, _Csum}=In, {Last, MT}) ->
    Leaves = case Offset of
                 Last ->
                     %% no gap
                     MT;
                 _ ->
                     update_acc({Last, Offset - Last, unwritten}, MT)
             end,
    {Offset + Size, update_acc(In, Leaves)}.
|
|
||||||
|
|
||||||
% @private
|
|
||||||
%% Prepend one leaf `{Offset, Size, Encoded}' to the leaf list `MT'.
%% The encoded payload is ?UNWRITTEN or ?TRIMMED for the corresponding
%% atoms, and for a checksum binary it is the checksum with its first
%% (tag) byte removed.
update_acc({Offset, Size, unwritten}, MT) ->
    Leaf = ?NAIVE_ENCODE(Offset, Size, ?UNWRITTEN),
    [ {Offset, Size, Leaf} | MT ];
update_acc({Offset, Size, trimmed}, MT) ->
    Leaf = ?NAIVE_ENCODE(Offset, Size, ?TRIMMED),
    [ {Offset, Size, Leaf} | MT ];
update_acc({Offset, Size, <<_Tag:8, Csum/binary>>}, MT) ->
    Leaf = ?NAIVE_ENCODE(Offset, Size, Csum),
    [ {Offset, Size, Leaf} | MT ].
|
|
||||||
|
|
||||||
%% Compute every level of a `#naive{}' tree from its leaves: level 1 by
%% chunk size, level 2 and level 3 by grouping the level below, and the
%% root over level 3. The result has `recalc' cleared.
build_tree(MT = #naive{ leaves = L, chunk_size = ChunkSize }) ->
    %% Every level starts with a single fresh hash context.
    Fresh = fun() -> [ crypto:hash_init(?H) ] end,
    Lvl1s = build_level_1(ChunkSize, L, 1, Fresh()),
    Lvl2s = build_int_level(length(Lvl1s) div ?LEVEL_SIZE, Lvl1s, 1, Fresh()),
    Lvl3s = build_int_level(length(Lvl2s) div 2, Lvl2s, 1, Fresh()),
    Root = build_root(Lvl3s, crypto:hash_init(?H)),
    MT#naive{ root = Root,
              lvl1 = Lvl1s,
              lvl2 = Lvl2s,
              lvl3 = Lvl3s,
              recalc = false }.
|
|
||||||
|
|
||||||
%% Feed every hash in the list, in order, into the hash context `Ctx'
%% and return the final digest.
build_root(Hashes, Ctx) ->
    Feed = fun(H, C) -> crypto:hash_update(C, H) end,
    crypto:hash_final(lists:foldl(Feed, Ctx, Hashes)).
|
|
||||||
|
|
||||||
%% Roll the hashes of one level up into the next level. `Cnt' counts the
%% input hashes starting from the caller-supplied value; the head of the
%% accumulator is the hash context currently being filled.
%%
%% Whenever `Cnt rem Mod == 0' the current context is finalized and the
%% hash at hand starts a new one. If that test cannot be evaluated (for
%% instance `Mod' is 0) the guard simply fails and the hash goes into the
%% current context.
build_int_level(_Mod, [], _Cnt, [ Ctx | Done ]) ->
    lists:reverse([ crypto:hash_final(Ctx) | Done ]);
build_int_level(Mod, [H|T], Cnt, [ Ctx | Done ]) ->
    Acc = if
              Cnt rem Mod == 0 ->
                  Started = crypto:hash_update(crypto:hash_init(?H), H),
                  [ Started, crypto:hash_final(Ctx) | Done ];
              true ->
                  [ crypto:hash_update(Ctx, H) | Done ]
          end,
    build_int_level(Mod, T, Cnt + 1, Acc).
|
|
||||||
|
|
||||||
%% Roll the leaves up into level-1 hashes. The head of the accumulator
%% is the hash context currently being filled, and `Size * Multiple' is
%% the byte boundary of the chunk it covers.
%%
%% A leaf that ends beyond the boundary finalizes the current context,
%% starts a new one, and advances `Multiple' by one; any other leaf is
%% added to the current context.
build_level_1(_Size, [], _Multiple, [ Ctx | Done ]) ->
    lists:reverse([ crypto:hash_final(Ctx) | Done ]);
build_level_1(Size, [{Pos, Len, Hash}|T], Multiple, [ Ctx | Done ]) ->
    if
        ( Pos + Len ) > ( Size * Multiple ) ->
            Started = crypto:hash_update(crypto:hash_init(?H), Hash),
            build_level_1(Size, T, Multiple + 1,
                          [ Started, crypto:hash_final(Ctx) | Done ]);
        ( Pos + Len ) =< ( Size * Multiple ) ->
            build_level_1(Size, T, Multiple,
                          [ crypto:hash_update(Ctx, Hash) | Done ])
    end.
|
|
||||||
|
|
||||||
%% Compare the level-1 hashes of two trees position by position. Each
%% `{Index, Hash}' pair that is present for the first tree but not for
%% the second is reported as `{(Index-1)*CS2, CS2, Hash}', where `CS2' is
%% the chunk size of the second tree.
%%
%% NOTE(review): the original comment describes the result as the byte
%% ranges in list 2 that do not match list 1, while the subtraction keeps
%% entries of list 1 that are missing from list 2 -- confirm which
%% direction is intended. (Or should we do something else?)
naive_diff(#naive{lvl1 = L1}, #naive{lvl1=L2, chunk_size=CS2}) ->
    Indexed = fun(Hashes) ->
                  Positions = lists:seq(1, length(Hashes)),
                  gb_sets:from_list(lists:zip(Positions, Hashes))
              end,
    OnlyInFirst = gb_sets:subtract(Indexed(L1), Indexed(L2)),
    [ {(X-1)*CS2, CS2, SHA} || {X, SHA} <- gb_sets:to_list(OnlyInFirst) ].
|
|
|
@ -25,10 +25,6 @@
|
||||||
%% to a single socket connection, and there is no code to deal with
|
%% to a single socket connection, and there is no code to deal with
|
||||||
%% multiple connections/load balancing/error handling to several/all
|
%% multiple connections/load balancing/error handling to several/all
|
||||||
%% Machi cluster servers.
|
%% Machi cluster servers.
|
||||||
%%
|
|
||||||
%% Please see {@link machi_flu1_client} the "Client API implementation notes"
|
|
||||||
%% section for how this module relates to the rest of the client API
|
|
||||||
%% implementation.
|
|
||||||
|
|
||||||
-module(machi_pb_high_client).
|
-module(machi_pb_high_client).
|
||||||
|
|
||||||
|
@ -44,8 +40,7 @@
|
||||||
auth/3, auth/4,
|
auth/3, auth/4,
|
||||||
append_chunk/6, append_chunk/7,
|
append_chunk/6, append_chunk/7,
|
||||||
write_chunk/5, write_chunk/6,
|
write_chunk/5, write_chunk/6,
|
||||||
read_chunk/5, read_chunk/6,
|
read_chunk/4, read_chunk/5,
|
||||||
trim_chunk/4, trim_chunk/5,
|
|
||||||
checksum_list/2, checksum_list/3,
|
checksum_list/2, checksum_list/3,
|
||||||
list_files/1, list_files/2
|
list_files/1, list_files/2
|
||||||
]).
|
]).
|
||||||
|
@ -62,114 +57,48 @@
|
||||||
count=0 :: non_neg_integer()
|
count=0 :: non_neg_integer()
|
||||||
}).
|
}).
|
||||||
|
|
||||||
%% Official error types that are specific to Machi
|
|
||||||
-type machi_client_error_reason() :: bad_arg | wedged | bad_checksum |
|
|
||||||
partition | not_written | written |
|
|
||||||
trimmed | no_such_file | partial_read |
|
|
||||||
bad_epoch | inet:posix().
|
|
||||||
|
|
||||||
%% @doc Creates a client process
|
|
||||||
-spec start_link(p_srvr_dict()) -> {ok, pid()} | {error, machi_client_error_reason()}.
|
|
||||||
start_link(P_srvr_list) ->
|
start_link(P_srvr_list) ->
|
||||||
gen_server:start_link(?MODULE, [P_srvr_list], []).
|
gen_server:start_link(?MODULE, [P_srvr_list], []).
|
||||||
|
|
||||||
%% @doc Stops a client process.
|
|
||||||
-spec quit(pid()) -> ok.
|
|
||||||
quit(PidSpec) ->
|
quit(PidSpec) ->
|
||||||
gen_server:call(PidSpec, quit, infinity).
|
gen_server:call(PidSpec, quit, infinity).
|
||||||
|
|
||||||
connected_p(PidSpec) ->
|
connected_p(PidSpec) ->
|
||||||
gen_server:call(PidSpec, connected_p, infinity).
|
gen_server:call(PidSpec, connected_p, infinity).
|
||||||
|
|
||||||
-spec echo(pid(), string()) -> {ok, string()} | {error, machi_client_error_reason()}.
|
|
||||||
echo(PidSpec, String) ->
|
echo(PidSpec, String) ->
|
||||||
echo(PidSpec, String, ?DEFAULT_TIMEOUT).
|
echo(PidSpec, String, ?DEFAULT_TIMEOUT).
|
||||||
|
|
||||||
-spec echo(pid(), string(), non_neg_integer()) -> {ok, string()} | {error, machi_client_error_reason()}.
|
|
||||||
echo(PidSpec, String, Timeout) ->
|
echo(PidSpec, String, Timeout) ->
|
||||||
send_sync(PidSpec, {echo, String}, Timeout).
|
send_sync(PidSpec, {echo, String}, Timeout).
|
||||||
|
|
||||||
%% TODO: auth() is not implemented. Auth requires SSL, and this client
|
%% TODO: auth() is not implemented. Auth requires SSL, and this client
|
||||||
%% doesn't support SSL yet. This is just a placeholder and reminder.
|
%% doesn't support SSL yet. This is just a placeholder and reminder.
|
||||||
|
|
||||||
-spec auth(pid(), string(), string()) -> ok | {error, machi_client_error_reason()}.
|
|
||||||
auth(PidSpec, User, Pass) ->
|
auth(PidSpec, User, Pass) ->
|
||||||
auth(PidSpec, User, Pass, ?DEFAULT_TIMEOUT).
|
auth(PidSpec, User, Pass, ?DEFAULT_TIMEOUT).
|
||||||
|
|
||||||
-spec auth(pid(), string(), string(), non_neg_integer()) -> ok | {error, machi_client_error_reason()}.
|
|
||||||
auth(PidSpec, User, Pass, Timeout) ->
|
auth(PidSpec, User, Pass, Timeout) ->
|
||||||
send_sync(PidSpec, {auth, User, Pass}, Timeout).
|
send_sync(PidSpec, {auth, User, Pass}, Timeout).
|
||||||
|
|
||||||
-spec append_chunk(pid(),
|
append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra) ->
|
||||||
NS::machi_dt:namespace(), Prefix::machi_dt:file_prefix(),
|
append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra, ?DEFAULT_TIMEOUT).
|
||||||
Chunk::machi_dt:chunk(), CSum::machi_dt:chunk_csum(),
|
|
||||||
Opts::machi_dt:append_opts()) ->
|
|
||||||
{ok, Filename::string(), Offset::machi_dt:file_offset()} |
|
|
||||||
{error, machi_client_error_reason()}.
|
|
||||||
append_chunk(PidSpec, NS, Prefix, Chunk, CSum, Opts) ->
|
|
||||||
append_chunk(PidSpec, NS, Prefix, Chunk, CSum, Opts, ?DEFAULT_TIMEOUT).
|
|
||||||
|
|
||||||
-spec append_chunk(pid(),
|
append_chunk(PidSpec, PlacementKey, Prefix, Chunk, CSum, ChunkExtra, Timeout) ->
|
||||||
NS::machi_dt:namespace(), Prefix::machi_dt:file_prefix(),
|
send_sync(PidSpec, {append_chunk, PlacementKey, Prefix, Chunk, CSum, ChunkExtra}, Timeout).
|
||||||
Chunk::machi_dt:chunk(), CSum::machi_dt:chunk_csum(),
|
|
||||||
Opts::machi_dt:append_opts(),
|
|
||||||
Timeout::non_neg_integer()) ->
|
|
||||||
{ok, Filename::string(), Offset::machi_dt:file_offset()} |
|
|
||||||
{error, machi_client_error_reason()}.
|
|
||||||
append_chunk(PidSpec, NS, Prefix, Chunk, CSum, Opts, Timeout) ->
|
|
||||||
send_sync(PidSpec, {append_chunk, NS, Prefix, Chunk, CSum, Opts}, Timeout).
|
|
||||||
|
|
||||||
-spec write_chunk(pid(), File::string(), machi_dt:file_offset(),
|
|
||||||
Chunk::machi_dt:chunk(), CSum::machi_dt:chunk_csum()) ->
|
|
||||||
ok | {error, machi_client_error_reason()}.
|
|
||||||
write_chunk(PidSpec, File, Offset, Chunk, CSum) ->
|
write_chunk(PidSpec, File, Offset, Chunk, CSum) ->
|
||||||
write_chunk(PidSpec, File, Offset, Chunk, CSum, ?DEFAULT_TIMEOUT).
|
write_chunk(PidSpec, File, Offset, Chunk, CSum, ?DEFAULT_TIMEOUT).
|
||||||
|
|
||||||
-spec write_chunk(pid(), File::string(), machi_dt:file_offset(),
|
|
||||||
Chunk::machi_dt:chunk(), CSum::machi_dt:chunk_csum(), Timeout::non_neg_integer()) ->
|
|
||||||
ok | {error, machi_client_error_reason()}.
|
|
||||||
write_chunk(PidSpec, File, Offset, Chunk, CSum, Timeout) ->
|
write_chunk(PidSpec, File, Offset, Chunk, CSum, Timeout) ->
|
||||||
send_sync(PidSpec, {write_chunk, File, Offset, Chunk, CSum}, Timeout).
|
send_sync(PidSpec, {write_chunk, File, Offset, Chunk, CSum}, Timeout).
|
||||||
|
|
||||||
%% @doc Tries to read a chunk of a specified file. It returns `{ok,
|
read_chunk(PidSpec, File, Offset, Size) ->
|
||||||
%% {Chunks, TrimmedChunks}}' for live file while it returns `{error,
|
read_chunk(PidSpec, File, Offset, Size, ?DEFAULT_TIMEOUT).
|
||||||
%% trimmed}' if all bytes of the file were trimmed.
|
|
||||||
-spec read_chunk(pid(), File::string(), machi_dt:file_offset(), machi_dt:chunk_size(),
|
|
||||||
machi_dt:read_opts_x()) ->
|
|
||||||
{ok, {Chunks::[{File::string(), machi_dt:file_offset(), machi_dt:chunk_size(), binary()}],
|
|
||||||
Trimmed::[{File::string(), machi_dt:file_offset(), machi_dt:chunk_size()}]}} |
|
|
||||||
{error, machi_client_error_reason()}.
|
|
||||||
read_chunk(PidSpec, File, Offset, Size, Opts) ->
|
|
||||||
read_chunk(PidSpec, File, Offset, Size, Opts, ?DEFAULT_TIMEOUT).
|
|
||||||
|
|
||||||
-spec read_chunk(pid(), File::string(), machi_dt:file_offset(), machi_dt:chunk_size(),
|
read_chunk(PidSpec, File, Offset, Size, Timeout) ->
|
||||||
machi_dt:read_opts_x(),
|
send_sync(PidSpec, {read_chunk, File, Offset, Size}, Timeout).
|
||||||
Timeout::non_neg_integer()) ->
|
|
||||||
{ok, {Chunks::[{File::string(), machi_dt:file_offset(), machi_dt:chunk_size(), binary()}],
|
|
||||||
Trimmed::[{File::string(), machi_dt:file_offset(), machi_dt:chunk_size()}]}} |
|
|
||||||
{error, machi_client_error_reason()}.
|
|
||||||
read_chunk(PidSpec, File, Offset, Size, Opts0, Timeout) ->
|
|
||||||
Opts = machi_util:read_opts_default(Opts0),
|
|
||||||
send_sync(PidSpec, {read_chunk, File, Offset, Size, Opts}, Timeout).
|
|
||||||
|
|
||||||
%% @doc Trims arbitrary binary range of any file. If a specified range
|
|
||||||
%% has any byte trimmed, it fails and returns `{error, trimmed}'.
|
|
||||||
%% Otherwise it trims all bytes in that range. If there are
|
|
||||||
%% overlapping chunks with client-specified checksums, they will be cut
|
|
||||||
%% off and their checksums recalculated on the server side. TODO: Add
|
|
||||||
%% option specifying whether to trigger GC.
|
|
||||||
-spec trim_chunk(pid(), string(), non_neg_integer(), machi_dt:chunk_size()) ->
                        ok | {error, machi_client_error_reason()}.
trim_chunk(PidSpec, File, Offset, Size) ->
    trim_chunk(PidSpec, File, Offset, Size, ?DEFAULT_TIMEOUT).

%% @doc Same as `trim_chunk/4', with an explicit timeout that is handed
%% to `send_sync/3' along with the `{trim_chunk, File, Offset, Size}'
%% request.
-spec trim_chunk(pid(), string(), non_neg_integer(), machi_dt:chunk_size(),
                 Timeout::non_neg_integer()) ->
                        ok | {error, machi_client_error_reason()}.
trim_chunk(PidSpec, File, Offset, Size, Timeout) ->
    send_sync(PidSpec, {trim_chunk, File, Offset, Size}, Timeout).
|
|
||||||
|
|
||||||
%% @doc Returns a binary that has checksums and chunks encoded inside
|
|
||||||
%% (This is because encoding and decoding them is inefficient). TODO:
|
|
||||||
%% return a structured list of them.
|
|
||||||
-spec checksum_list(pid(), string()) -> {ok, binary()} | {error, machi_client_error_reason()}.
|
|
||||||
checksum_list(PidSpec, File) ->
|
checksum_list(PidSpec, File) ->
|
||||||
checksum_list(PidSpec, File, ?DEFAULT_TIMEOUT).
|
checksum_list(PidSpec, File, ?DEFAULT_TIMEOUT).
|
||||||
|
|
||||||
|
@ -289,19 +218,19 @@ do_send_sync2({auth, User, Pass}, #state{sock=Sock}=S) ->
|
||||||
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
||||||
{Res, S}
|
{Res, S}
|
||||||
end;
|
end;
|
||||||
do_send_sync2({append_chunk, NS, Prefix, Chunk, CSum, Opts},
|
do_send_sync2({append_chunk, PlacementKey, Prefix, Chunk, CSum, ChunkExtra},
|
||||||
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
||||||
try
|
try
|
||||||
ReqID = <<Index:64/big, Count:64/big>>,
|
ReqID = <<Index:64/big, Count:64/big>>,
|
||||||
|
PK = if PlacementKey == <<>> -> undefined;
|
||||||
|
true -> PlacementKey
|
||||||
|
end,
|
||||||
CSumT = convert_csum_req(CSum, Chunk),
|
CSumT = convert_csum_req(CSum, Chunk),
|
||||||
{ChunkExtra, Pref, FailPref} = machi_pb_translate:conv_from_append_opts(Opts),
|
Req = #mpb_appendchunkreq{placement_key=PK,
|
||||||
Req = #mpb_appendchunkreq{namespace=NS,
|
|
||||||
prefix=Prefix,
|
prefix=Prefix,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=CSumT,
|
csum=CSumT,
|
||||||
chunk_extra=ChunkExtra,
|
chunk_extra=ChunkExtra},
|
||||||
preferred_file_name=Pref,
|
|
||||||
flag_fail_preferred=FailPref},
|
|
||||||
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
||||||
append_chunk=Req},
|
append_chunk=Req},
|
||||||
Bin1a = machi_pb:encode_mpb_request(R1a),
|
Bin1a = machi_pb:encode_mpb_request(R1a),
|
||||||
|
@ -324,11 +253,10 @@ do_send_sync2({write_chunk, File, Offset, Chunk, CSum},
|
||||||
try
|
try
|
||||||
ReqID = <<Index:64/big, Count:64/big>>,
|
ReqID = <<Index:64/big, Count:64/big>>,
|
||||||
CSumT = convert_csum_req(CSum, Chunk),
|
CSumT = convert_csum_req(CSum, Chunk),
|
||||||
Req = #mpb_writechunkreq{chunk=
|
Req = #mpb_writechunkreq{file=File,
|
||||||
#mpb_chunk{chunk=Chunk,
|
|
||||||
file_name=File,
|
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
csum=CSumT}},
|
chunk=Chunk,
|
||||||
|
csum=CSumT},
|
||||||
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
||||||
write_chunk=Req},
|
write_chunk=Req},
|
||||||
Bin1a = machi_pb:encode_mpb_request(R1a),
|
Bin1a = machi_pb:encode_mpb_request(R1a),
|
||||||
|
@ -346,19 +274,13 @@ do_send_sync2({write_chunk, File, Offset, Chunk, CSum},
|
||||||
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
||||||
{Res, S#state{count=Count+1}}
|
{Res, S#state{count=Count+1}}
|
||||||
end;
|
end;
|
||||||
do_send_sync2({read_chunk, File, Offset, Size, Opts},
|
do_send_sync2({read_chunk, File, Offset, Size},
|
||||||
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
||||||
try
|
try
|
||||||
ReqID = <<Index:64/big, Count:64/big>>,
|
ReqID = <<Index:64/big, Count:64/big>>,
|
||||||
#read_opts{no_checksum=FlagNoChecksum,
|
Req = #mpb_readchunkreq{file=File,
|
||||||
no_chunk=FlagNoChunk,
|
|
||||||
needs_trimmed=NeedsTrimmed} = Opts,
|
|
||||||
Req = #mpb_readchunkreq{chunk_pos=#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
chunk_size=Size},
|
size=Size},
|
||||||
flag_no_checksum=machi_util:bool2int(FlagNoChecksum),
|
|
||||||
flag_no_chunk=machi_util:bool2int(FlagNoChunk),
|
|
||||||
flag_needs_trimmed=machi_util:bool2int(NeedsTrimmed)},
|
|
||||||
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
||||||
read_chunk=Req},
|
read_chunk=Req},
|
||||||
Bin1a = machi_pb:encode_mpb_request(R1a),
|
Bin1a = machi_pb:encode_mpb_request(R1a),
|
||||||
|
@ -376,30 +298,6 @@ do_send_sync2({read_chunk, File, Offset, Size, Opts},
|
||||||
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
||||||
{Res, S#state{count=Count+1}}
|
{Res, S#state{count=Count+1}}
|
||||||
end;
|
end;
|
||||||
do_send_sync2({trim_chunk, File, Offset, Size},
|
|
||||||
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
|
||||||
try
|
|
||||||
ReqID = <<Index:64/big, Count:64/big>>,
|
|
||||||
Req = #mpb_trimchunkreq{chunk_pos=#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}},
|
|
||||||
R1a = #mpb_request{req_id=ReqID, do_not_alter=1,
|
|
||||||
trim_chunk=Req},
|
|
||||||
Bin1a = machi_pb:encode_mpb_request(R1a),
|
|
||||||
ok = gen_tcp:send(Sock, Bin1a),
|
|
||||||
{ok, Bin1B} = gen_tcp:recv(Sock, 0),
|
|
||||||
case (catch machi_pb:decode_mpb_response(Bin1B)) of
|
|
||||||
#mpb_response{req_id=ReqID, trim_chunk=R} when R /= undefined ->
|
|
||||||
Result = convert_trim_chunk_resp(R),
|
|
||||||
{Result, S#state{count=Count+1}};
|
|
||||||
#mpb_response{req_id=ReqID, generic=G} when G /= undefined ->
|
|
||||||
#mpb_errorresp{code=Code, msg=Msg, extra=Extra} = G,
|
|
||||||
{{error, {Code, Msg, Extra}}, S#state{count=Count+1}}
|
|
||||||
end
|
|
||||||
catch X:Y ->
|
|
||||||
Res = {bummer, {X, Y, erlang:get_stacktrace()}},
|
|
||||||
{Res, S#state{count=Count+1}}
|
|
||||||
end;
|
|
||||||
do_send_sync2({checksum_list, File},
|
do_send_sync2({checksum_list, File},
|
||||||
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
#state{sock=Sock, sock_id=Index, count=Count}=S) ->
|
||||||
try
|
try
|
||||||
|
@ -445,15 +343,9 @@ do_send_sync2({list_files},
|
||||||
{Res, S#state{count=Count+1}}
|
{Res, S#state{count=Count+1}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% We only convert the checksum types that make sense here:
|
|
||||||
%% none or client_sha. None of the other types should be sent
|
|
||||||
%% to us via the PB high protocol.
|
|
||||||
|
|
||||||
convert_csum_req(none, Chunk) ->
|
convert_csum_req(none, Chunk) ->
|
||||||
#mpb_chunkcsum{type='CSUM_TAG_CLIENT_SHA',
|
#mpb_chunkcsum{type='CSUM_TAG_CLIENT_SHA',
|
||||||
csum=machi_util:checksum_chunk(Chunk)};
|
csum=machi_util:checksum_chunk(Chunk)};
|
||||||
convert_csum_req(<<>>, Chunk) ->
|
|
||||||
convert_csum_req(none, Chunk);
|
|
||||||
convert_csum_req({client_sha, CSumBin}, _Chunk) ->
|
convert_csum_req({client_sha, CSumBin}, _Chunk) ->
|
||||||
#mpb_chunkcsum{type='CSUM_TAG_CLIENT_SHA',
|
#mpb_chunkcsum{type='CSUM_TAG_CLIENT_SHA',
|
||||||
csum=CSumBin}.
|
csum=CSumBin}.
|
||||||
|
@ -478,8 +370,6 @@ convert_general_status_code('NOT_WRITTEN') ->
|
||||||
{error, not_written};
|
{error, not_written};
|
||||||
convert_general_status_code('WRITTEN') ->
|
convert_general_status_code('WRITTEN') ->
|
||||||
{error, written};
|
{error, written};
|
||||||
convert_general_status_code('TRIMMED') ->
|
|
||||||
{error, trimmed};
|
|
||||||
convert_general_status_code('NO_SUCH_FILE') ->
|
convert_general_status_code('NO_SUCH_FILE') ->
|
||||||
{error, no_such_file};
|
{error, no_such_file};
|
||||||
convert_general_status_code('PARTIAL_READ') ->
|
convert_general_status_code('PARTIAL_READ') ->
|
||||||
|
@ -494,29 +384,11 @@ convert_write_chunk_resp(#mpb_writechunkresp{status='OK'}) ->
|
||||||
convert_write_chunk_resp(#mpb_writechunkresp{status=Status}) ->
|
convert_write_chunk_resp(#mpb_writechunkresp{status=Status}) ->
|
||||||
convert_general_status_code(Status).
|
convert_general_status_code(Status).
|
||||||
|
|
||||||
convert_read_chunk_resp(#mpb_readchunkresp{status='OK', chunks=PB_Chunks, trimmed=PB_Trimmed}) ->
|
convert_read_chunk_resp(#mpb_readchunkresp{status='OK', chunk=Chunk}) ->
|
||||||
Chunks = lists:map(fun(#mpb_chunk{offset=Offset,
|
{ok, Chunk};
|
||||||
file_name=File,
|
|
||||||
chunk=Chunk,
|
|
||||||
csum=#mpb_chunkcsum{type=T, csum=Ck}}) ->
|
|
||||||
%% TODO: cleanup export
|
|
||||||
Csum = <<(machi_pb_translate:conv_to_csum_tag(T)):8, Ck/binary>>,
|
|
||||||
{list_to_binary(File), Offset, Chunk, Csum}
|
|
||||||
end, PB_Chunks),
|
|
||||||
Trimmed = lists:map(fun(#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}) ->
|
|
||||||
{list_to_binary(File), Offset, Size}
|
|
||||||
end, PB_Trimmed),
|
|
||||||
{ok, {Chunks, Trimmed}};
|
|
||||||
convert_read_chunk_resp(#mpb_readchunkresp{status=Status}) ->
|
convert_read_chunk_resp(#mpb_readchunkresp{status=Status}) ->
|
||||||
convert_general_status_code(Status).
|
convert_general_status_code(Status).
|
||||||
|
|
||||||
convert_trim_chunk_resp(#mpb_trimchunkresp{status='OK'}) ->
|
|
||||||
ok;
|
|
||||||
convert_trim_chunk_resp(#mpb_trimchunkresp{status=Status}) ->
|
|
||||||
convert_general_status_code(Status).
|
|
||||||
|
|
||||||
convert_checksum_list_resp(#mpb_checksumlistresp{status='OK', chunk=Chunk}) ->
|
convert_checksum_list_resp(#mpb_checksumlistresp{status='OK', chunk=Chunk}) ->
|
||||||
{ok, Chunk};
|
{ok, Chunk};
|
||||||
convert_checksum_list_resp(#mpb_checksumlistresp{status=Status}) ->
|
convert_checksum_list_resp(#mpb_checksumlistresp{status=Status}) ->
|
||||||
|
|
|
@ -15,8 +15,8 @@
|
||||||
%% KIND, either express or implied. See the License for the
|
%% KIND, either express or implied. See the License for the
|
||||||
%% specific language governing permissions and limitations
|
%% specific language governing permissions and limitations
|
||||||
%% under the License.
|
%% under the License.
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
%%
|
||||||
|
%% -------------------------------------------------------------------
|
||||||
|
|
||||||
-module(machi_pb_translate).
|
-module(machi_pb_translate).
|
||||||
|
|
||||||
|
@ -34,115 +34,85 @@
|
||||||
-export([from_pb_request/1,
|
-export([from_pb_request/1,
|
||||||
from_pb_response/1,
|
from_pb_response/1,
|
||||||
to_pb_request/2,
|
to_pb_request/2,
|
||||||
to_pb_response/3,
|
to_pb_response/3
|
||||||
conv_from_append_opts/1,
|
|
||||||
conv_to_append_opts/1
|
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% TODO: fixme cleanup
|
|
||||||
-export([conv_to_csum_tag/1]).
|
|
||||||
|
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
echo=#mpb_echoreq{message=Msg}}) ->
|
echo=#mpb_echoreq{message=Msg}}) ->
|
||||||
{ReqID, {low_skip_wedge, {low_echo, Msg}}};
|
{ReqID, {low_echo, undefined, Msg}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
auth=#mpb_authreq{user=User, password=Pass}}) ->
|
auth=#mpb_authreq{user=User, password=Pass}}) ->
|
||||||
{ReqID, {low_skip_wedge, {low_auth, User, Pass}}};
|
{ReqID, {low_auth, undefined, User, Pass}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
append_chunk=IR=#mpb_ll_appendchunkreq{
|
append_chunk=#mpb_ll_appendchunkreq{
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS_str,
|
|
||||||
locator=NSLocator,
|
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
|
placement_key=PKey,
|
||||||
prefix=Prefix,
|
prefix=Prefix,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=#mpb_chunkcsum{type=CSum_type, csum=CSum}}}) ->
|
csum=#mpb_chunkcsum{type=CSum_type, csum=CSum},
|
||||||
NS = list_to_binary(NS_str),
|
chunk_extra=ChunkExtra}}) ->
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
CSum_tag = conv_to_csum_tag(CSum_type),
|
CSum_tag = conv_to_csum_tag(CSum_type),
|
||||||
Opts = conv_to_append_opts(IR),
|
{ReqID, {low_append_chunk, EpochID, PKey, Prefix, Chunk, CSum_tag, CSum,
|
||||||
%% NOTE: The tuple position of NSLocator is a bit odd, because EpochID
|
ChunkExtra}};
|
||||||
%% _must_ be in the 4th position (as NSV & NS must be in 2nd & 3rd).
|
|
||||||
{ReqID, {low_append_chunk, NSVersion, NS, EpochID, NSLocator,
|
|
||||||
Prefix, Chunk, CSum_tag, CSum, Opts}};
|
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
write_chunk=#mpb_ll_writechunkreq{
|
write_chunk=#mpb_ll_writechunkreq{
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS_str,
|
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
chunk=#mpb_chunk{file_name=File,
|
file=File,
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=#mpb_chunkcsum{type=CSum_type, csum=CSum}}}}) ->
|
csum=#mpb_chunkcsum{type=CSum_type, csum=CSum}}}) ->
|
||||||
NS = list_to_binary(NS_str),
|
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
CSum_tag = conv_to_csum_tag(CSum_type),
|
CSum_tag = conv_to_csum_tag(CSum_type),
|
||||||
{ReqID, {low_write_chunk, NSVersion, NS, EpochID, File, Offset, Chunk, CSum_tag, CSum}};
|
{ReqID, {low_write_chunk, EpochID, File, Offset, Chunk, CSum_tag, CSum}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
read_chunk=#mpb_ll_readchunkreq{
|
read_chunk=#mpb_ll_readchunkreq{
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS_str,
|
|
||||||
epoch_id=PB_EpochID,
|
|
||||||
chunk_pos=ChunkPos,
|
|
||||||
flag_no_checksum=PB_GetNoChecksum,
|
|
||||||
flag_no_chunk=PB_GetNoChunk,
|
|
||||||
flag_needs_trimmed=PB_NeedsTrimmed}}) ->
|
|
||||||
NS = list_to_binary(NS_str),
|
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
|
||||||
Opts = #read_opts{no_checksum=PB_GetNoChecksum,
|
|
||||||
no_chunk=PB_GetNoChunk,
|
|
||||||
needs_trimmed=PB_NeedsTrimmed},
|
|
||||||
#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size} = ChunkPos,
|
|
||||||
{ReqID, {low_read_chunk, NSVersion, NS, EpochID, File, Offset, Size, Opts}};
|
|
||||||
from_pb_request(#mpb_ll_request{
|
|
||||||
req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_ll_trimchunkreq{
|
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS_str,
|
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
file=File,
|
file=File,
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
size=Size,
|
size=Size,
|
||||||
trigger_gc=TriggerGC}}) ->
|
flag_get_checksum=PB_GetChecksum,
|
||||||
NS = list_to_binary(NS_str),
|
flag_no_chunk=PB_GetNoChunk}}) ->
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
{ReqID, {low_trim_chunk, NSVersion, NS, EpochID, File, Offset, Size, TriggerGC}};
|
Opts = [{get_checksum, conv_to_boolean(PB_GetChecksum)},
|
||||||
|
{no_chunk, conv_to_boolean(PB_GetNoChunk)}],
|
||||||
|
{ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
checksum_list=#mpb_ll_checksumlistreq{
|
checksum_list=#mpb_ll_checksumlistreq{
|
||||||
|
epoch_id=PB_EpochID,
|
||||||
file=File}}) ->
|
file=File}}) ->
|
||||||
{ReqID, {low_skip_wedge, {low_checksum_list, File}}};
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
|
{ReqID, {low_checksum_list, EpochID, File}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
list_files=#mpb_ll_listfilesreq{
|
list_files=#mpb_ll_listfilesreq{
|
||||||
epoch_id=PB_EpochID}}) ->
|
epoch_id=PB_EpochID}}) ->
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
{ReqID, {low_skip_wedge, {low_list_files, EpochID}}};
|
{ReqID, {low_list_files, EpochID}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
wedge_status=#mpb_ll_wedgestatusreq{}}) ->
|
wedge_status=#mpb_ll_wedgestatusreq{}}) ->
|
||||||
{ReqID, {low_skip_wedge, {low_wedge_status}}};
|
{ReqID, {low_wedge_status, undefined}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
delete_migration=#mpb_ll_deletemigrationreq{
|
delete_migration=#mpb_ll_deletemigrationreq{
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
file=File}}) ->
|
file=File}}) ->
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
{ReqID, {low_skip_wedge, {low_delete_migration, EpochID, File}}};
|
{ReqID, {low_delete_migration, EpochID, File}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
trunc_hack=#mpb_ll_trunchackreq{
|
trunc_hack=#mpb_ll_trunchackreq{
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
file=File}}) ->
|
file=File}}) ->
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
{ReqID, {low_skip_wedge, {low_trunc_hack, EpochID, File}}};
|
{ReqID, {low_trunc_hack, EpochID, File}};
|
||||||
from_pb_request(#mpb_ll_request{
|
from_pb_request(#mpb_ll_request{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
proj_gl=#mpb_ll_getlatestepochidreq{type=ProjType}}) ->
|
proj_gl=#mpb_ll_getlatestepochidreq{type=ProjType}}) ->
|
||||||
|
@ -183,40 +153,28 @@ from_pb_request(#mpb_request{req_id=ReqID,
|
||||||
{ReqID, {high_auth, User, Pass}};
|
{ReqID, {high_auth, User, Pass}};
|
||||||
from_pb_request(#mpb_request{req_id=ReqID,
|
from_pb_request(#mpb_request{req_id=ReqID,
|
||||||
append_chunk=IR=#mpb_appendchunkreq{}}) ->
|
append_chunk=IR=#mpb_appendchunkreq{}}) ->
|
||||||
#mpb_appendchunkreq{namespace=NS_str,
|
#mpb_appendchunkreq{placement_key=__todoPK,
|
||||||
prefix=Prefix,
|
prefix=Prefix,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=CSum} = IR,
|
csum=CSum,
|
||||||
NS = list_to_binary(NS_str),
|
chunk_extra=ChunkExtra} = IR,
|
||||||
TaggedCSum = make_tagged_csum(CSum, Chunk),
|
TaggedCSum = make_tagged_csum(CSum, Chunk),
|
||||||
Opts = conv_to_append_opts(IR),
|
{ReqID, {high_append_chunk, __todoPK, Prefix, Chunk, TaggedCSum,
|
||||||
{ReqID, {high_append_chunk, NS, Prefix, Chunk, TaggedCSum, Opts}};
|
ChunkExtra}};
|
||||||
from_pb_request(#mpb_request{req_id=ReqID,
|
from_pb_request(#mpb_request{req_id=ReqID,
|
||||||
write_chunk=IR=#mpb_writechunkreq{}}) ->
|
write_chunk=IR=#mpb_writechunkreq{}}) ->
|
||||||
#mpb_writechunkreq{chunk=#mpb_chunk{file_name=File,
|
#mpb_writechunkreq{file=File,
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=CSumRec}} = IR,
|
csum=CSum} = IR,
|
||||||
CSum = make_tagged_csum(CSumRec, Chunk),
|
TaggedCSum = make_tagged_csum(CSum, Chunk),
|
||||||
{ReqID, {high_write_chunk, File, Offset, Chunk, CSum}};
|
{ReqID, {high_write_chunk, File, Offset, Chunk, TaggedCSum}};
|
||||||
from_pb_request(#mpb_request{req_id=ReqID,
|
from_pb_request(#mpb_request{req_id=ReqID,
|
||||||
read_chunk=IR=#mpb_readchunkreq{}}) ->
|
read_chunk=IR=#mpb_readchunkreq{}}) ->
|
||||||
#mpb_readchunkreq{chunk_pos=#mpb_chunkpos{file_name=File,
|
#mpb_readchunkreq{file=File,
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
chunk_size=Size},
|
size=Size} = IR,
|
||||||
flag_no_checksum=FlagNoChecksum,
|
{ReqID, {high_read_chunk, File, Offset, Size}};
|
||||||
flag_no_chunk=FlagNoChunk,
|
|
||||||
flag_needs_trimmed=NeedsTrimmed} = IR,
|
|
||||||
Opts = #read_opts{no_checksum=FlagNoChecksum,
|
|
||||||
no_chunk=FlagNoChunk,
|
|
||||||
needs_trimmed=NeedsTrimmed},
|
|
||||||
{ReqID, {high_read_chunk, File, Offset, Size, Opts}};
|
|
||||||
from_pb_request(#mpb_request{req_id=ReqID,
|
|
||||||
trim_chunk=IR=#mpb_trimchunkreq{}}) ->
|
|
||||||
#mpb_trimchunkreq{chunk_pos=#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}} = IR,
|
|
||||||
{ReqID, {high_trim_chunk, File, Offset, Size}};
|
|
||||||
from_pb_request(#mpb_request{req_id=ReqID,
|
from_pb_request(#mpb_request{req_id=ReqID,
|
||||||
checksum_list=IR=#mpb_checksumlistreq{}}) ->
|
checksum_list=IR=#mpb_checksumlistreq{}}) ->
|
||||||
#mpb_checksumlistreq{file=File} = IR,
|
#mpb_checksumlistreq{file=File} = IR,
|
||||||
|
@ -232,17 +190,10 @@ from_pb_request(_Else) ->
|
||||||
|
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
% There is no separate LL error response record
|
|
||||||
generic=#mpb_errorresp{code=Code, msg=Msg}}) ->
|
|
||||||
{ReqID, {error, {Code, Msg}}};
|
|
||||||
from_pb_response(#mpb_ll_response{
|
|
||||||
req_id=ReqID,
|
|
||||||
% There is no separate LL echo response record
|
|
||||||
echo=#mpb_echoresp{message=Msg}}) ->
|
echo=#mpb_echoresp{message=Msg}}) ->
|
||||||
{ReqID, Msg};
|
{ReqID, Msg};
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
% There is no separate LL auth response record
|
|
||||||
auth=#mpb_authresp{code=Code}}) ->
|
auth=#mpb_authresp{code=Code}}) ->
|
||||||
{ReqID, Code};
|
{ReqID, Code};
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
|
@ -265,30 +216,13 @@ from_pb_response(#mpb_ll_response{
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
read_chunk=#mpb_ll_readchunkresp{status=Status,
|
read_chunk=#mpb_ll_readchunkresp{status=Status,
|
||||||
chunks=PB_Chunks,
|
chunk=Chunk}}) ->
|
||||||
trimmed=PB_Trimmed}}) ->
|
|
||||||
case Status of
|
case Status of
|
||||||
'OK' ->
|
'OK' ->
|
||||||
Chunks = lists:map(fun(#mpb_chunk{file_name=File,
|
{ReqID, {ok, Chunk}};
|
||||||
offset=Offset,
|
|
||||||
chunk=Bytes,
|
|
||||||
csum=#mpb_chunkcsum{type=T,csum=Ck}}) ->
|
|
||||||
Csum = <<(conv_to_csum_tag(T)):8, Ck/binary>>,
|
|
||||||
{list_to_binary(File), Offset, Bytes, Csum}
|
|
||||||
end, PB_Chunks),
|
|
||||||
Trimmed = lists:map(fun(#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}) ->
|
|
||||||
{list_to_binary(File), Offset, Size}
|
|
||||||
end, PB_Trimmed),
|
|
||||||
{ReqID, {ok, {Chunks, Trimmed}}};
|
|
||||||
_ ->
|
_ ->
|
||||||
{ReqID, machi_pb_high_client:convert_general_status_code(Status)}
|
{ReqID, machi_pb_high_client:convert_general_status_code(Status)}
|
||||||
end;
|
end;
|
||||||
from_pb_response(#mpb_ll_response{
|
|
||||||
req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_ll_trimchunkresp{status=Status}}) ->
|
|
||||||
{ReqID, machi_pb_high_client:convert_general_status_code(Status)};
|
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
checksum_list=#mpb_ll_checksumlistresp{
|
checksum_list=#mpb_ll_checksumlistresp{
|
||||||
|
@ -315,16 +249,12 @@ from_pb_response(#mpb_ll_response{
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
wedge_status=#mpb_ll_wedgestatusresp{
|
wedge_status=#mpb_ll_wedgestatusresp{
|
||||||
status=Status,
|
epoch_id=PB_EpochID, wedged_flag=PB_Wedged}}) ->
|
||||||
epoch_id=PB_EpochID, wedged_flag=Wedged_p,
|
|
||||||
namespace_version=NSVersion, namespace=NS_str}}) ->
|
|
||||||
GeneralStatus = case machi_pb_high_client:convert_general_status_code(Status) of
|
|
||||||
ok -> ok;
|
|
||||||
_Else -> {yukky, _Else}
|
|
||||||
end,
|
|
||||||
EpochID = conv_to_epoch_id(PB_EpochID),
|
EpochID = conv_to_epoch_id(PB_EpochID),
|
||||||
NS = list_to_binary(NS_str),
|
Wedged_p = if PB_Wedged == 1 -> true;
|
||||||
{ReqID, {GeneralStatus, {Wedged_p, EpochID, NSVersion, NS}}};
|
PB_Wedged == 0 -> false
|
||||||
|
end,
|
||||||
|
{ReqID, {ok, {Wedged_p, EpochID}}};
|
||||||
from_pb_response(#mpb_ll_response{
|
from_pb_response(#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
delete_migration=#mpb_ll_deletemigrationresp{
|
delete_migration=#mpb_ll_deletemigrationresp{
|
||||||
|
@ -390,100 +320,74 @@ from_pb_response(#mpb_ll_response{
|
||||||
'OK' ->
|
'OK' ->
|
||||||
{ReqID, {ok, Epochs}};
|
{ReqID, {ok, Epochs}};
|
||||||
_ ->
|
_ ->
|
||||||
{ReqID, machi_pb_high_client:convert_general_status_code(Status)}
|
{ReqID< machi_pb_high_client:convert_general_status_code(Status)}
|
||||||
end.
|
end.
|
||||||
%% No response for proj_kp/kick_projection_reaction
|
%% No response for proj_kp/kick_projection_reaction
|
||||||
|
|
||||||
%% TODO: move the #mbp_* record making code from
|
%% TODO: move the #mbp_* record making code from
|
||||||
%% machi_pb_high_client:do_send_sync() clauses into to_pb_request().
|
%% machi_pb_high_client:do_send_sync() clauses into to_pb_request().
|
||||||
|
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_echo, Msg}}) ->
|
to_pb_request(ReqID, {low_echo, _BogusEpochID, Msg}) ->
|
||||||
#mpb_ll_request{
|
#mpb_ll_request{
|
||||||
req_id=ReqID, do_not_alter=2,
|
req_id=ReqID, do_not_alter=2,
|
||||||
echo=#mpb_echoreq{message=Msg}};
|
echo=#mpb_echoreq{message=Msg}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_auth, User, Pass}}) ->
|
to_pb_request(ReqID, {low_auth, _BogusEpochID, User, Pass}) ->
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
auth=#mpb_authreq{user=User, password=Pass}};
|
auth=#mpb_authreq{user=User, password=Pass}};
|
||||||
%% NOTE: The tuple position of NSLocator is a bit odd, because EpochID
|
to_pb_request(ReqID, {low_append_chunk, EpochID, PKey, Prefix, Chunk,
|
||||||
%% _must_ be in the 4th position (as NSV & NS must be in 2nd & 3rd).
|
CSum_tag, CSum, ChunkExtra}) ->
|
||||||
to_pb_request(ReqID, {low_append_chunk, NSVersion, NS, EpochID, NSLocator,
|
|
||||||
Prefix, Chunk, CSum_tag, CSum, Opts}) ->
|
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
CSum_type = conv_from_csum_tag(CSum_tag),
|
CSum_type = conv_from_csum_tag(CSum_tag),
|
||||||
PB_CSum = #mpb_chunkcsum{type=CSum_type, csum=CSum},
|
PB_CSum = #mpb_chunkcsum{type=CSum_type, csum=CSum},
|
||||||
{ChunkExtra, Pref, FailPref} = conv_from_append_opts(Opts),
|
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
append_chunk=#mpb_ll_appendchunkreq{
|
append_chunk=#mpb_ll_appendchunkreq{
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS,
|
|
||||||
locator=NSLocator,
|
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
|
placement_key=PKey,
|
||||||
prefix=Prefix,
|
prefix=Prefix,
|
||||||
chunk=Chunk,
|
chunk=Chunk,
|
||||||
csum=PB_CSum,
|
csum=PB_CSum,
|
||||||
chunk_extra=ChunkExtra,
|
chunk_extra=ChunkExtra}};
|
||||||
preferred_file_name=Pref,
|
to_pb_request(ReqID, {low_write_chunk, EpochID, File, Offset, Chunk, CSum_tag, CSum}) ->
|
||||||
flag_fail_preferred=FailPref}};
|
|
||||||
to_pb_request(ReqID, {low_write_chunk, NSVersion, NS, EpochID, File, Offset, Chunk, CSum_tag, CSum}) ->
|
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
CSum_type = conv_from_csum_tag(CSum_tag),
|
CSum_type = conv_from_csum_tag(CSum_tag),
|
||||||
PB_CSum = #mpb_chunkcsum{type=CSum_type, csum=CSum},
|
PB_CSum = #mpb_chunkcsum{type=CSum_type, csum=CSum},
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
write_chunk=#mpb_ll_writechunkreq{
|
write_chunk=#mpb_ll_writechunkreq{
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS,
|
|
||||||
epoch_id=PB_EpochID,
|
|
||||||
chunk=#mpb_chunk{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk=Chunk,
|
|
||||||
csum=PB_CSum}}};
|
|
||||||
to_pb_request(ReqID, {low_read_chunk, NSVersion, NS, EpochID, File, Offset, Size, Opts}) ->
|
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
|
||||||
#read_opts{no_checksum=FNChecksum,
|
|
||||||
no_chunk=FNChunk,
|
|
||||||
needs_trimmed=NeedsTrimmed} = Opts,
|
|
||||||
#mpb_ll_request{
|
|
||||||
req_id=ReqID, do_not_alter=2,
|
|
||||||
read_chunk=#mpb_ll_readchunkreq{
|
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS,
|
|
||||||
epoch_id=PB_EpochID,
|
|
||||||
chunk_pos=#mpb_chunkpos{
|
|
||||||
file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size},
|
|
||||||
flag_no_checksum=FNChecksum,
|
|
||||||
flag_no_chunk=FNChunk,
|
|
||||||
flag_needs_trimmed=NeedsTrimmed}};
|
|
||||||
to_pb_request(ReqID, {low_trim_chunk, NSVersion, NS, EpochID, File, Offset, Size, TriggerGC}) ->
|
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
|
||||||
trim_chunk=#mpb_ll_trimchunkreq{
|
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS,
|
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
file=File,
|
file=File,
|
||||||
offset=Offset,
|
offset=Offset,
|
||||||
size=Size,
|
chunk=Chunk,
|
||||||
trigger_gc=TriggerGC}};
|
csum=PB_CSum}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_checksum_list, File}}) ->
|
to_pb_request(ReqID, {low_read_chunk, EpochID, File, Offset, Size, _Opts}) ->
|
||||||
|
%% TODO: stop ignoring Opts ^_^
|
||||||
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
|
#mpb_ll_request{
|
||||||
|
req_id=ReqID, do_not_alter=2,
|
||||||
|
read_chunk=#mpb_ll_readchunkreq{
|
||||||
|
epoch_id=PB_EpochID,
|
||||||
|
file=File,
|
||||||
|
offset=Offset,
|
||||||
|
size=Size}};
|
||||||
|
to_pb_request(ReqID, {low_checksum_list, EpochID, File}) ->
|
||||||
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
checksum_list=#mpb_ll_checksumlistreq{
|
checksum_list=#mpb_ll_checksumlistreq{
|
||||||
|
epoch_id=PB_EpochID,
|
||||||
file=File}};
|
file=File}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_list_files, EpochID}}) ->
|
to_pb_request(ReqID, {low_list_files, EpochID}) ->
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
list_files=#mpb_ll_listfilesreq{epoch_id=PB_EpochID}};
|
list_files=#mpb_ll_listfilesreq{epoch_id=PB_EpochID}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_wedge_status}}) ->
|
to_pb_request(ReqID, {low_wedge_status, _BogusEpochID}) ->
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
wedge_status=#mpb_ll_wedgestatusreq{}};
|
wedge_status=#mpb_ll_wedgestatusreq{}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_delete_migration, EpochID, File}}) ->
|
to_pb_request(ReqID, {low_delete_migration, EpochID, File}) ->
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
delete_migration=#mpb_ll_deletemigrationreq{
|
delete_migration=#mpb_ll_deletemigrationreq{
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
file=File}};
|
file=File}};
|
||||||
to_pb_request(ReqID, {low_skip_wedge, {low_trunc_hack, EpochID, File}}) ->
|
to_pb_request(ReqID, {low_trunc_hack, EpochID, File}) ->
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
|
||||||
trunc_hack=#mpb_ll_trunchackreq{
|
trunc_hack=#mpb_ll_trunchackreq{
|
||||||
|
@ -519,15 +423,15 @@ to_pb_response(_ReqID, _, async_no_response=X) ->
|
||||||
X;
|
X;
|
||||||
to_pb_response(ReqID, _, {low_error, ErrCode, ErrMsg}) ->
|
to_pb_response(ReqID, _, {low_error, ErrCode, ErrMsg}) ->
|
||||||
make_ll_error_resp(ReqID, ErrCode, ErrMsg);
|
make_ll_error_resp(ReqID, ErrCode, ErrMsg);
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_echo, _Msg}}, Resp) ->
|
to_pb_response(ReqID, {low_echo, _BogusEpochID, _Msg}, Resp) ->
|
||||||
#mpb_ll_response{
|
#mpb_ll_response{
|
||||||
req_id=ReqID,
|
req_id=ReqID,
|
||||||
echo=#mpb_echoresp{message=Resp}};
|
echo=#mpb_echoresp{message=Resp}};
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_auth, _, _}}, __TODO_Resp) ->
|
to_pb_response(ReqID, {low_auth, _, _, _}, __TODO_Resp) ->
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
generic=#mpb_errorresp{code=1,
|
generic=#mpb_errorresp{code=1,
|
||||||
msg="AUTH not implemented"}};
|
msg="AUTH not implemented"}};
|
||||||
to_pb_response(ReqID, {low_append_chunk, _NSV, _NS, _EID, _NSL, _Pfx, _Ch, _CST, _CS, _O}, Resp)->
|
to_pb_response(ReqID, {low_append_chunk, _EID, _PKey, _Pfx, _Ch, _CST, _CS, _CE}, Resp)->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, {Offset, Size, File}} ->
|
{ok, {Offset, Size, File}} ->
|
||||||
Where = #mpb_chunkpos{offset=Offset,
|
Where = #mpb_chunkpos{offset=Offset,
|
||||||
|
@ -543,30 +447,18 @@ to_pb_response(ReqID, {low_append_chunk, _NSV, _NS, _EID, _NSL, _Pfx, _Ch, _CST,
|
||||||
_Else ->
|
_Else ->
|
||||||
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {low_write_chunk, _NSV, _NS, _EID, _Fl, _Off, _Ch, _CST, _CS},Resp)->
|
to_pb_response(ReqID, {low_write_chunk, _EID, _Fl, _Off, _Ch, _CST, _CS},Resp)->
|
||||||
Status = conv_from_status(Resp),
|
Status = conv_from_status(Resp),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
write_chunk=#mpb_ll_writechunkresp{status=Status}};
|
write_chunk=#mpb_ll_writechunkresp{status=Status}};
|
||||||
to_pb_response(ReqID, {low_read_chunk, _NSV, _NS, _EID, _Fl, _Off, _Sz, _Opts}, Resp)->
|
to_pb_response(ReqID, {low_read_chunk, _EID, _Fl, _Off, _Sz, _Opts}, Resp)->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, {Chunks, Trimmed}} ->
|
{ok, Chunk} ->
|
||||||
PB_Chunks = lists:map(fun({File, Offset, Bytes, Csum}) ->
|
CSum = undefined, % TODO not implemented
|
||||||
{Tag, Ck} = machi_util:unmake_tagged_csum(Csum),
|
|
||||||
#mpb_chunk{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk=Bytes,
|
|
||||||
csum=#mpb_chunkcsum{type=conv_from_csum_tag(Tag),
|
|
||||||
csum=Ck}}
|
|
||||||
end, Chunks),
|
|
||||||
PB_Trimmed = lists:map(fun({File, Offset, Size}) ->
|
|
||||||
#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}
|
|
||||||
end, Trimmed),
|
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
read_chunk=#mpb_ll_readchunkresp{status='OK',
|
read_chunk=#mpb_ll_readchunkresp{status='OK',
|
||||||
chunks=PB_Chunks,
|
chunk=Chunk,
|
||||||
trimmed=PB_Trimmed}};
|
csum=CSum}};
|
||||||
{error, _}=Error ->
|
{error, _}=Error ->
|
||||||
Status = conv_from_status(Error),
|
Status = conv_from_status(Error),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
|
@ -574,19 +466,7 @@ to_pb_response(ReqID, {low_read_chunk, _NSV, _NS, _EID, _Fl, _Off, _Sz, _Opts},
|
||||||
_Else ->
|
_Else ->
|
||||||
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {low_trim_chunk, _, _, _, _, _, _, _}, Resp) ->
|
to_pb_response(ReqID, {low_checksum_list, _EpochID, _File}, Resp) ->
|
||||||
case Resp of
|
|
||||||
ok ->
|
|
||||||
#mpb_ll_response{req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_ll_trimchunkresp{status='OK'}};
|
|
||||||
{error, _}=Error ->
|
|
||||||
Status = conv_from_status(Error),
|
|
||||||
#mpb_ll_response{req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_ll_trimchunkresp{status=Status}};
|
|
||||||
_Else ->
|
|
||||||
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
|
||||||
end;
|
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_checksum_list, _File}}, Resp) ->
|
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, Chunk} ->
|
{ok, Chunk} ->
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
|
@ -599,7 +479,7 @@ to_pb_response(ReqID, {low_skip_wedge, {low_checksum_list, _File}}, Resp) ->
|
||||||
_Else ->
|
_Else ->
|
||||||
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_list_files, _EpochID}}, Resp) ->
|
to_pb_response(ReqID, {low_list_files, _EpochID}, Resp) ->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, FileInfo} ->
|
{ok, FileInfo} ->
|
||||||
PB_Files = [#mpb_fileinfo{file_size=Size, file_name=Name} ||
|
PB_Files = [#mpb_fileinfo{file_size=Size, file_name=Name} ||
|
||||||
|
@ -614,28 +494,26 @@ to_pb_response(ReqID, {low_skip_wedge, {low_list_files, _EpochID}}, Resp) ->
|
||||||
_Else ->
|
_Else ->
|
||||||
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_wedge_status}}, Resp) ->
|
to_pb_response(ReqID, {low_wedge_status, _BogusEpochID}, Resp) ->
|
||||||
case Resp of
|
case Resp of
|
||||||
{error, _}=Error ->
|
{error, _}=Error ->
|
||||||
Status = conv_from_status(Error),
|
Status = conv_from_status(Error),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
wedge_status=#mpb_ll_wedgestatusresp{status=Status}};
|
wedge_status=#mpb_ll_wedgestatusresp{status=Status}};
|
||||||
{Wedged_p, EpochID, NSVersion, NS} ->
|
{Wedged_p, EpochID} ->
|
||||||
|
PB_Wedged = conv_from_boolean(Wedged_p),
|
||||||
PB_EpochID = conv_from_epoch_id(EpochID),
|
PB_EpochID = conv_from_epoch_id(EpochID),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
wedge_status=#mpb_ll_wedgestatusresp{
|
wedge_status=#mpb_ll_wedgestatusresp{
|
||||||
status='OK',
|
status='OK',
|
||||||
epoch_id=PB_EpochID,
|
epoch_id=PB_EpochID,
|
||||||
wedged_flag=Wedged_p,
|
wedged_flag=PB_Wedged}}
|
||||||
namespace_version=NSVersion,
|
|
||||||
namespace=NS
|
|
||||||
}}
|
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_delete_migration, _EID, _Fl}}, Resp)->
|
to_pb_response(ReqID, {low_delete_migration, _EID, _Fl}, Resp)->
|
||||||
Status = conv_from_status(Resp),
|
Status = conv_from_status(Resp),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
delete_migration=#mpb_ll_deletemigrationresp{status=Status}};
|
delete_migration=#mpb_ll_deletemigrationresp{status=Status}};
|
||||||
to_pb_response(ReqID, {low_skip_wedge, {low_trunc_hack, _EID, _Fl}}, Resp)->
|
to_pb_response(ReqID, {low_trunc_hack, _EID, _Fl}, Resp)->
|
||||||
Status = conv_from_status(Resp),
|
Status = conv_from_status(Resp),
|
||||||
#mpb_ll_response{req_id=ReqID,
|
#mpb_ll_response{req_id=ReqID,
|
||||||
trunc_hack=#mpb_ll_trunchackresp{status=Status}};
|
trunc_hack=#mpb_ll_trunchackresp{status=Status}};
|
||||||
|
@ -716,7 +594,7 @@ to_pb_response(ReqID, {high_auth, _User, _Pass}, _Resp) ->
|
||||||
#mpb_response{req_id=ReqID,
|
#mpb_response{req_id=ReqID,
|
||||||
generic=#mpb_errorresp{code=1,
|
generic=#mpb_errorresp{code=1,
|
||||||
msg="AUTH not implemented"}};
|
msg="AUTH not implemented"}};
|
||||||
to_pb_response(ReqID, {high_append_chunk, _NS, _Prefix, _Chunk, _TSum, _O}, Resp)->
|
to_pb_response(ReqID, {high_append_chunk, _TODO, _Prefix, _Chunk, _TSum, _CE}, Resp)->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, {Offset, Size, File}} ->
|
{ok, {Offset, Size, File}} ->
|
||||||
Where = #mpb_chunkpos{offset=Offset,
|
Where = #mpb_chunkpos{offset=Offset,
|
||||||
|
@ -732,7 +610,7 @@ to_pb_response(ReqID, {high_append_chunk, _NS, _Prefix, _Chunk, _TSum, _O}, Resp
|
||||||
_Else ->
|
_Else ->
|
||||||
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {high_write_chunk, _File, _Offset, _Chunk, _CSum}, Resp) ->
|
to_pb_response(ReqID, {high_write_chunk, _File, _Offset, _Chunk, _TaggedCSum}, Resp) ->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, {_,_,_}} ->
|
{ok, {_,_,_}} ->
|
||||||
%% machi_cr_client returns ok 2-tuple, convert to simple ok.
|
%% machi_cr_client returns ok 2-tuple, convert to simple ok.
|
||||||
|
@ -745,26 +623,12 @@ to_pb_response(ReqID, {high_write_chunk, _File, _Offset, _Chunk, _CSum}, Resp) -
|
||||||
_Else ->
|
_Else ->
|
||||||
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {high_read_chunk, _File, _Offset, _Size, _}, Resp) ->
|
to_pb_response(ReqID, {high_read_chunk, _File, _Offset, _Size}, Resp) ->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, {Chunks, Trimmed}} ->
|
{ok, Chunk} ->
|
||||||
PB_Chunks = lists:map(fun({File, Offset, Bytes, Csum}) ->
|
|
||||||
{Tag, Ck} = machi_util:unmake_tagged_csum(Csum),
|
|
||||||
#mpb_chunk{
|
|
||||||
offset=Offset,
|
|
||||||
file_name=File,
|
|
||||||
chunk=Bytes,
|
|
||||||
csum=#mpb_chunkcsum{type=conv_from_csum_tag(Tag), csum=Ck}}
|
|
||||||
end, Chunks),
|
|
||||||
PB_Trimmed = lists:map(fun({File, Offset, Size}) ->
|
|
||||||
#mpb_chunkpos{file_name=File,
|
|
||||||
offset=Offset,
|
|
||||||
chunk_size=Size}
|
|
||||||
end, Trimmed),
|
|
||||||
#mpb_response{req_id=ReqID,
|
#mpb_response{req_id=ReqID,
|
||||||
read_chunk=#mpb_readchunkresp{status='OK',
|
read_chunk=#mpb_readchunkresp{status='OK',
|
||||||
chunks=PB_Chunks,
|
chunk=Chunk}};
|
||||||
trimmed=PB_Trimmed}};
|
|
||||||
{error, _}=Error ->
|
{error, _}=Error ->
|
||||||
Status = conv_from_status(Error),
|
Status = conv_from_status(Error),
|
||||||
#mpb_response{req_id=ReqID,
|
#mpb_response{req_id=ReqID,
|
||||||
|
@ -772,18 +636,6 @@ to_pb_response(ReqID, {high_read_chunk, _File, _Offset, _Size, _}, Resp) ->
|
||||||
_Else ->
|
_Else ->
|
||||||
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
||||||
end;
|
end;
|
||||||
to_pb_response(ReqID, {high_trim_chunk, _File, _Offset, _Size}, Resp) ->
|
|
||||||
case Resp of
|
|
||||||
ok ->
|
|
||||||
#mpb_response{req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_trimchunkresp{status='OK'}};
|
|
||||||
{error, _}=Error ->
|
|
||||||
Status = conv_from_status(Error),
|
|
||||||
#mpb_response{req_id=ReqID,
|
|
||||||
trim_chunk=#mpb_trimchunkresp{status=Status}};
|
|
||||||
_Else ->
|
|
||||||
make_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
|
|
||||||
end;
|
|
||||||
to_pb_response(ReqID, {high_checksum_list, _File}, Resp) ->
|
to_pb_response(ReqID, {high_checksum_list, _File}, Resp) ->
|
||||||
case Resp of
|
case Resp of
|
||||||
{ok, Chunk} ->
|
{ok, Chunk} ->
|
||||||
|
@ -840,7 +692,6 @@ conv_to_epoch_id(#mpb_epochid{epoch_number=Epoch,
|
||||||
conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch,
|
conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch,
|
||||||
epoch_csum=CSum,
|
epoch_csum=CSum,
|
||||||
author_server=Author,
|
author_server=Author,
|
||||||
chain_name=ChainName,
|
|
||||||
all_members=AllMembers,
|
all_members=AllMembers,
|
||||||
witnesses=Witnesses,
|
witnesses=Witnesses,
|
||||||
creation_time=CTime,
|
creation_time=CTime,
|
||||||
|
@ -848,13 +699,14 @@ conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch,
|
||||||
upi=UPI,
|
upi=UPI,
|
||||||
repairing=Repairing,
|
repairing=Repairing,
|
||||||
down=Down,
|
down=Down,
|
||||||
|
opaque_flap=Flap,
|
||||||
|
opaque_inner=Inner,
|
||||||
opaque_dbg=Dbg,
|
opaque_dbg=Dbg,
|
||||||
opaque_dbg2=Dbg2,
|
opaque_dbg2=Dbg2,
|
||||||
members_dict=MembersDict}) ->
|
members_dict=MembersDict}) ->
|
||||||
#projection_v1{epoch_number=Epoch,
|
#projection_v1{epoch_number=Epoch,
|
||||||
epoch_csum=CSum,
|
epoch_csum=CSum,
|
||||||
author_server=to_atom(Author),
|
author_server=to_atom(Author),
|
||||||
chain_name=to_atom(ChainName),
|
|
||||||
all_members=[to_atom(X) || X <- AllMembers],
|
all_members=[to_atom(X) || X <- AllMembers],
|
||||||
witnesses=[to_atom(X) || X <- Witnesses],
|
witnesses=[to_atom(X) || X <- Witnesses],
|
||||||
creation_time=conv_to_now(CTime),
|
creation_time=conv_to_now(CTime),
|
||||||
|
@ -862,6 +714,8 @@ conv_to_projection_v1(#mpb_projectionv1{epoch_number=Epoch,
|
||||||
upi=[to_atom(X) || X <- UPI],
|
upi=[to_atom(X) || X <- UPI],
|
||||||
repairing=[to_atom(X) || X <- Repairing],
|
repairing=[to_atom(X) || X <- Repairing],
|
||||||
down=[to_atom(X) || X <- Down],
|
down=[to_atom(X) || X <- Down],
|
||||||
|
flap=dec_optional_sexp(Flap),
|
||||||
|
inner=dec_optional_sexp(Inner),
|
||||||
dbg=dec_sexp(Dbg),
|
dbg=dec_sexp(Dbg),
|
||||||
dbg2=dec_sexp(Dbg2),
|
dbg2=dec_sexp(Dbg2),
|
||||||
members_dict=conv_to_members_dict(MembersDict)}.
|
members_dict=conv_to_members_dict(MembersDict)}.
|
||||||
|
@ -872,6 +726,16 @@ enc_sexp(T) ->
|
||||||
dec_sexp(Bin) when is_binary(Bin) ->
|
dec_sexp(Bin) when is_binary(Bin) ->
|
||||||
binary_to_term(Bin).
|
binary_to_term(Bin).
|
||||||
|
|
||||||
|
enc_optional_sexp(undefined) ->
|
||||||
|
undefined;
|
||||||
|
enc_optional_sexp(T) ->
|
||||||
|
enc_sexp(T).
|
||||||
|
|
||||||
|
dec_optional_sexp(undefined) ->
|
||||||
|
undefined;
|
||||||
|
dec_optional_sexp(T) ->
|
||||||
|
dec_sexp(T).
|
||||||
|
|
||||||
conv_from_members_dict(D) ->
|
conv_from_members_dict(D) ->
|
||||||
%% Use list_to_binary() here to "flatten" the serialized #p_srvr{}
|
%% Use list_to_binary() here to "flatten" the serialized #p_srvr{}
|
||||||
[#mpb_membersdictentry{key=to_list(K), val=conv_from_p_srvr(V)} ||
|
[#mpb_membersdictentry{key=to_list(K), val=conv_from_p_srvr(V)} ||
|
||||||
|
@ -975,8 +839,6 @@ conv_from_status({error, not_written}) ->
|
||||||
'NOT_WRITTEN';
|
'NOT_WRITTEN';
|
||||||
conv_from_status({error, written}) ->
|
conv_from_status({error, written}) ->
|
||||||
'WRITTEN';
|
'WRITTEN';
|
||||||
conv_from_status({error, trimmed}) ->
|
|
||||||
'TRIMMED';
|
|
||||||
conv_from_status({error, no_such_file}) ->
|
conv_from_status({error, no_such_file}) ->
|
||||||
'NO_SUCH_FILE';
|
'NO_SUCH_FILE';
|
||||||
conv_from_status({error, partial_read}) ->
|
conv_from_status({error, partial_read}) ->
|
||||||
|
@ -984,34 +846,24 @@ conv_from_status({error, partial_read}) ->
|
||||||
conv_from_status({error, bad_epoch}) ->
|
conv_from_status({error, bad_epoch}) ->
|
||||||
'BAD_EPOCH';
|
'BAD_EPOCH';
|
||||||
conv_from_status(_OOPS) ->
|
conv_from_status(_OOPS) ->
|
||||||
io:format(user, "HEY, ~s:~w got ~p\n", [?MODULE, ?LINE, _OOPS]),
|
io:format(user, "HEY, ~s:~w got ~w\n", [?MODULE, ?LINE, _OOPS]),
|
||||||
'BAD_JOSS'.
|
'BAD_JOSS'.
|
||||||
|
|
||||||
conv_from_append_opts(#append_opts{chunk_extra=ChunkExtra,
|
conv_to_boolean(undefined) ->
|
||||||
preferred_file_name=Pref,
|
false;
|
||||||
flag_fail_preferred=FailPref}) ->
|
conv_to_boolean(0) ->
|
||||||
{ChunkExtra, Pref, FailPref}.
|
false;
|
||||||
|
conv_to_boolean(N) when is_integer(N) ->
|
||||||
|
true.
|
||||||
|
|
||||||
|
conv_from_boolean(false) ->
|
||||||
conv_to_append_opts(#mpb_appendchunkreq{
|
0;
|
||||||
chunk_extra=ChunkExtra,
|
conv_from_boolean(true) ->
|
||||||
preferred_file_name=Pref,
|
1.
|
||||||
flag_fail_preferred=FailPref}) ->
|
|
||||||
#append_opts{chunk_extra=ChunkExtra,
|
|
||||||
preferred_file_name=Pref,
|
|
||||||
flag_fail_preferred=FailPref};
|
|
||||||
conv_to_append_opts(#mpb_ll_appendchunkreq{
|
|
||||||
chunk_extra=ChunkExtra,
|
|
||||||
preferred_file_name=Pref,
|
|
||||||
flag_fail_preferred=FailPref}) ->
|
|
||||||
#append_opts{chunk_extra=ChunkExtra,
|
|
||||||
preferred_file_name=Pref,
|
|
||||||
flag_fail_preferred=FailPref}.
|
|
||||||
|
|
||||||
conv_from_projection_v1(#projection_v1{epoch_number=Epoch,
|
conv_from_projection_v1(#projection_v1{epoch_number=Epoch,
|
||||||
epoch_csum=CSum,
|
epoch_csum=CSum,
|
||||||
author_server=Author,
|
author_server=Author,
|
||||||
chain_name=ChainName,
|
|
||||||
all_members=AllMembers,
|
all_members=AllMembers,
|
||||||
witnesses=Witnesses,
|
witnesses=Witnesses,
|
||||||
creation_time=CTime,
|
creation_time=CTime,
|
||||||
|
@ -1019,13 +871,14 @@ conv_from_projection_v1(#projection_v1{epoch_number=Epoch,
|
||||||
upi=UPI,
|
upi=UPI,
|
||||||
repairing=Repairing,
|
repairing=Repairing,
|
||||||
down=Down,
|
down=Down,
|
||||||
|
flap=Flap,
|
||||||
|
inner=Inner,
|
||||||
dbg=Dbg,
|
dbg=Dbg,
|
||||||
dbg2=Dbg2,
|
dbg2=Dbg2,
|
||||||
members_dict=MembersDict}) ->
|
members_dict=MembersDict}) ->
|
||||||
#mpb_projectionv1{epoch_number=Epoch,
|
#mpb_projectionv1{epoch_number=Epoch,
|
||||||
epoch_csum=CSum,
|
epoch_csum=CSum,
|
||||||
author_server=to_list(Author),
|
author_server=to_list(Author),
|
||||||
chain_name=to_list(ChainName),
|
|
||||||
all_members=[to_list(X) || X <- AllMembers],
|
all_members=[to_list(X) || X <- AllMembers],
|
||||||
witnesses=[to_list(X) || X <- Witnesses],
|
witnesses=[to_list(X) || X <- Witnesses],
|
||||||
creation_time=conv_from_now(CTime),
|
creation_time=conv_from_now(CTime),
|
||||||
|
@ -1033,6 +886,8 @@ conv_from_projection_v1(#projection_v1{epoch_number=Epoch,
|
||||||
upi=[to_list(X) || X <- UPI],
|
upi=[to_list(X) || X <- UPI],
|
||||||
repairing=[to_list(X) || X <- Repairing],
|
repairing=[to_list(X) || X <- Repairing],
|
||||||
down=[to_list(X) || X <- Down],
|
down=[to_list(X) || X <- Down],
|
||||||
|
opaque_flap=enc_optional_sexp(Flap),
|
||||||
|
opaque_inner=enc_optional_sexp(Inner),
|
||||||
opaque_dbg=enc_sexp(Dbg),
|
opaque_dbg=enc_sexp(Dbg),
|
||||||
opaque_dbg2=enc_sexp(Dbg2),
|
opaque_dbg2=enc_sexp(Dbg2),
|
||||||
members_dict=conv_from_members_dict(MembersDict)}.
|
members_dict=conv_from_members_dict(MembersDict)}.
|
||||||
|
|
|
@ -1,89 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2016 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_plist).
|
|
||||||
|
|
||||||
%%% @doc persistent list of binaries
|
|
||||||
|
|
||||||
-export([open/2, close/1, find/2, add/2]).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-export([all/1]).
|
|
||||||
-endif.
|
|
||||||
|
|
||||||
-record(machi_plist,
|
|
||||||
{filename :: file:filename_all(),
|
|
||||||
fd :: file:io_device(),
|
|
||||||
list = [] :: list(string)}).
|
|
||||||
|
|
||||||
-type plist() :: #machi_plist{}.
|
|
||||||
-export_type([plist/0]).
|
|
||||||
|
|
||||||
-spec open(file:filename_all(), proplists:proplist()) ->
|
|
||||||
{ok, plist()} | {error, file:posix()}.
|
|
||||||
open(Filename, _Opt) ->
|
|
||||||
%% TODO: This decode could fail if the file didn't finish writing
|
|
||||||
%% whole contents, which should be fixed by some persistent
|
|
||||||
%% solution.
|
|
||||||
List = case file:read_file(Filename) of
|
|
||||||
{ok, <<>>} -> [];
|
|
||||||
{ok, Bin} -> binary_to_term(Bin);
|
|
||||||
{error, enoent} -> []
|
|
||||||
end,
|
|
||||||
case file:open(Filename, [read, write, raw, binary, sync]) of
|
|
||||||
{ok, Fd} ->
|
|
||||||
{ok, #machi_plist{filename=Filename,
|
|
||||||
fd=Fd,
|
|
||||||
list=List}};
|
|
||||||
Error ->
|
|
||||||
Error
|
|
||||||
end.
|
|
||||||
|
|
||||||
-spec close(plist()) -> ok.
|
|
||||||
close(#machi_plist{fd=Fd}) ->
|
|
||||||
_ = file:close(Fd).
|
|
||||||
|
|
||||||
-spec find(plist(), string()) -> boolean().
|
|
||||||
find(#machi_plist{list=List}, Name) ->
|
|
||||||
lists:member(Name, List).
|
|
||||||
|
|
||||||
-spec add(plist(), string()) -> {ok, plist()} | {error, file:posix()}.
|
|
||||||
add(Plist = #machi_plist{list=List0, fd=Fd}, Name) ->
|
|
||||||
case find(Plist, Name) of
|
|
||||||
true ->
|
|
||||||
{ok, Plist};
|
|
||||||
false ->
|
|
||||||
List = lists:append(List0, [Name]),
|
|
||||||
%% TODO: partial write could break the file with other
|
|
||||||
%% persistent info (even lose data of trimmed states);
|
|
||||||
%% needs a solution.
|
|
||||||
case file:pwrite(Fd, 0, term_to_binary(List)) of
|
|
||||||
ok ->
|
|
||||||
{ok, Plist#machi_plist{list=List}};
|
|
||||||
Error ->
|
|
||||||
Error
|
|
||||||
end
|
|
||||||
end.
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-spec all(plist()) -> [file:filename()].
|
|
||||||
all(#machi_plist{list=List}) ->
|
|
||||||
List.
|
|
||||||
-endif.
|
|
|
@ -31,8 +31,7 @@
|
||||||
compare/2,
|
compare/2,
|
||||||
get_epoch_id/1,
|
get_epoch_id/1,
|
||||||
make_summary/1,
|
make_summary/1,
|
||||||
make_members_dict/1,
|
make_members_dict/1
|
||||||
make_epoch_id/1
|
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% @doc Create a new projection record.
|
%% @doc Create a new projection record.
|
||||||
|
@ -111,25 +110,8 @@ new(EpochNum, MyName, [] = _MembersDict0, _Down_list, _UPI_list,_Repairing_list,
|
||||||
%% @doc Update the checksum element of a projection record.
|
%% @doc Update the checksum element of a projection record.
|
||||||
|
|
||||||
update_checksum(P) ->
|
update_checksum(P) ->
|
||||||
%% Fields that we ignore when calculating checksum:
|
|
||||||
%% * epoch_csum
|
|
||||||
%% * dbg2: humming consensus participants may modify this at will without
|
|
||||||
%% voiding the identity of the projection as a whole.
|
|
||||||
%% * flap: In some cases in CP mode, code upstream of C120 may have
|
|
||||||
%% updated the flapping information. That's OK enough: we aren't
|
|
||||||
%% going to violate chain replication safety rules (or
|
|
||||||
%% accidentally encourage someone else sometime later) by
|
|
||||||
%% replacing flapping information with our own local view at
|
|
||||||
%% this instant in time.
|
|
||||||
%% * creation_time: With CP mode & inner projections, it's damn annoying
|
|
||||||
%% to have to copy this around 100% correctly. {sigh}
|
|
||||||
%% That's a negative state of the code. However, there
|
|
||||||
%% isn't a safety violation if the creation_time is
|
|
||||||
%% altered for any reason: it's there only for human
|
|
||||||
%% benefit for debugging.
|
|
||||||
CSum = crypto:hash(sha,
|
CSum = crypto:hash(sha,
|
||||||
term_to_binary(P#projection_v1{epoch_csum= <<>>,
|
term_to_binary(P#projection_v1{epoch_csum= <<>>,
|
||||||
creation_time=undefined,
|
|
||||||
dbg2=[]})),
|
dbg2=[]})),
|
||||||
P#projection_v1{epoch_csum=CSum}.
|
P#projection_v1{epoch_csum=CSum}.
|
||||||
|
|
||||||
|
@ -164,7 +146,6 @@ get_epoch_id(#projection_v1{epoch_number=Epoch, epoch_csum=CSum}) ->
|
||||||
%% @doc Create a proplist-style summary of a projection record.
|
%% @doc Create a proplist-style summary of a projection record.
|
||||||
|
|
||||||
make_summary(#projection_v1{epoch_number=EpochNum,
|
make_summary(#projection_v1{epoch_number=EpochNum,
|
||||||
epoch_csum= <<_CSum4:4/binary, _/binary>>,
|
|
||||||
all_members=_All_list,
|
all_members=_All_list,
|
||||||
mode=CMode,
|
mode=CMode,
|
||||||
witnesses=Witness_list,
|
witnesses=Witness_list,
|
||||||
|
@ -172,11 +153,20 @@ make_summary(#projection_v1{epoch_number=EpochNum,
|
||||||
author_server=Author,
|
author_server=Author,
|
||||||
upi=UPI_list,
|
upi=UPI_list,
|
||||||
repairing=Repairing_list,
|
repairing=Repairing_list,
|
||||||
|
inner=Inner,
|
||||||
|
flap=Flap,
|
||||||
dbg=Dbg, dbg2=Dbg2}) ->
|
dbg=Dbg, dbg2=Dbg2}) ->
|
||||||
[{epoch,EpochNum}, {csum,_CSum4},
|
InnerInfo = if is_record(Inner, projection_v1) ->
|
||||||
{all, _All_list},
|
[{inner, make_summary(Inner)}];
|
||||||
{author,Author}, {mode,CMode},{witnesses, Witness_list},
|
true ->
|
||||||
|
[]
|
||||||
|
end,
|
||||||
|
[{epoch,EpochNum},{author,Author},
|
||||||
|
{mode,CMode},{witnesses, Witness_list},
|
||||||
{upi,UPI_list},{repair,Repairing_list},{down,Down_list}] ++
|
{upi,UPI_list},{repair,Repairing_list},{down,Down_list}] ++
|
||||||
|
InnerInfo ++
|
||||||
|
[{flap, Flap}] ++
|
||||||
|
%% [{flap, lists:flatten(io_lib:format("~p", [Flap]))}] ++
|
||||||
[{d,Dbg}, {d2,Dbg2}].
|
[{d,Dbg}, {d2,Dbg2}].
|
||||||
|
|
||||||
%% @doc Make a `p_srvr_dict()' out of a list of `p_srvr()' or out of a
|
%% @doc Make a `p_srvr_dict()' out of a list of `p_srvr()' or out of a
|
||||||
|
@ -211,6 +201,3 @@ make_members_dict(Ps) ->
|
||||||
exit({badarg, {make_members_dict, lists:filter(F_neither, Ps)}})
|
exit({badarg, {make_members_dict, lists:filter(F_neither, Ps)}})
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
make_epoch_id(#projection_v1{epoch_number=Epoch, epoch_csum=CSum}) ->
|
|
||||||
{Epoch, CSum}.
|
|
||||||
|
|
|
@ -40,7 +40,6 @@
|
||||||
|
|
||||||
-module(machi_projection_store).
|
-module(machi_projection_store).
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
-include("machi_projection.hrl").
|
||||||
-define(V(X,Y), ok).
|
-define(V(X,Y), ok).
|
||||||
%% -include("machi_verbose.hrl").
|
%% -include("machi_verbose.hrl").
|
||||||
|
@ -60,8 +59,7 @@
|
||||||
get_all_projections/2, get_all_projections/3,
|
get_all_projections/2, get_all_projections/3,
|
||||||
list_all_projections/2, list_all_projections/3
|
list_all_projections/2, list_all_projections/3
|
||||||
]).
|
]).
|
||||||
-export([set_wedge_notify_pid/2, get_wedge_notify_pid/1,
|
-export([set_wedge_notify_pid/2]).
|
||||||
set_consistency_mode/2]).
|
|
||||||
|
|
||||||
%% gen_server callbacks
|
%% gen_server callbacks
|
||||||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
|
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
|
||||||
|
@ -74,8 +72,7 @@
|
||||||
private_dir = "" :: string(),
|
private_dir = "" :: string(),
|
||||||
wedge_notify_pid :: pid() | atom(),
|
wedge_notify_pid :: pid() | atom(),
|
||||||
max_public_epochid = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()},
|
max_public_epochid = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()},
|
||||||
max_private_epochid = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()},
|
max_private_epochid = ?NO_EPOCH :: {-1 | non_neg_integer(), binary()}
|
||||||
consistency_mode=ap_mode :: 'ap_mode' | 'cp_mode'
|
|
||||||
}).
|
}).
|
||||||
|
|
||||||
%% @doc Start a new projection store server.
|
%% @doc Start a new projection store server.
|
||||||
|
@ -137,7 +134,6 @@ write(PidSpec, ProjType, Proj, Timeout)
|
||||||
is_record(Proj, projection_v1),
|
is_record(Proj, projection_v1),
|
||||||
is_integer(Proj#projection_v1.epoch_number),
|
is_integer(Proj#projection_v1.epoch_number),
|
||||||
Proj#projection_v1.epoch_number >= 0 ->
|
Proj#projection_v1.epoch_number >= 0 ->
|
||||||
testing_sleep_perhaps(),
|
|
||||||
g_call(PidSpec, {write, ProjType, Proj}, Timeout).
|
g_call(PidSpec, {write, ProjType, Proj}, Timeout).
|
||||||
|
|
||||||
%% @doc Fetch all projection records of type `ProjType'.
|
%% @doc Fetch all projection records of type `ProjType'.
|
||||||
|
@ -163,16 +159,7 @@ list_all_projections(PidSpec, ProjType, Timeout)
|
||||||
g_call(PidSpec, {list_all_projections, ProjType}, Timeout).
|
g_call(PidSpec, {list_all_projections, ProjType}, Timeout).
|
||||||
|
|
||||||
set_wedge_notify_pid(PidSpec, NotifyWedgeStateChanges) ->
|
set_wedge_notify_pid(PidSpec, NotifyWedgeStateChanges) ->
|
||||||
gen_server:call(PidSpec, {set_wedge_notify_pid, NotifyWedgeStateChanges},
|
gen_server:call(PidSpec, {set_wedge_notify_pid, NotifyWedgeStateChanges}).
|
||||||
infinity).
|
|
||||||
|
|
||||||
get_wedge_notify_pid(PidSpec) ->
|
|
||||||
gen_server:call(PidSpec, {get_wedge_notify_pid},
|
|
||||||
infinity).
|
|
||||||
|
|
||||||
set_consistency_mode(PidSpec, CMode)
|
|
||||||
when CMode == ap_mode; CMode == cp_mode ->
|
|
||||||
gen_server:call(PidSpec, {set_consistency_mode, CMode}, infinity).
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
@ -237,10 +224,6 @@ handle_call({{list_all_projections, ProjType}, LC1}, _From, S) ->
|
||||||
{reply, {{ok, find_all(Dir)}, LC2}, S};
|
{reply, {{ok, find_all(Dir)}, LC2}, S};
|
||||||
handle_call({set_wedge_notify_pid, NotifyWedgeStateChanges}, _From, S) ->
|
handle_call({set_wedge_notify_pid, NotifyWedgeStateChanges}, _From, S) ->
|
||||||
{reply, ok, S#state{wedge_notify_pid=NotifyWedgeStateChanges}};
|
{reply, ok, S#state{wedge_notify_pid=NotifyWedgeStateChanges}};
|
||||||
handle_call({get_wedge_notify_pid}, _From, S) ->
|
|
||||||
{reply, {ok, S#state.wedge_notify_pid}, S};
|
|
||||||
handle_call({set_consistency_mode, CMode}, _From, S) ->
|
|
||||||
{reply, ok, S#state{consistency_mode=CMode}};
|
|
||||||
handle_call(_Request, _From, S) ->
|
handle_call(_Request, _From, S) ->
|
||||||
Reply = {whaaaaaaaaaaaaazz, _Request},
|
Reply = {whaaaaaaaaaaaaazz, _Request},
|
||||||
{reply, Reply, S}.
|
{reply, Reply, S}.
|
||||||
|
@ -278,57 +261,32 @@ do_proj_read(ProjType, Epoch, S_or_Dir) ->
|
||||||
{{error, Else}, S_or_Dir}
|
{{error, Else}, S_or_Dir}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
do_proj_write(ProjType, Proj, S) ->
|
do_proj_write(public=ProjType, Proj, S) ->
|
||||||
do_proj_write2(ProjType, Proj, S).
|
do_proj_write2(ProjType, Proj, S);
|
||||||
|
do_proj_write(private=ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
|
||||||
do_proj_write2(ProjType, #projection_v1{epoch_csum=CSum}=Proj, S) ->
|
case S#state.max_public_epochid of
|
||||||
case (machi_projection:update_checksum(Proj))#projection_v1.epoch_csum of
|
{PublicEpoch, _} when PublicEpoch =< Epoch ->
|
||||||
CSum2 when CSum2 == CSum ->
|
do_proj_write2(ProjType, Proj, S);
|
||||||
do_proj_write3(ProjType, Proj, S);
|
{PublicEpoch, _} ->
|
||||||
_Else ->
|
|
||||||
{{error, bad_arg}, S}
|
{{error, bad_arg}, S}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
do_proj_write3(ProjType, #projection_v1{epoch_number=Epoch,
|
do_proj_write2(ProjType, #projection_v1{epoch_number=Epoch}=Proj, S) ->
|
||||||
epoch_csum=CSum}=Proj, S) ->
|
|
||||||
%% TODO: We probably ought to check the projection checksum for sanity, eh?
|
%% TODO: We probably ought to check the projection checksum for sanity, eh?
|
||||||
Dir = pick_path(ProjType, S),
|
Dir = pick_path(ProjType, S),
|
||||||
Path = filename:join(Dir, epoch2name(Epoch)),
|
Path = filename:join(Dir, epoch2name(Epoch)),
|
||||||
case file:read_file(Path) of
|
case file:read_file_info(Path) of
|
||||||
{ok, _Bin} when ProjType == public ->
|
{ok, _FI} ->
|
||||||
{{error, written}, S};
|
{{error, written}, S};
|
||||||
{ok, Bin} when ProjType == private ->
|
|
||||||
#projection_v1{epoch_number=CurEpoch,
|
|
||||||
epoch_csum=CurCSum} = _CurProj = binary_to_term(Bin),
|
|
||||||
%% We've already checked that CSum is correct and matches the
|
|
||||||
%% contents of this new projection version. If the epoch_csum
|
|
||||||
%% values match, and if we trust the value on disk (TODO paranoid
|
|
||||||
%% check that, also), then the only difference must be the dbg2
|
|
||||||
%% list, which is ok.
|
|
||||||
if CurEpoch == Epoch, CurCSum == CSum ->
|
|
||||||
do_proj_write4(ProjType, Proj, Path, Epoch, S);
|
|
||||||
true ->
|
|
||||||
%% io:format(user, "OUCH: on disk: ~w\n", [machi_projection:make_summary(binary_to_term(Bin))]),
|
|
||||||
%% io:format(user, "OUCH: clobber: ~w\n", [machi_projection:make_summary(Proj)]),
|
|
||||||
%% io:format(user, "OUCH: clobber: ~p\n", [Proj#projection_v1.dbg2]),
|
|
||||||
%% {{error, written, CurEpoch, Epoch, CurCSum, CSum}, S}
|
|
||||||
{{error, written}, S}
|
|
||||||
end;
|
|
||||||
{error, enoent} ->
|
{error, enoent} ->
|
||||||
do_proj_write4(ProjType, Proj, Path, Epoch, S);
|
{ok, FH} = file:open(Path, [write, raw, binary]),
|
||||||
{error, Else} ->
|
|
||||||
{{error, Else}, S}
|
|
||||||
end.
|
|
||||||
|
|
||||||
do_proj_write4(ProjType, Proj, Path, Epoch, #state{consistency_mode=CMode}=S) ->
|
|
||||||
{{ok, FH}, Epoch, Path} = {file:open(Path, [write, raw, binary]), Epoch, Path},
|
|
||||||
ok = file:write(FH, term_to_binary(Proj)),
|
ok = file:write(FH, term_to_binary(Proj)),
|
||||||
ok = file:sync(FH),
|
ok = file:sync(FH),
|
||||||
ok = file:close(FH),
|
ok = file:close(FH),
|
||||||
EffectiveProj = Proj,
|
EffectiveProj = machi_chain_manager1:inner_projection_or_self(Proj),
|
||||||
EffectiveEpoch = EffectiveProj#projection_v1.epoch_number,
|
EffectiveEpoch = EffectiveProj#projection_v1.epoch_number,
|
||||||
EpochId = machi_projection:get_epoch_id(Proj),
|
EpochId = {Epoch, Proj#projection_v1.epoch_csum},
|
||||||
EffectiveEpochId = machi_projection:get_epoch_id(EffectiveProj),
|
EffectiveEpochId = {EffectiveEpoch, EffectiveProj#projection_v1.epoch_csum},
|
||||||
%%
|
%%
|
||||||
NewS = if ProjType == public,
|
NewS = if ProjType == public,
|
||||||
Epoch > element(1, S#state.max_public_epochid) ->
|
Epoch > element(1, S#state.max_public_epochid) ->
|
||||||
|
@ -347,20 +305,17 @@ do_proj_write4(ProjType, Proj, Path, Epoch, #state{consistency_mode=CMode}=S) ->
|
||||||
S#state{max_public_epochid=EpochId};
|
S#state{max_public_epochid=EpochId};
|
||||||
ProjType == private,
|
ProjType == private,
|
||||||
Epoch > element(1, S#state.max_private_epochid) ->
|
Epoch > element(1, S#state.max_private_epochid) ->
|
||||||
if CMode == ap_mode ->
|
|
||||||
update_wedge_state(
|
update_wedge_state(
|
||||||
S#state.wedge_notify_pid, false,
|
S#state.wedge_notify_pid, false,
|
||||||
EffectiveEpochId);
|
EffectiveEpochId),
|
||||||
true ->
|
|
||||||
%% If ProjType == private and CMode == cp_mode, then
|
|
||||||
%% the unwedge action is not performed here!
|
|
||||||
ok
|
|
||||||
end,
|
|
||||||
S#state{max_private_epochid=EpochId};
|
S#state{max_private_epochid=EpochId};
|
||||||
true ->
|
true ->
|
||||||
S
|
S
|
||||||
end,
|
end,
|
||||||
{ok, NewS}.
|
{ok, NewS};
|
||||||
|
{error, Else} ->
|
||||||
|
{{error, Else}, S}
|
||||||
|
end.
|
||||||
|
|
||||||
update_wedge_state(PidSpec, Boolean, {0,_}=EpochId) ->
|
update_wedge_state(PidSpec, Boolean, {0,_}=EpochId) ->
|
||||||
%% Epoch #0 is a special case: no projection has been written yet.
|
%% Epoch #0 is a special case: no projection has been written yet.
|
||||||
|
@ -387,6 +342,7 @@ wait_for_liveness(PidSpec, StartTime, WaitTime) ->
|
||||||
undefined ->
|
undefined ->
|
||||||
case timer:now_diff(os:timestamp(), StartTime) div 1000 of
|
case timer:now_diff(os:timestamp(), StartTime) div 1000 of
|
||||||
X when X < WaitTime ->
|
X when X < WaitTime ->
|
||||||
|
io:format(user, "\nYOO ~p ~p\n", [PidSpec, lists:sort(registered())]),
|
||||||
timer:sleep(1),
|
timer:sleep(1),
|
||||||
wait_for_liveness(PidSpec, StartTime, WaitTime)
|
wait_for_liveness(PidSpec, StartTime, WaitTime)
|
||||||
end;
|
end;
|
||||||
|
@ -432,18 +388,6 @@ lclock_get() ->
|
||||||
lclock_update(LC) ->
|
lclock_update(LC) ->
|
||||||
lamport_clock:update(LC).
|
lamport_clock:update(LC).
|
||||||
|
|
||||||
testing_sleep_perhaps() ->
|
|
||||||
try
|
|
||||||
[{_,Max}] = ets:lookup(?TEST_ETS_TABLE, projection_store_sleep_time),
|
|
||||||
MSec = random:uniform(Max),
|
|
||||||
io:format(user, "{", []),
|
|
||||||
timer:sleep(MSec),
|
|
||||||
io:format(user, "}", []),
|
|
||||||
ok
|
|
||||||
catch _X:_Y ->
|
|
||||||
ok
|
|
||||||
end.
|
|
||||||
|
|
||||||
-else. % TEST
|
-else. % TEST
|
||||||
|
|
||||||
lclock_init() ->
|
lclock_init() ->
|
||||||
|
@ -455,7 +399,4 @@ lclock_get() ->
|
||||||
lclock_update(_LC) ->
|
lclock_update(_LC) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
testing_sleep_perhaps() ->
|
|
||||||
ok.
|
|
||||||
|
|
||||||
-endif. % TEST
|
-endif. % TEST
|
||||||
|
|
|
@ -22,10 +22,6 @@
|
||||||
%% proxy-process style API for hiding messy details such as TCP
|
%% proxy-process style API for hiding messy details such as TCP
|
||||||
%% connection/disconnection with the remote Machi server.
|
%% connection/disconnection with the remote Machi server.
|
||||||
%%
|
%%
|
||||||
%% Please see the {@link machi_flu1_client} "Client API implementation notes"
|
|
||||||
%% section for how this module relates to the rest of the client API
|
|
||||||
%% implementation.
|
|
||||||
%%
|
|
||||||
%% Machi is intentionally avoiding using distributed Erlang for
|
%% Machi is intentionally avoiding using distributed Erlang for
|
||||||
%% Machi's communication. This design decision makes Erlang-side code
|
%% Machi's communication. This design decision makes Erlang-side code
|
||||||
%% more difficult & complex, but it's the price to pay for some
|
%% more difficult & complex, but it's the price to pay for some
|
||||||
|
@ -61,9 +57,10 @@
|
||||||
%% FLU1 API
|
%% FLU1 API
|
||||||
-export([
|
-export([
|
||||||
%% File API
|
%% File API
|
||||||
append_chunk/6, append_chunk/8,
|
append_chunk/4, append_chunk/5,
|
||||||
read_chunk/7, read_chunk/8,
|
append_chunk_extra/5, append_chunk_extra/6,
|
||||||
checksum_list/2, checksum_list/3,
|
read_chunk/5, read_chunk/6,
|
||||||
|
checksum_list/3, checksum_list/4,
|
||||||
list_files/2, list_files/3,
|
list_files/2, list_files/3,
|
||||||
wedge_status/1, wedge_status/2,
|
wedge_status/1, wedge_status/2,
|
||||||
|
|
||||||
|
@ -81,8 +78,7 @@
|
||||||
quit/1,
|
quit/1,
|
||||||
|
|
||||||
%% Internal API
|
%% Internal API
|
||||||
write_chunk/7, write_chunk/8,
|
write_chunk/5, write_chunk/6,
|
||||||
trim_chunk/6, trim_chunk/7,
|
|
||||||
|
|
||||||
%% Helpers
|
%% Helpers
|
||||||
stop_proxies/1, start_proxies/1
|
stop_proxies/1, start_proxies/1
|
||||||
|
@ -107,39 +103,51 @@ start_link(#p_srvr{}=I) ->
|
||||||
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
%% with `Prefix'.
|
%% with `Prefix'.
|
||||||
|
|
||||||
append_chunk(PidSpec, NSInfo, EpochID, Prefix, Chunk, CSum) ->
|
append_chunk(PidSpec, EpochID, Prefix, Chunk) ->
|
||||||
append_chunk(PidSpec, NSInfo, EpochID, Prefix, Chunk, CSum,
|
append_chunk(PidSpec, EpochID, Prefix, Chunk, infinity).
|
||||||
#append_opts{}, infinity).
|
|
||||||
|
|
||||||
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
%% with `Prefix'.
|
%% with `Prefix'.
|
||||||
|
|
||||||
append_chunk(PidSpec, NSInfo, EpochID, Prefix, Chunk, CSum, Opts,
|
append_chunk(PidSpec, EpochID, Prefix, Chunk, Timeout) ->
|
||||||
Timeout) ->
|
gen_server:call(PidSpec, {req, {append_chunk, EpochID, Prefix, Chunk}},
|
||||||
gen_server:call(PidSpec, {req, {append_chunk, NSInfo, EpochID,
|
Timeout).
|
||||||
Prefix, Chunk, CSum, Opts, Timeout}},
|
|
||||||
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
|
%% with `Prefix'.
|
||||||
|
|
||||||
|
append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra)
|
||||||
|
when is_integer(ChunkExtra), ChunkExtra >= 0 ->
|
||||||
|
append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra, infinity).
|
||||||
|
|
||||||
|
%% @doc Append a chunk (binary- or iolist-style) of data to a file
|
||||||
|
%% with `Prefix'.
|
||||||
|
|
||||||
|
append_chunk_extra(PidSpec, EpochID, Prefix, Chunk, ChunkExtra, Timeout) ->
|
||||||
|
gen_server:call(PidSpec, {req, {append_chunk_extra, EpochID, Prefix,
|
||||||
|
Chunk, ChunkExtra}},
|
||||||
Timeout).
|
Timeout).
|
||||||
|
|
||||||
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
||||||
|
|
||||||
read_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size, Opts) ->
|
read_chunk(PidSpec, EpochID, File, Offset, Size) ->
|
||||||
read_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size, Opts, infinity).
|
read_chunk(PidSpec, EpochID, File, Offset, Size, infinity).
|
||||||
|
|
||||||
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
%% @doc Read a chunk of data of size `Size' from `File' at `Offset'.
|
||||||
|
|
||||||
read_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size, Opts, Timeout) ->
|
read_chunk(PidSpec, EpochID, File, Offset, Size, Timeout) ->
|
||||||
gen_server:call(PidSpec, {req, {read_chunk, NSInfo, EpochID, File, Offset, Size, Opts}},
|
gen_server:call(PidSpec, {req, {read_chunk, EpochID, File, Offset, Size}},
|
||||||
Timeout).
|
Timeout).
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
|
|
||||||
checksum_list(PidSpec, File) ->
|
checksum_list(PidSpec, EpochID, File) ->
|
||||||
checksum_list(PidSpec, File, infinity).
|
checksum_list(PidSpec, EpochID, File, infinity).
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
|
|
||||||
checksum_list(PidSpec, File, Timeout) ->
|
checksum_list(PidSpec, EpochID, File, Timeout) ->
|
||||||
gen_server:call(PidSpec, {req, {checksum_list, File}},
|
gen_server:call(PidSpec, {req, {checksum_list, EpochID, File}},
|
||||||
Timeout).
|
Timeout).
|
||||||
|
|
||||||
%% @doc Fetch the list of all files on the remote FLU.
|
%% @doc Fetch the list of all files on the remote FLU.
|
||||||
|
@ -280,19 +288,18 @@ quit(PidSpec) ->
|
||||||
%% @doc Write a chunk (binary- or iolist-style) of data to a file
|
%% @doc Write a chunk (binary- or iolist-style) of data to a file
|
||||||
%% with `Prefix' at `Offset'.
|
%% with `Prefix' at `Offset'.
|
||||||
|
|
||||||
write_chunk(PidSpec, NSInfo, EpochID, File, Offset, Chunk, CSum) ->
|
write_chunk(PidSpec, EpochID, File, Offset, Chunk) ->
|
||||||
write_chunk(PidSpec, NSInfo, EpochID, File, Offset, Chunk, CSum, infinity).
|
write_chunk(PidSpec, EpochID, File, Offset, Chunk, infinity).
|
||||||
|
|
||||||
%% @doc Write a chunk (binary- or iolist-style) of data to a file
|
%% @doc Write a chunk (binary- or iolist-style) of data to a file
|
||||||
%% with `Prefix' at `Offset'.
|
%% with `Prefix' at `Offset'.
|
||||||
|
|
||||||
write_chunk(PidSpec, NSInfo, EpochID, File, Offset, Chunk, CSum, Timeout) ->
|
write_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) ->
|
||||||
case gen_server:call(PidSpec, {req, {write_chunk, NSInfo, EpochID, File, Offset, Chunk, CSum}},
|
case gen_server:call(PidSpec, {req, {write_chunk, EpochID, File, Offset, Chunk}},
|
||||||
Timeout) of
|
Timeout) of
|
||||||
{error, written}=Err ->
|
{error, written}=Err ->
|
||||||
Size = byte_size(Chunk),
|
case read_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) of
|
||||||
case read_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size, undefined, Timeout) of
|
{ok, Chunk2} when Chunk2 == Chunk ->
|
||||||
{ok, {[{File, Offset, Chunk2, _}], []}} when Chunk2 == Chunk ->
|
|
||||||
%% See equivalent comment inside write_projection().
|
%% See equivalent comment inside write_projection().
|
||||||
ok;
|
ok;
|
||||||
_ ->
|
_ ->
|
||||||
|
@ -302,18 +309,6 @@ write_chunk(PidSpec, NSInfo, EpochID, File, Offset, Chunk, CSum, Timeout) ->
|
||||||
Else
|
Else
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
trim_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size) ->
|
|
||||||
trim_chunk(PidSpec, NSInfo, EpochID, File, Offset, Size, infinity).
|
|
||||||
|
|
||||||
%% @doc Write a chunk (binary- or iolist-style) of data to a file
|
|
||||||
%% with `Prefix' at `Offset'.
|
|
||||||
|
|
||||||
trim_chunk(PidSpec, NSInfo, EpochID, File, Offset, Chunk, Timeout) ->
|
|
||||||
gen_server:call(PidSpec,
|
|
||||||
{req, {trim_chunk, NSInfo, EpochID, File, Offset, Chunk}},
|
|
||||||
Timeout).
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
init([I]) ->
|
init([I]) ->
|
||||||
|
@ -375,24 +370,21 @@ do_req_retry(_Req, 2, Err, S) ->
|
||||||
do_req_retry(Req, Depth, _Err, S) ->
|
do_req_retry(Req, Depth, _Err, S) ->
|
||||||
do_req(Req, Depth + 1, try_connect(disconnect(S))).
|
do_req(Req, Depth + 1, try_connect(disconnect(S))).
|
||||||
|
|
||||||
make_req_fun({append_chunk, NSInfo, EpochID,
|
make_req_fun({append_chunk, EpochID, Prefix, Chunk},
|
||||||
Prefix, Chunk, CSum, Opts, Timeout},
|
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:append_chunk(Sock, NSInfo, EpochID,
|
fun() -> Mod:append_chunk(Sock, EpochID, Prefix, Chunk) end;
|
||||||
Prefix, Chunk, CSum, Opts, Timeout)
|
make_req_fun({append_chunk_extra, EpochID, Prefix, Chunk, ChunkExtra},
|
||||||
end;
|
|
||||||
make_req_fun({read_chunk, NSInfo, EpochID, File, Offset, Size, Opts},
|
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:read_chunk(Sock, NSInfo, EpochID, File, Offset, Size, Opts) end;
|
fun() -> Mod:append_chunk_extra(Sock, EpochID, Prefix, Chunk, ChunkExtra) end;
|
||||||
make_req_fun({write_chunk, NSInfo, EpochID, File, Offset, Chunk, CSum},
|
make_req_fun({read_chunk, EpochID, File, Offset, Size},
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:write_chunk(Sock, NSInfo, EpochID, File, Offset, Chunk, CSum) end;
|
fun() -> Mod:read_chunk(Sock, EpochID, File, Offset, Size) end;
|
||||||
make_req_fun({trim_chunk, NSInfo, EpochID, File, Offset, Size},
|
make_req_fun({write_chunk, EpochID, File, Offset, Chunk},
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:trim_chunk(Sock, NSInfo, EpochID, File, Offset, Size) end;
|
fun() -> Mod:write_chunk(Sock, EpochID, File, Offset, Chunk) end;
|
||||||
make_req_fun({checksum_list, File},
|
make_req_fun({checksum_list, EpochID, File},
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:checksum_list(Sock, File) end;
|
fun() -> Mod:checksum_list(Sock, EpochID, File) end;
|
||||||
make_req_fun({list_files, EpochID},
|
make_req_fun({list_files, EpochID},
|
||||||
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
|
||||||
fun() -> Mod:list_files(Sock, EpochID) end;
|
fun() -> Mod:list_files(Sock, EpochID) end;
|
||||||
|
@ -404,7 +396,8 @@ make_req_fun({get_epoch_id},
|
||||||
fun() -> case Mod:read_latest_projection(Sock, private) of
|
fun() -> case Mod:read_latest_projection(Sock, private) of
|
||||||
{ok, P} ->
|
{ok, P} ->
|
||||||
#projection_v1{epoch_number=Epoch,
|
#projection_v1{epoch_number=Epoch,
|
||||||
epoch_csum=CSum} = P,
|
epoch_csum=CSum} =
|
||||||
|
machi_chain_manager1:inner_projection_or_self(P),
|
||||||
{ok, {Epoch, CSum}};
|
{ok, {Epoch, CSum}};
|
||||||
Error ->
|
Error ->
|
||||||
Error
|
Error
|
||||||
|
|
194
src/machi_sequencer.erl
Normal file
194
src/machi_sequencer.erl
Normal file
|
@ -0,0 +1,194 @@
|
||||||
|
%% -------------------------------------------------------------------
|
||||||
|
%%
|
||||||
|
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% This file is provided to you under the Apache License,
|
||||||
|
%% Version 2.0 (the "License"); you may not use this file
|
||||||
|
%% except in compliance with the License. You may obtain
|
||||||
|
%% a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing,
|
||||||
|
%% software distributed under the License is distributed on an
|
||||||
|
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
%% KIND, either express or implied. See the License for the
|
||||||
|
%% specific language governing permissions and limitations
|
||||||
|
%% under the License.
|
||||||
|
%%
|
||||||
|
%% -------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% @doc "Mothballed" sequencer code, perhaps to be reused sometime in
|
||||||
|
%% the future?
|
||||||
|
|
||||||
|
-module(machi_sequencer).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
|
||||||
|
-include_lib("kernel/include/file.hrl").
|
||||||
|
|
||||||
|
-define(CONFIG_DIR, "./config").
|
||||||
|
-define(DATA_DIR, "./data").
|
||||||
|
|
||||||
|
seq(Server, Prefix, Size) when is_binary(Prefix), is_integer(Size), Size > -1 ->
|
||||||
|
Server ! {seq, self(), Prefix, Size},
|
||||||
|
receive
|
||||||
|
{assignment, File, Offset} ->
|
||||||
|
{File, Offset}
|
||||||
|
after 1*1000 ->
|
||||||
|
bummer
|
||||||
|
end.
|
||||||
|
|
||||||
|
seq_direct(Prefix, Size) when is_binary(Prefix), is_integer(Size), Size > -1 ->
|
||||||
|
RegName = make_regname(Prefix),
|
||||||
|
seq(RegName, Prefix, Size).
|
||||||
|
|
||||||
|
start_server() ->
|
||||||
|
start_server(?MODULE).
|
||||||
|
|
||||||
|
start_server(Name) ->
|
||||||
|
spawn_link(fun() -> run_server(Name) end).
|
||||||
|
|
||||||
|
run_server(Name) ->
|
||||||
|
register(Name, self()),
|
||||||
|
ets:new(?MODULE, [named_table, public, {write_concurrency, true}]),
|
||||||
|
server_loop().
|
||||||
|
|
||||||
|
server_loop() ->
|
||||||
|
receive
|
||||||
|
{seq, From, Prefix, Size} ->
|
||||||
|
spawn(fun() -> server_dispatch(From, Prefix, Size) end),
|
||||||
|
server_loop()
|
||||||
|
end.
|
||||||
|
|
||||||
|
server_dispatch(From, Prefix, Size) ->
|
||||||
|
RegName = make_regname(Prefix),
|
||||||
|
case whereis(RegName) of
|
||||||
|
undefined ->
|
||||||
|
start_prefix_server(Prefix),
|
||||||
|
timer:sleep(1),
|
||||||
|
server_dispatch(From, Prefix, Size);
|
||||||
|
Pid ->
|
||||||
|
Pid ! {seq, From, Prefix, Size}
|
||||||
|
end,
|
||||||
|
exit(normal).
|
||||||
|
|
||||||
|
start_prefix_server(Prefix) ->
|
||||||
|
spawn(fun() -> run_prefix_server(Prefix) end).
|
||||||
|
|
||||||
|
run_prefix_server(Prefix) ->
|
||||||
|
true = register(make_regname(Prefix), self()),
|
||||||
|
ok = filelib:ensure_dir(?CONFIG_DIR ++ "/unused"),
|
||||||
|
ok = filelib:ensure_dir(?DATA_DIR ++ "/unused"),
|
||||||
|
FileNum = read_max_filenum(Prefix) + 1,
|
||||||
|
ok = increment_max_filenum(Prefix),
|
||||||
|
prefix_server_loop(Prefix, FileNum).
|
||||||
|
|
||||||
|
prefix_server_loop(Prefix, FileNum) ->
|
||||||
|
File = make_data_filename(Prefix, FileNum),
|
||||||
|
prefix_server_loop(Prefix, File, FileNum, 0).
|
||||||
|
|
||||||
|
prefix_server_loop(Prefix, File, FileNum, Offset) ->
|
||||||
|
receive
|
||||||
|
{seq, From, Prefix, Size} ->
|
||||||
|
From ! {assignment, File, Offset},
|
||||||
|
prefix_server_loop(Prefix, File, FileNum, Offset + Size)
|
||||||
|
after 30*1000 ->
|
||||||
|
io:format("timeout: ~p server stopping\n", [Prefix]),
|
||||||
|
exit(normal)
|
||||||
|
end.
|
||||||
|
|
||||||
|
make_regname(Prefix) ->
|
||||||
|
erlang:binary_to_atom(Prefix, latin1).
|
||||||
|
|
||||||
|
make_config_filename(Prefix) ->
|
||||||
|
lists:flatten(io_lib:format("~s/~s", [?CONFIG_DIR, Prefix])).
|
||||||
|
|
||||||
|
make_data_filename(Prefix, FileNum) ->
|
||||||
|
erlang:iolist_to_binary(io_lib:format("~s/~s.~w",
|
||||||
|
[?DATA_DIR, Prefix, FileNum])).
|
||||||
|
|
||||||
|
read_max_filenum(Prefix) ->
|
||||||
|
case file:read_file_info(make_config_filename(Prefix)) of
|
||||||
|
{error, enoent} ->
|
||||||
|
0;
|
||||||
|
{ok, FI} ->
|
||||||
|
FI#file_info.size
|
||||||
|
end.
|
||||||
|
|
||||||
|
increment_max_filenum(Prefix) ->
|
||||||
|
{ok, FH} = file:open(make_config_filename(Prefix), [append]),
|
||||||
|
ok = file:write(FH, "x"),
|
||||||
|
%% ok = file:sync(FH),
|
||||||
|
ok = file:close(FH).
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
%% basho_bench callbacks
|
||||||
|
|
||||||
|
-define(SEQ, ?MODULE).
|
||||||
|
|
||||||
|
new(1) ->
|
||||||
|
start_server(),
|
||||||
|
timer:sleep(100),
|
||||||
|
{ok, unused};
|
||||||
|
new(_Id) ->
|
||||||
|
{ok, unused}.
|
||||||
|
|
||||||
|
run(null, _KeyGen, _ValgueGen, State) ->
|
||||||
|
{ok, State};
|
||||||
|
run(keygen_then_null, KeyGen, _ValgueGen, State) ->
|
||||||
|
_Prefix = KeyGen(),
|
||||||
|
{ok, State};
|
||||||
|
run(seq, KeyGen, _ValgueGen, State) ->
|
||||||
|
Prefix = KeyGen(),
|
||||||
|
{_, _} = ?SEQ:seq(?SEQ, Prefix, 1),
|
||||||
|
{ok, State};
|
||||||
|
run(seq_direct, KeyGen, _ValgueGen, State) ->
|
||||||
|
Prefix = KeyGen(),
|
||||||
|
Name = ?SEQ:make_regname(Prefix),
|
||||||
|
case get(Name) of
|
||||||
|
undefined ->
|
||||||
|
case whereis(Name) of
|
||||||
|
undefined ->
|
||||||
|
{_, _} = ?SEQ:seq(?SEQ, Prefix, 1);
|
||||||
|
Pid ->
|
||||||
|
put(Name, Pid),
|
||||||
|
{_, _} = ?SEQ:seq(Pid, Prefix, 1)
|
||||||
|
end;
|
||||||
|
Pid ->
|
||||||
|
{_, _} = ?SEQ:seq(Pid, Prefix, 1)
|
||||||
|
end,
|
||||||
|
{ok, State};
|
||||||
|
run(seq_ets, KeyGen, _ValgueGen, State) ->
|
||||||
|
Tab = ?MODULE,
|
||||||
|
Prefix = KeyGen(),
|
||||||
|
Res = try
|
||||||
|
BigNum = ets:update_counter(Tab, Prefix, 1),
|
||||||
|
BigBin = <<BigNum:80/big>>,
|
||||||
|
<<FileNum:32/big, Offset:48/big>> = BigBin,
|
||||||
|
%% if Offset rem 1000 == 0 ->
|
||||||
|
%% io:format("~p,~p ", [FileNum, Offset]);
|
||||||
|
%% true ->
|
||||||
|
%% ok
|
||||||
|
%% end,
|
||||||
|
{fakefake, FileNum, Offset}
|
||||||
|
catch error:badarg ->
|
||||||
|
FileNum2 = 1, Offset2 = 0,
|
||||||
|
FileBin = <<FileNum2:32/big>>,
|
||||||
|
OffsetBin = <<Offset2:48/big>>,
|
||||||
|
Glop = <<FileBin/binary, OffsetBin/binary>>,
|
||||||
|
<<Base:80/big>> = Glop,
|
||||||
|
%% if Prefix == <<"42">> -> io:format("base:~w\n", [Base]); true -> ok end,
|
||||||
|
%% Base = 0,
|
||||||
|
case ets:insert_new(Tab, {Prefix, Base}) of
|
||||||
|
true ->
|
||||||
|
{<<"fakefakefake">>, Base};
|
||||||
|
false ->
|
||||||
|
Result2 = ets:update_counter(Tab, Prefix, 1),
|
||||||
|
{<<"fakefakefake">>, Result2}
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
Res = Res,
|
||||||
|
{ok, State}.
|
||||||
|
|
|
@ -47,6 +47,8 @@ start_link() ->
|
||||||
supervisor:start_link({local, ?SERVER}, ?MODULE, []).
|
supervisor:start_link({local, ?SERVER}, ?MODULE, []).
|
||||||
|
|
||||||
init([]) ->
|
init([]) ->
|
||||||
|
%% {_, Ps} = process_info(self(), links),
|
||||||
|
%% [unlink(P) || P <- Ps],
|
||||||
RestartStrategy = one_for_one,
|
RestartStrategy = one_for_one,
|
||||||
MaxRestarts = 1000,
|
MaxRestarts = 1000,
|
||||||
MaxSecondsBetweenRestarts = 3600,
|
MaxSecondsBetweenRestarts = 3600,
|
||||||
|
@ -60,16 +62,9 @@ init([]) ->
|
||||||
ServerSup =
|
ServerSup =
|
||||||
{machi_flu_sup, {machi_flu_sup, start_link, []},
|
{machi_flu_sup, {machi_flu_sup, start_link, []},
|
||||||
Restart, Shutdown, Type, []},
|
Restart, Shutdown, Type, []},
|
||||||
RanchSup = {ranch_sup, {ranch_sup, start_link, []},
|
|
||||||
Restart, Shutdown, supervisor, [ranch_sup]},
|
{ok, {SupFlags, [ServerSup]}}.
|
||||||
LifecycleMgr =
|
|
||||||
{machi_lifecycle_mgr, {machi_lifecycle_mgr, start_link, []},
|
%% AChild = {'AName', {'AModule', start_link, []},
|
||||||
Restart, Shutdown, worker, []},
|
%% Restart, Shutdown, Type, ['AModule']},
|
||||||
RunningApps = [A || {A,_D,_V} <- application:which_applications()],
|
%% {ok, {SupFlags, [AChild]}}.
|
||||||
Specs = case lists:member(ranch, RunningApps) of
|
|
||||||
true ->
|
|
||||||
[ServerSup, LifecycleMgr];
|
|
||||||
false ->
|
|
||||||
[ServerSup, RanchSup, LifecycleMgr]
|
|
||||||
end,
|
|
||||||
{ok, {SupFlags, Specs}}.
|
|
||||||
|
|
|
@ -25,35 +25,26 @@
|
||||||
-export([
|
-export([
|
||||||
checksum_chunk/1,
|
checksum_chunk/1,
|
||||||
make_tagged_csum/1, make_tagged_csum/2,
|
make_tagged_csum/1, make_tagged_csum/2,
|
||||||
make_client_csum/1,
|
|
||||||
unmake_tagged_csum/1,
|
unmake_tagged_csum/1,
|
||||||
hexstr_to_bin/1, bin_to_hexstr/1,
|
hexstr_to_bin/1, bin_to_hexstr/1,
|
||||||
hexstr_to_int/1, int_to_hexstr/2, int_to_hexbin/2,
|
hexstr_to_int/1, int_to_hexstr/2, int_to_hexbin/2,
|
||||||
make_binary/1, make_string/1,
|
make_binary/1, make_string/1,
|
||||||
make_regname/1,
|
make_regname/1,
|
||||||
make_config_filename/4, make_config_filename/2,
|
make_config_filename/2,
|
||||||
make_checksum_filename/4, make_checksum_filename/2,
|
make_checksum_filename/4, make_checksum_filename/2,
|
||||||
make_data_filename/6, make_data_filename/2,
|
make_data_filename/4, make_data_filename/2,
|
||||||
make_projection_filename/2,
|
make_projection_filename/2,
|
||||||
is_valid_filename/1,
|
read_max_filenum/2, increment_max_filenum/2,
|
||||||
parse_filename/1,
|
|
||||||
read_max_filenum/4, increment_max_filenum/4,
|
|
||||||
info_msg/2, verb/1, verb/2,
|
info_msg/2, verb/1, verb/2,
|
||||||
mbytes/1,
|
mbytes/1,
|
||||||
pretty_time/0, pretty_time/2,
|
|
||||||
%% TCP protocol helpers
|
%% TCP protocol helpers
|
||||||
connect/2, connect/3,
|
connect/2, connect/3,
|
||||||
%% List twiddling
|
%% List twiddling
|
||||||
permutations/1, perms/1,
|
permutations/1, perms/1,
|
||||||
combinations/1, ordered_combinations/1,
|
combinations/1, ordered_combinations/1,
|
||||||
mk_order/2,
|
mk_order/2
|
||||||
%% Other
|
|
||||||
wait_for_death/2, wait_for_life/2,
|
|
||||||
bool2int/1,
|
|
||||||
int2bool/1,
|
|
||||||
read_opts_default/1,
|
|
||||||
ns_info_default/1
|
|
||||||
]).
|
]).
|
||||||
|
-compile(export_all).
|
||||||
|
|
||||||
-include("machi.hrl").
|
-include("machi.hrl").
|
||||||
-include("machi_projection.hrl").
|
-include("machi_projection.hrl").
|
||||||
|
@ -71,27 +62,17 @@ make_regname(Prefix) when is_list(Prefix) ->
|
||||||
|
|
||||||
%% @doc Calculate a config file path, by common convention.
|
%% @doc Calculate a config file path, by common convention.
|
||||||
|
|
||||||
-spec make_config_filename(string(), machi_dt:namespace(), machi_dt:locator(), string()) ->
|
|
||||||
string().
|
|
||||||
make_config_filename(DataDir, NS, NSLocator, Prefix) ->
|
|
||||||
NSLocator_str = int_to_hexstr(NSLocator, 32),
|
|
||||||
lists:flatten(io_lib:format("~s/config/~s^~s^~s",
|
|
||||||
[DataDir, Prefix, NS, NSLocator_str])).
|
|
||||||
|
|
||||||
%% @doc Calculate a config file path, by common convention.
|
|
||||||
|
|
||||||
-spec make_config_filename(string(), string()) ->
|
-spec make_config_filename(string(), string()) ->
|
||||||
string().
|
string().
|
||||||
make_config_filename(DataDir, Filename) ->
|
make_config_filename(DataDir, Prefix) ->
|
||||||
lists:flatten(io_lib:format("~s/config/~s",
|
lists:flatten(io_lib:format("~s/config/~s", [DataDir, Prefix])).
|
||||||
[DataDir, Filename])).
|
|
||||||
|
|
||||||
%% @doc Calculate a checksum file path, by common convention.
|
%% @doc Calculate a checksum file path, by common convention.
|
||||||
|
|
||||||
-spec make_checksum_filename(string(), string(), atom()|string()|binary(), integer()) ->
|
-spec make_checksum_filename(string(), string(), atom()|string()|binary(), integer()) ->
|
||||||
string().
|
string().
|
||||||
make_checksum_filename(DataDir, Prefix, SequencerName, FileNum) ->
|
make_checksum_filename(DataDir, Prefix, SequencerName, FileNum) ->
|
||||||
lists:flatten(io_lib:format("~s/config/~s^~s^~w.csum",
|
lists:flatten(io_lib:format("~s/config/~s.~s.~w.csum",
|
||||||
[DataDir, Prefix, SequencerName, FileNum])).
|
[DataDir, Prefix, SequencerName, FileNum])).
|
||||||
|
|
||||||
%% @doc Calculate a checksum file path, by common convention.
|
%% @doc Calculate a checksum file path, by common convention.
|
||||||
|
@ -105,22 +86,11 @@ make_checksum_filename(DataDir, FileName) ->
|
||||||
|
|
||||||
%% @doc Calculate a file data file path, by common convention.
|
%% @doc Calculate a file data file path, by common convention.
|
||||||
|
|
||||||
-spec make_data_filename(string(), machi_dt:namespace(), machi_dt:locator(), string(), atom()|string()|binary(), integer()|string()) ->
|
-spec make_data_filename(string(), string(), atom()|string()|binary(), integer()) ->
|
||||||
{binary(), string()}.
|
{binary(), string()}.
|
||||||
make_data_filename(DataDir, NS, NSLocator, Prefix, SequencerName, FileNum)
|
make_data_filename(DataDir, Prefix, SequencerName, FileNum) ->
|
||||||
when is_integer(FileNum) ->
|
File = erlang:iolist_to_binary(io_lib:format("~s.~s.~w",
|
||||||
NSLocator_str = int_to_hexstr(NSLocator, 32),
|
[Prefix, SequencerName, FileNum])),
|
||||||
File = erlang:iolist_to_binary(io_lib:format("~s^~s^~s^~s^~w",
|
|
||||||
[Prefix, NS, NSLocator_str, SequencerName, FileNum])),
|
|
||||||
make_data_filename2(DataDir, File);
|
|
||||||
make_data_filename(DataDir, NS, NSLocator, Prefix, SequencerName, String)
|
|
||||||
when is_list(String) ->
|
|
||||||
NSLocator_str = int_to_hexstr(NSLocator, 32),
|
|
||||||
File = erlang:iolist_to_binary(io_lib:format("~s^~s^~s^~s^~s",
|
|
||||||
[Prefix, NS, NSLocator_str, SequencerName, string])),
|
|
||||||
make_data_filename2(DataDir, File).
|
|
||||||
|
|
||||||
make_data_filename2(DataDir, File) ->
|
|
||||||
FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
|
FullPath = lists:flatten(io_lib:format("~s/data/~s", [DataDir, File])),
|
||||||
{File, FullPath}.
|
{File, FullPath}.
|
||||||
|
|
||||||
|
@ -144,49 +114,13 @@ make_projection_filename(DataDir, "") ->
|
||||||
make_projection_filename(DataDir, File) ->
|
make_projection_filename(DataDir, File) ->
|
||||||
lists:flatten(io_lib:format("~s/projection/~s", [DataDir, File])).
|
lists:flatten(io_lib:format("~s/projection/~s", [DataDir, File])).
|
||||||
|
|
||||||
%% @doc Given a filename, return true if it is a valid machi filename,
|
|
||||||
%% false otherwise.
|
|
||||||
-spec is_valid_filename( Filename :: string() ) -> true | false.
|
|
||||||
is_valid_filename(Filename) ->
|
|
||||||
case parse_filename(Filename) of
|
|
||||||
{} -> false;
|
|
||||||
{_,_,_,_,_} -> true
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% @doc Given a machi filename, return its components as a 5-tuple.
|
|
||||||
%% The components will be:
|
|
||||||
%% <ul>
|
|
||||||
%% <li>Prefix</li>
|
|
||||||
%% <li>Cluster namespace</li>
|
|
||||||
%% <li>Cluster locator</li>
|
|
||||||
%% <li>UUID</li>
|
|
||||||
%% <li>Sequence number</li>
|
|
||||||
%% </ul>
|
|
||||||
%%
|
|
||||||
%% Invalid filenames will return an empty tuple, `{}'.
|
|
||||||
-spec parse_filename( Filename :: string() ) -> {} | {string(), machi_dt:namespace(), machi_dt:locator(), string(), string() }.
|
|
||||||
parse_filename(Filename) ->
|
|
||||||
case string:tokens(Filename, "^") of
|
|
||||||
[Prefix, NS, NSLocator, UUID, SeqNo] ->
|
|
||||||
{Prefix, NS, list_to_integer(NSLocator), UUID, SeqNo};
|
|
||||||
[Prefix, NSLocator, UUID, SeqNo] ->
|
|
||||||
%% string:tokens() doesn't consider "foo^^bar" as 3 tokens {sigh}
|
|
||||||
case re:replace(Filename, "[^^]+", "x", [global,{return,binary}]) of
|
|
||||||
<<"x^^x^x^x">> ->
|
|
||||||
{Prefix, <<"">>, list_to_integer(NSLocator), UUID, SeqNo};
|
|
||||||
_ ->
|
|
||||||
{}
|
|
||||||
end;
|
|
||||||
_ -> {}
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% @doc Read the file size of a config file, which is used as the
|
%% @doc Read the file size of a config file, which is used as the
|
||||||
%% basis for a minimum sequence number.
|
%% basis for a minimum sequence number.
|
||||||
|
|
||||||
-spec read_max_filenum(string(), machi_dt:namespace(), machi_dt:locator(), string()) ->
|
-spec read_max_filenum(string(), string()) ->
|
||||||
non_neg_integer().
|
non_neg_integer().
|
||||||
read_max_filenum(DataDir, NS, NSLocator, Prefix) ->
|
read_max_filenum(DataDir, Prefix) ->
|
||||||
case file:read_file_info(make_config_filename(DataDir, NS, NSLocator, Prefix)) of
|
case file:read_file_info(make_config_filename(DataDir, Prefix)) of
|
||||||
{error, enoent} ->
|
{error, enoent} ->
|
||||||
0;
|
0;
|
||||||
{ok, FI} ->
|
{ok, FI} ->
|
||||||
|
@ -196,11 +130,11 @@ read_max_filenum(DataDir, NS, NSLocator, Prefix) ->
|
||||||
%% @doc Increase the file size of a config file, which is used as the
|
%% @doc Increase the file size of a config file, which is used as the
|
||||||
%% basis for a minimum sequence number.
|
%% basis for a minimum sequence number.
|
||||||
|
|
||||||
-spec increment_max_filenum(string(), machi_dt:namespace(), machi_dt:locator(), string()) ->
|
-spec increment_max_filenum(string(), string()) ->
|
||||||
ok | {error, term()}.
|
ok | {error, term()}.
|
||||||
increment_max_filenum(DataDir, NS, NSLocator, Prefix) ->
|
increment_max_filenum(DataDir, Prefix) ->
|
||||||
try
|
try
|
||||||
{ok, FH} = file:open(make_config_filename(DataDir, NS, NSLocator, Prefix), [append]),
|
{ok, FH} = file:open(make_config_filename(DataDir, Prefix), [append]),
|
||||||
ok = file:write(FH, "x"),
|
ok = file:write(FH, "x"),
|
||||||
ok = file:sync(FH),
|
ok = file:sync(FH),
|
||||||
ok = file:close(FH)
|
ok = file:close(FH)
|
||||||
|
@ -289,48 +223,22 @@ int_to_hexbin(I, I_size) ->
|
||||||
checksum_chunk(Chunk) when is_binary(Chunk); is_list(Chunk) ->
|
checksum_chunk(Chunk) when is_binary(Chunk); is_list(Chunk) ->
|
||||||
crypto:hash(sha, Chunk).
|
crypto:hash(sha, Chunk).
|
||||||
|
|
||||||
convert_csum_tag(A) when is_atom(A)->
|
|
||||||
A;
|
|
||||||
convert_csum_tag(?CSUM_TAG_NONE) ->
|
|
||||||
?CSUM_TAG_NONE_ATOM;
|
|
||||||
convert_csum_tag(?CSUM_TAG_CLIENT_SHA) ->
|
|
||||||
?CSUM_TAG_CLIENT_SHA_ATOM;
|
|
||||||
convert_csum_tag(?CSUM_TAG_SERVER_SHA) ->
|
|
||||||
?CSUM_TAG_SERVER_SHA_ATOM;
|
|
||||||
convert_csum_tag(?CSUM_TAG_SERVER_REGEN_SHA) ->
|
|
||||||
?CSUM_TAG_SERVER_REGEN_SHA_ATOM.
|
|
||||||
|
|
||||||
%% @doc Create a tagged checksum
|
%% @doc Create a tagged checksum
|
||||||
|
|
||||||
make_tagged_csum(none) ->
|
make_tagged_csum(none) ->
|
||||||
<<?CSUM_TAG_NONE:8>>;
|
<<?CSUM_TAG_NONE:8>>;
|
||||||
make_tagged_csum(<<>>) ->
|
|
||||||
<<?CSUM_TAG_NONE:8>>;
|
|
||||||
make_tagged_csum({Tag, CSum}) ->
|
make_tagged_csum({Tag, CSum}) ->
|
||||||
make_tagged_csum(convert_csum_tag(Tag), CSum).
|
make_tagged_csum(Tag, CSum).
|
||||||
|
|
||||||
%% @doc Makes a tagged checksum. The meaning of each tag is:
|
make_tagged_csum(none, _SHA) ->
|
||||||
%% none / ?CSUM_TAG_NONE
|
|
||||||
%% - a suspicious and nonsense checksum
|
|
||||||
%% client_sha / ?CSUM_TAG_CLIENT_SHA
|
|
||||||
%% - a valid checksum given by client and stored in server
|
|
||||||
%% server_sha / ?CSUM_TAG_SERVER_SHA
|
|
||||||
%% - a valid checksum generated by and stored in server
|
|
||||||
%% server_regen_sha / ?CSUM_TAG_SERVER_REGEN_SHA
|
|
||||||
%% - a valid checksum generated by server in an ad hoc manner, not stored in server
|
|
||||||
-spec make_tagged_csum(machi_dt:csum_tag(), binary()) -> machi_dt:chunk_csum().
|
|
||||||
make_tagged_csum(?CSUM_TAG_NONE_ATOM, _SHA) ->
|
|
||||||
<<?CSUM_TAG_NONE:8>>;
|
<<?CSUM_TAG_NONE:8>>;
|
||||||
make_tagged_csum(?CSUM_TAG_CLIENT_SHA_ATOM, SHA) ->
|
make_tagged_csum(client_sha, SHA) ->
|
||||||
<<?CSUM_TAG_CLIENT_SHA:8, SHA/binary>>;
|
<<?CSUM_TAG_CLIENT_SHA:8, SHA/binary>>;
|
||||||
make_tagged_csum(?CSUM_TAG_SERVER_SHA_ATOM, SHA) ->
|
make_tagged_csum(server_sha, SHA) ->
|
||||||
<<?CSUM_TAG_SERVER_SHA:8, SHA/binary>>;
|
<<?CSUM_TAG_SERVER_SHA:8, SHA/binary>>;
|
||||||
make_tagged_csum(?CSUM_TAG_SERVER_REGEN_SHA_ATOM, SHA) ->
|
make_tagged_csum(server_regen_sha, SHA) ->
|
||||||
<<?CSUM_TAG_SERVER_REGEN_SHA:8, SHA/binary>>.
|
<<?CSUM_TAG_SERVER_REGEN_SHA:8, SHA/binary>>.
|
||||||
|
|
||||||
make_client_csum(BinOrList) ->
|
|
||||||
make_tagged_csum(?CSUM_TAG_CLIENT_SHA_ATOM, checksum_chunk(BinOrList)).
|
|
||||||
|
|
||||||
unmake_tagged_csum(<<Tag:8, Rest/binary>>) ->
|
unmake_tagged_csum(<<Tag:8, Rest/binary>>) ->
|
||||||
{Tag, Rest}.
|
{Tag, Rest}.
|
||||||
|
|
||||||
|
@ -354,15 +262,6 @@ mbytes(0) ->
|
||||||
mbytes(Size) ->
|
mbytes(Size) ->
|
||||||
lists:flatten(io_lib:format("~.1.0f", [max(0.1, Size / (1024*1024))])).
|
lists:flatten(io_lib:format("~.1.0f", [max(0.1, Size / (1024*1024))])).
|
||||||
|
|
||||||
pretty_time() ->
|
|
||||||
{_,_,C} = os:timestamp(),
|
|
||||||
MSec = trunc(C / 1000),
|
|
||||||
pretty_time(time(), MSec).
|
|
||||||
|
|
||||||
pretty_time({HH,MM,SS}, MSec) ->
|
|
||||||
lists:flatten(
|
|
||||||
io_lib:format("~2..0w:~2..0w:~2..0w.~3..0w", [HH, MM, SS, MSec])).
|
|
||||||
|
|
||||||
%% @doc Log an 'info' level message.
|
%% @doc Log an 'info' level message.
|
||||||
|
|
||||||
-spec info_msg(string(), list()) -> term().
|
-spec info_msg(string(), list()) -> term().
|
||||||
|
@ -378,19 +277,8 @@ wait_for_death(Pid, Iters) when is_pid(Pid) ->
|
||||||
false ->
|
false ->
|
||||||
ok;
|
ok;
|
||||||
true ->
|
true ->
|
||||||
timer:sleep(10),
|
|
||||||
wait_for_death(Pid, Iters-1)
|
|
||||||
end.
|
|
||||||
|
|
||||||
wait_for_life(Reg, 0) ->
|
|
||||||
exit({not_alive_yet, Reg});
|
|
||||||
wait_for_life(Reg, Iters) when is_atom(Reg) ->
|
|
||||||
case erlang:whereis(Reg) of
|
|
||||||
Pid when is_pid(Pid) ->
|
|
||||||
{ok, Pid};
|
|
||||||
_ ->
|
|
||||||
timer:sleep(1),
|
timer:sleep(1),
|
||||||
wait_for_life(Reg, Iters-1)
|
wait_for_death(Pid, Iters-1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%
|
||||||
|
@ -443,23 +331,3 @@ mk_order(UPI2, Repair1) ->
|
||||||
error -> error
|
error -> error
|
||||||
end || X <- UPI2],
|
end || X <- UPI2],
|
||||||
UPI2_order.
|
UPI2_order.
|
||||||
|
|
||||||
%% C-style conversion for PB usage.
|
|
||||||
bool2int(true) -> 1;
|
|
||||||
bool2int(false) -> 0.
|
|
||||||
int2bool(0) -> false;
|
|
||||||
int2bool(I) when is_integer(I) -> true.
|
|
||||||
|
|
||||||
read_opts_default(#read_opts{}=NSInfo) ->
|
|
||||||
NSInfo;
|
|
||||||
read_opts_default(A) when A == 'undefined'; A == 'noopt'; A == 'none' ->
|
|
||||||
#read_opts{};
|
|
||||||
read_opts_default(A) when is_atom(A) ->
|
|
||||||
#read_opts{}.
|
|
||||||
|
|
||||||
ns_info_default(#ns_info{}=NSInfo) ->
|
|
||||||
NSInfo;
|
|
||||||
ns_info_default(A) when is_atom(A) ->
|
|
||||||
#ns_info{}.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -22,8 +22,6 @@
|
||||||
|
|
||||||
-module(machi_yessir_client).
|
-module(machi_yessir_client).
|
||||||
|
|
||||||
-ifdef(TODO_refactoring_deferred).
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
-include("machi.hrl").
|
||||||
-include("machi_projection.hrl").
|
-include("machi_projection.hrl").
|
||||||
|
|
||||||
|
@ -32,7 +30,7 @@
|
||||||
append_chunk/4, append_chunk/5,
|
append_chunk/4, append_chunk/5,
|
||||||
append_chunk_extra/5, append_chunk_extra/6,
|
append_chunk_extra/5, append_chunk_extra/6,
|
||||||
read_chunk/5, read_chunk/6,
|
read_chunk/5, read_chunk/6,
|
||||||
checksum_list/2, checksum_list/3,
|
checksum_list/3, checksum_list/4,
|
||||||
list_files/2, list_files/3,
|
list_files/2, list_files/3,
|
||||||
wedge_status/1, wedge_status/2,
|
wedge_status/1, wedge_status/2,
|
||||||
|
|
||||||
|
@ -175,24 +173,24 @@ read_chunk(_Host, _TcpPort, EpochID, File, Offset, Size)
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
|
|
||||||
checksum_list(#yessir{name=Name,chunk_size=ChunkSize}, File) ->
|
checksum_list(#yessir{name=Name,chunk_size=ChunkSize}, _EpochID, File) ->
|
||||||
case get({Name,offset,File}) of
|
case get({Name,offset,File}) of
|
||||||
undefined ->
|
undefined ->
|
||||||
{error, no_such_file};
|
{error, no_such_file};
|
||||||
MaxOffset ->
|
MaxOffset ->
|
||||||
C = machi_util:make_tagged_csum(client_sha,
|
C = machi_util:make_tagged_csum(client_sha,
|
||||||
make_csum(Name, ChunkSize)),
|
make_csum(Name, ChunkSize)),
|
||||||
Cs = [{Offset, ChunkSize, C} ||
|
Cs = [machi_flu1:encode_csum_file_entry_bin(Offset, ChunkSize, C) ||
|
||||||
Offset <- lists:seq(?MINIMUM_OFFSET, MaxOffset, ChunkSize)],
|
Offset <- lists:seq(?MINIMUM_OFFSET, MaxOffset, ChunkSize)],
|
||||||
{ok, term_to_binary(Cs)}
|
{ok, Cs}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Fetch the list of chunk checksums for `File'.
|
%% @doc Fetch the list of chunk checksums for `File'.
|
||||||
|
|
||||||
checksum_list(_Host, _TcpPort, File) ->
|
checksum_list(_Host, _TcpPort, EpochID, File) ->
|
||||||
Sock = connect(#p_srvr{proto_mod=?MODULE}),
|
Sock = connect(#p_srvr{proto_mod=?MODULE}),
|
||||||
try
|
try
|
||||||
checksum_list(Sock, File)
|
checksum_list(Sock, EpochID, File)
|
||||||
after
|
after
|
||||||
disconnect(Sock)
|
disconnect(Sock)
|
||||||
end.
|
end.
|
||||||
|
@ -452,7 +450,7 @@ connect(#p_srvr{name=Name, props=Props})->
|
||||||
chunk_size=ChunkSize
|
chunk_size=ChunkSize
|
||||||
},
|
},
|
||||||
%% Add fake dict entries for these files
|
%% Add fake dict entries for these files
|
||||||
_ = [begin
|
[begin
|
||||||
Prefix = list_to_binary(io_lib:format("fake~w", [X])),
|
Prefix = list_to_binary(io_lib:format("fake~w", [X])),
|
||||||
{ok, _} = append_chunk_extra(Sock, {1,<<"unused">>}, Prefix, <<>>, FileSize)
|
{ok, _} = append_chunk_extra(Sock, {1,<<"unused">>}, Prefix, <<>>, FileSize)
|
||||||
end || X <- lists:seq(1, NumFiles)],
|
end || X <- lists:seq(1, NumFiles)],
|
||||||
|
@ -460,10 +458,10 @@ connect(#p_srvr{name=Name, props=Props})->
|
||||||
Sock.
|
Sock.
|
||||||
|
|
||||||
disconnect(#yessir{name=Name}) ->
|
disconnect(#yessir{name=Name}) ->
|
||||||
_ = [erase(K) || {{N,offset,_}=K, _V} <- get(), N == Name],
|
[erase(K) || {{N,offset,_}=K, _V} <- get(), N == Name],
|
||||||
_ = [erase(K) || {{N,chunk,_}=K, _V} <- get(), N == Name],
|
[erase(K) || {{N,chunk,_}=K, _V} <- get(), N == Name],
|
||||||
_ = [erase(K) || {{N,csum,_}=K, _V} <- get(), N == Name],
|
[erase(K) || {{N,csum,_}=K, _V} <- get(), N == Name],
|
||||||
_ = [erase(K) || {{N,proj,_,_}=K, _V} <- get(), N == Name],
|
[erase(K) || {{N,proj,_,_}=K, _V} <- get(), N == Name],
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
%% Example use:
|
%% Example use:
|
||||||
|
@ -511,5 +509,3 @@ disconnect(#yessir{name=Name}) ->
|
||||||
%% =INFO REPORT==== 17-May-2015::18:57:52 ===
|
%% =INFO REPORT==== 17-May-2015::18:57:52 ===
|
||||||
%% Repair success: tail a of [a] finished ap_mode repair ID {a,{1431,856671,140404}}: ok
|
%% Repair success: tail a of [a] finished ap_mode repair ID {a,{1431,856671,140404}}: ok
|
||||||
%% Stats [{t_in_files,0},{t_in_chunks,10413},{t_in_bytes,682426368},{t_out_files,0},{t_out_chunks,10413},{t_out_bytes,682426368},{t_bad_chunks,0},{t_elapsed_seconds,1.591}]
|
%% Stats [{t_in_files,0},{t_in_chunks,10413},{t_in_bytes,682426368},{t_out_files,0},{t_out_chunks,10413},{t_out_bytes,682426368},{t_bad_chunks,0},{t_elapsed_seconds,1.591}]
|
||||||
|
|
||||||
-endif. % TODO_refactoring_deferred
|
|
||||||
|
|
|
@ -353,5 +353,7 @@ find_common_prefix([H|L1], [H|L2]) ->
|
||||||
find_common_prefix(_, _) ->
|
find_common_prefix(_, _) ->
|
||||||
[].
|
[].
|
||||||
|
|
||||||
inner_projection_exists(_) ->
|
inner_projection_exists(#projection_v1{inner=undefined}) ->
|
||||||
false.
|
false;
|
||||||
|
inner_projection_exists(#projection_v1{inner=_}) ->
|
||||||
|
true.
|
||||||
|
|
|
@ -33,33 +33,25 @@
|
||||||
-define(FLU_C, machi_flu1_client).
|
-define(FLU_C, machi_flu1_client).
|
||||||
|
|
||||||
verify_file_checksums_test_() ->
|
verify_file_checksums_test_() ->
|
||||||
{setup,
|
{timeout, 60, fun() -> verify_file_checksums_test2() end}.
|
||||||
fun() -> os:cmd("rm -rf ./data") end,
|
|
||||||
fun(_) -> os:cmd("rm -rf ./data") end,
|
|
||||||
{timeout, 60, fun() -> verify_file_checksums_test2() end}
|
|
||||||
}.
|
|
||||||
|
|
||||||
verify_file_checksums_test2() ->
|
verify_file_checksums_test2() ->
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
TcpPort = 32958,
|
TcpPort = 32958,
|
||||||
DataDir = "./data",
|
DataDir = "./data",
|
||||||
W_props = [{initial_wedged, false}],
|
W_props = [{initial_wedged, false}],
|
||||||
NSInfo = undefined,
|
FLU1 = machi_flu1_test:setup_test_flu(verify1_flu, TcpPort, DataDir,
|
||||||
NoCSum = <<>>,
|
|
||||||
try
|
|
||||||
machi_test_util:start_flu_package(verify1_flu, TcpPort, DataDir,
|
|
||||||
W_props),
|
W_props),
|
||||||
Sock1 = ?FLU_C:connect(#p_srvr{address=Host, port=TcpPort}),
|
Sock1 = ?FLU_C:connect(#p_srvr{address=Host, port=TcpPort}),
|
||||||
try
|
try
|
||||||
Prefix = <<"verify_prefix">>,
|
Prefix = <<"verify_prefix">>,
|
||||||
NumChunks = 10,
|
NumChunks = 10,
|
||||||
[{ok, _} = ?FLU_C:append_chunk(Sock1, NSInfo, ?DUMMY_PV1_EPOCH,
|
[{ok, _} = ?FLU_C:append_chunk(Sock1, ?DUMMY_PV1_EPOCH,
|
||||||
Prefix, <<X:(X*8)/big>>, NoCSum) ||
|
Prefix, <<X:(X*8)/big>>) ||
|
||||||
X <- lists:seq(1, NumChunks)],
|
X <- lists:seq(1, NumChunks)],
|
||||||
{ok, [{_FileSize,File}]} = ?FLU_C:list_files(Sock1, ?DUMMY_PV1_EPOCH),
|
{ok, [{_FileSize,File}]} = ?FLU_C:list_files(Sock1, ?DUMMY_PV1_EPOCH),
|
||||||
?assertEqual({ok, []},
|
{ok, []} = machi_admin_util:verify_file_checksums_remote(
|
||||||
machi_admin_util:verify_file_checksums_remote(
|
Host, TcpPort, ?DUMMY_PV1_EPOCH, File),
|
||||||
Host, TcpPort, ?DUMMY_PV1_EPOCH, File)),
|
|
||||||
|
|
||||||
%% Clobber the first 3 chunks, which are sizes 1/2/3.
|
%% Clobber the first 3 chunks, which are sizes 1/2/3.
|
||||||
{_, Path} = machi_util:make_data_filename(DataDir,binary_to_list(File)),
|
{_, Path} = machi_util:make_data_filename(DataDir,binary_to_list(File)),
|
||||||
|
@ -82,10 +74,8 @@ verify_file_checksums_test2() ->
|
||||||
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
catch ?FLU_C:quit(Sock1)
|
catch ?FLU_C:quit(Sock1),
|
||||||
end
|
ok = ?FLU:stop(FLU1)
|
||||||
after
|
|
||||||
catch machi_test_util:stop_flu_package()
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-endif. % !PULSE
|
-endif. % !PULSE
|
||||||
|
|
|
@ -1,616 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% EQC single-threaded and concurrent test for file operations and repair
|
|
||||||
%% under simulated network partition.
|
|
||||||
|
|
||||||
%% The main purpose is to confirm no dataloss, i.e. every chunk that
|
|
||||||
%% has been successfully written (ACK received) by append/write
|
|
||||||
%% operation will be read after partition heals.
|
|
||||||
%%
|
|
||||||
%% All updating -- append, write and trim -- operations are executed
|
|
||||||
%% through CR client, not directly by flu1 client, in order to be
|
|
||||||
%% end-to-end test (in single chain point of view.) There may be churn
|
|
||||||
%% for projections by simulated network partition.
|
|
||||||
%%
|
|
||||||
%% Test steps
|
|
||||||
%% 1. Setup single chain.
|
|
||||||
%% 2. Execute updating operations and simulated partition (by eqc_statem).
|
|
||||||
%% Every update result is recorded in ETS tables.
|
|
||||||
%% 3. When {error, timeout|partition} happens, trigger management tick for
|
|
||||||
%% every chain manager process.
|
|
||||||
%% 4. After commands are executed, remove partition and wait for the chain
|
|
||||||
%% without down nodes nor repairing nodes.
|
|
||||||
%% 5. Assert written results so that each record can be read from the
|
|
||||||
%% chain and its data is the same as what was written.
|
|
||||||
|
|
||||||
%% Improvements to-do's
|
|
||||||
%% - Use higher concurrency, e.g. 10+
|
|
||||||
%% - Random length for binary to write
|
|
||||||
%% - Operations other than append, write, trim
|
|
||||||
%% - Use checksum instead of binary to save memory
|
|
||||||
%% - More variety for partitioning pattern: non-constant failure
|
|
||||||
%% - Stop and restart
|
|
||||||
%% - Suspend and resume of some erlang processes
|
|
||||||
|
|
||||||
-module(machi_ap_repair_eqc).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-ifdef(EQC).
|
|
||||||
-compile(export_all).
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
-include("machi_verbose.hrl").
|
|
||||||
-include_lib("eqc/include/eqc.hrl").
|
|
||||||
-include_lib("eqc/include/eqc_statem.hrl").
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
-record(target, {verbose=false,
|
|
||||||
flu_names,
|
|
||||||
mgr_names}).
|
|
||||||
|
|
||||||
-record(state, {num,
|
|
||||||
verbose=false,
|
|
||||||
flu_names,
|
|
||||||
mgr_names,
|
|
||||||
cr_count}).
|
|
||||||
|
|
||||||
%% ETS table names
|
|
||||||
-define(WRITTEN_TAB, written). % Successfully written data
|
|
||||||
-define(ACCPT_TAB, accpt). % Errors with no harm, e.g. timeout
|
|
||||||
-define(FAILED_TAB, failed). % Uncategorized errors, when happenes
|
|
||||||
% it should be re-categorized to accept or critical
|
|
||||||
-define(CRITICAL_TAB, critical). % Critical errors, e.g. double write to the same key
|
|
||||||
|
|
||||||
-define(QC_OUT(P),
|
|
||||||
eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
|
|
||||||
|
|
||||||
%% EUNIT TEST DEFINITION
|
|
||||||
prop_repair_test_() ->
|
|
||||||
{PropTO, EUnitTO} = eqc_timeout(60),
|
|
||||||
Verbose = eqc_verbose(),
|
|
||||||
{spawn,
|
|
||||||
[{timeout, EUnitTO,
|
|
||||||
?_assertEqual(
|
|
||||||
true,
|
|
||||||
eqc:quickcheck(eqc:testing_time(
|
|
||||||
PropTO, ?QC_OUT(noshrink(prop_repair(Verbose))))))}]}.
|
|
||||||
|
|
||||||
prop_repair_par_test_() ->
|
|
||||||
{PropTO, EUnitTO} = eqc_timeout(60),
|
|
||||||
Verbose = eqc_verbose(),
|
|
||||||
{spawn,
|
|
||||||
[{timeout, EUnitTO,
|
|
||||||
?_assertEqual(
|
|
||||||
true,
|
|
||||||
eqc:quickcheck(eqc:testing_time(
|
|
||||||
PropTO, ?QC_OUT(noshrink(prop_repair_par(Verbose))))))}]}.
|
|
||||||
|
|
||||||
%% Model
|
|
||||||
|
|
||||||
weight(_S, change_partition) -> 20;
|
|
||||||
weight(_S, _) -> 100.
|
|
||||||
|
|
||||||
%% Append
|
|
||||||
|
|
||||||
append_args(#state{cr_count=CRCount}=S) ->
|
|
||||||
[choose(1, CRCount), chunk(), S].
|
|
||||||
|
|
||||||
append(CRIndex, Bin, #state{verbose=V}=S) ->
|
|
||||||
CRList = cr_list(),
|
|
||||||
{_SimSelfName, C} = lists:nth(CRIndex, CRList),
|
|
||||||
Prefix = <<"pre">>,
|
|
||||||
Len = byte_size(Bin),
|
|
||||||
NSInfo = #ns_info{},
|
|
||||||
NoCSum = <<>>,
|
|
||||||
Opts1 = #append_opts{},
|
|
||||||
Res = (catch machi_cr_client:append_chunk(C, NSInfo, Prefix, Bin, NoCSum, Opts1, sec(1))),
|
|
||||||
case Res of
|
|
||||||
{ok, {_Off, Len, _FileName}=Key} ->
|
|
||||||
case ets:insert_new(?WRITTEN_TAB, {Key, Bin}) of
|
|
||||||
true ->
|
|
||||||
[?V("<o>", []) || V],
|
|
||||||
ok;
|
|
||||||
false ->
|
|
||||||
%% The Key is already written, WHY!!!????
|
|
||||||
case ets:lookup(?WRITTEN_TAB, Key) of
|
|
||||||
[{Key, Bin}] ->
|
|
||||||
%% TODO: The identical binary is already inserted in
|
|
||||||
%% written table. Is this acceptable??? Hmm, maybe NO...
|
|
||||||
[?V("<dws:~w>", [Key]) || V],
|
|
||||||
true = ets:insert_new(?ACCPT_TAB,
|
|
||||||
{make_ref(), double_write_same, Key}),
|
|
||||||
{acceptable_error, doublewrite_the_same};
|
|
||||||
[{Key, OtherBin}] ->
|
|
||||||
[?V("<dwd:~w:~w>", [Key, {OtherBin, Bin}]) || V],
|
|
||||||
true = ets:insert_new(?CRITICAL_TAB,
|
|
||||||
{make_ref(), double_write_diff, Key}),
|
|
||||||
R = {critical_error,
|
|
||||||
{doublewrite_diff, Key, {OtherBin, Bin}}},
|
|
||||||
%% TODO: when double write happens, it seems that
|
|
||||||
%% repair process got stuck in an endless loop. To
|
|
||||||
%% avoid it, return error here.
|
|
||||||
%% If this error/1 will be removed, one can possibly
|
|
||||||
%% know double write frequency/rate.
|
|
||||||
error(R)
|
|
||||||
end
|
|
||||||
end;
|
|
||||||
{error, partition} ->
|
|
||||||
[?V("<pt>", []) || V],
|
|
||||||
true = ets:insert_new(?ACCPT_TAB, {make_ref(), timeout}),
|
|
||||||
_ = tick(S),
|
|
||||||
{acceptable_error, partition};
|
|
||||||
{'EXIT', {timeout, _}} ->
|
|
||||||
[?V("<to:~w:~w>", [_SimSelfName, C]) || V],
|
|
||||||
true = ets:insert_new(?ACCPT_TAB, {make_ref(), timeout}),
|
|
||||||
_ = tick(S),
|
|
||||||
{acceptable_error, timeout};
|
|
||||||
{ok, {_Off, UnexpectedLen, _FileName}=Key} ->
|
|
||||||
[?V("<XX>", []) || V],
|
|
||||||
true = ets:insert_new(?CRITICAL_TAB, {make_ref(), unexpected_len, Key}),
|
|
||||||
{critical_error, {unexpected_len, Key, Len, UnexpectedLen}};
|
|
||||||
{error, _Reason} = Error ->
|
|
||||||
[?V("<er>", []) || V],
|
|
||||||
true = ets:insert_new(?FAILED_TAB, {make_ref(), Error}),
|
|
||||||
{other_error, Error};
|
|
||||||
Other ->
|
|
||||||
[?V("<er>", []) || V],
|
|
||||||
true = ets:insert_new(?FAILED_TAB, {make_ref(), Other}),
|
|
||||||
{other_error, Other}
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% Change partition
|
|
||||||
|
|
||||||
change_partition_args(#state{flu_names=FLUNames}=S) ->
|
|
||||||
%% [partition(FLUNames), S].
|
|
||||||
[partition_sym(FLUNames), S].
|
|
||||||
|
|
||||||
change_partition(Partition,
|
|
||||||
#state{verbose=Verbose, flu_names=FLUNames}=S) ->
|
|
||||||
[case Partition of
|
|
||||||
[] -> ?V("## Turn OFF partition: ~w~n", [Partition]);
|
|
||||||
_ -> ?V("## Turn ON partition: ~w~n", [Partition])
|
|
||||||
end || Verbose],
|
|
||||||
machi_partition_simulator:always_these_partitions(Partition),
|
|
||||||
_ = machi_partition_simulator:get(FLUNames),
|
|
||||||
%% Don't wait for stable chain, tick will be executed on demand
|
|
||||||
%% in append operations
|
|
||||||
_ = tick(S),
|
|
||||||
|
|
||||||
ok.
|
|
||||||
|
|
||||||
%% Generators
|
|
||||||
|
|
||||||
num() ->
|
|
||||||
choose(2, 5).
|
|
||||||
|
|
||||||
cr_count(Num) ->
|
|
||||||
Num * 3.
|
|
||||||
|
|
||||||
%% Returns a list like
|
|
||||||
%% `[{#p_srvr{name=a, port=7501, ..}, "./eqc/data.eqc.a/"}, ...]'
|
|
||||||
all_list_extra(Num) ->
|
|
||||||
{PortBase, DirBase} = get_port_dir_base(),
|
|
||||||
[begin
|
|
||||||
FLUNameStr = [$a + I - 1],
|
|
||||||
FLUName = list_to_atom(FLUNameStr),
|
|
||||||
MgrName = machi_flu_psup:make_mgr_supname(FLUName),
|
|
||||||
{#p_srvr{name=FLUName, address="localhost", port=PortBase+I,
|
|
||||||
props=[{chmgr, MgrName}]},
|
|
||||||
DirBase ++ "/data.eqc." ++ FLUNameStr}
|
|
||||||
end || I <- lists:seq(1, Num)].
|
|
||||||
|
|
||||||
sublist(L) ->
|
|
||||||
?LET(K, nat(),
|
|
||||||
?LET(L2, eqc_gen:vector(K, eqc_gen:oneof(L)),
|
|
||||||
lists:usort(L2))).
|
|
||||||
|
|
||||||
%% Generator for possibly asymmetric partition information
|
|
||||||
partition(FLUNames) ->
|
|
||||||
frequency([{10, return([])},
|
|
||||||
{20, non_empty(sublist(flu_ordered_pairs(FLUNames)))}]).
|
|
||||||
|
|
||||||
%% Generator for symmetric partition information
|
|
||||||
partition_sym(FLUNames) ->
|
|
||||||
?LET(Pairs, non_empty(sublist(flu_pairs(FLUNames))),
|
|
||||||
lists:flatmap(fun({One, Another}) -> [{One, Another}, {Another, One}] end,
|
|
||||||
Pairs)).
|
|
||||||
|
|
||||||
flu_ordered_pairs(FLUNames) ->
|
|
||||||
[{From, To} || From <- FLUNames, To <- FLUNames, From =/= To].
|
|
||||||
|
|
||||||
flu_pairs(FLUNames) ->
|
|
||||||
[{One, Another} || One <- FLUNames, Another <- FLUNames, One > Another].
|
|
||||||
|
|
||||||
chunk() ->
|
|
||||||
non_empty(binary(10)).
|
|
||||||
|
|
||||||
%% Properties
|
|
||||||
|
|
||||||
prop_repair(Verbose) ->
|
|
||||||
error_logger:tty(false),
|
|
||||||
application:load(sasl),
|
|
||||||
application:set_env(sasl, sasl_error_logger, false),
|
|
||||||
|
|
||||||
Seed = {1445,935441,287549},
|
|
||||||
?FORALL(Num, num(),
|
|
||||||
?FORALL(Cmds, commands(?MODULE, initial_state(Num, Verbose)),
|
|
||||||
begin
|
|
||||||
Target = setup_target(Num, Seed, Verbose),
|
|
||||||
{H, S1, Res0} = run_commands(?MODULE, Cmds),
|
|
||||||
%% ?V("S1=~w~n", [S1]),
|
|
||||||
?V("==== Start post operations, stabilize and confirm results~n", []),
|
|
||||||
_ = stabilize(commands_len(Cmds), Target),
|
|
||||||
{Dataloss, Critical} = confirm_result(Target),
|
|
||||||
_ = cleanup(Target),
|
|
||||||
pretty_commands(
|
|
||||||
?MODULE, Cmds, {H, S1, Res0},
|
|
||||||
aggregate(with_title(cmds), command_names(Cmds),
|
|
||||||
collect(with_title(length5), (length(Cmds) div 5) * 5,
|
|
||||||
{Dataloss, Critical} =:= {0, 0})))
|
|
||||||
end)).
|
|
||||||
|
|
||||||
prop_repair_par(Verbose) ->
|
|
||||||
error_logger:tty(false),
|
|
||||||
application:load(sasl),
|
|
||||||
application:set_env(sasl, sasl_error_logger, false),
|
|
||||||
|
|
||||||
Seed = {1445,935441,287549},
|
|
||||||
?FORALL(Num, num(),
|
|
||||||
?FORALL(Cmds,
|
|
||||||
%% Now try-and-err'ing, how to control command length and concurrency?
|
|
||||||
?SUCHTHAT(Cmds0, ?SIZED(Size, resize(Size,
|
|
||||||
parallel_commands(?MODULE, initial_state(Num, Verbose)))),
|
|
||||||
commands_len(Cmds0) > 20
|
|
||||||
andalso
|
|
||||||
concurrency(Cmds0) > 2),
|
|
||||||
begin
|
|
||||||
CmdsLen= commands_len(Cmds),
|
|
||||||
Target = setup_target(Num, Seed, Verbose),
|
|
||||||
{Seq, Par, Res0} = run_parallel_commands(?MODULE, Cmds),
|
|
||||||
%% ?V("Seq=~w~n", [Seq]),
|
|
||||||
%% ?V("Par=~w~n", [Par]),
|
|
||||||
?V("==== Start post operations, stabilize and confirm results~n", []),
|
|
||||||
{FinalRes, {Dataloss, Critical}} =
|
|
||||||
case Res0 of
|
|
||||||
ok ->
|
|
||||||
Res1 = stabilize(CmdsLen, Target),
|
|
||||||
{Res1, confirm_result(Target)};
|
|
||||||
_ ->
|
|
||||||
?V("Res0=~w~n", [Res0]),
|
|
||||||
{Res0, {undefined, undefined}}
|
|
||||||
end,
|
|
||||||
_ = cleanup(Target),
|
|
||||||
%% Process is leaking? This log line can be removed after fix.
|
|
||||||
[?V("process_count=~w~n", [erlang:system_info(process_count)]) || Verbose],
|
|
||||||
pretty_commands(
|
|
||||||
?MODULE, Cmds, {Seq, Par, Res0},
|
|
||||||
aggregate(with_title(cmds), command_names(Cmds),
|
|
||||||
collect(with_title(length5), (CmdsLen div 5) * 5,
|
|
||||||
collect(with_title(conc), concurrency(Cmds),
|
|
||||||
{FinalRes, {Dataloss, Critical}} =:= {ok, {0, 0}})))
|
|
||||||
)
|
|
||||||
end)).
|
|
||||||
|
|
||||||
%% Initialization / setup
|
|
||||||
|
|
||||||
%% Fake initialization function for debugging in shell like:
|
|
||||||
%% > eqc_gen:sample(eqc_statem:commands(machi_ap_repair_eqc)).
|
|
||||||
%% but not so helpful.
|
|
||||||
initial_state() ->
|
|
||||||
#state{cr_count=3}.
|
|
||||||
|
|
||||||
initial_state(Num, Verbose) ->
|
|
||||||
AllListE = all_list_extra(Num),
|
|
||||||
FLUNames = [P#p_srvr.name || {P, _Dir} <- AllListE],
|
|
||||||
MgrNames = [{Name, machi_flu_psup:make_mgr_supname(Name)} || Name <- FLUNames],
|
|
||||||
#state{num=Num, verbose=Verbose,
|
|
||||||
flu_names=FLUNames, mgr_names=MgrNames,
|
|
||||||
cr_count=cr_count(Num)}.
|
|
||||||
|
|
||||||
setup_target(Num, Seed, Verbose) ->
|
|
||||||
%% ?V("setup_target(Num=~w, Seed=~w~nn", [Num, Seed]),
|
|
||||||
AllListE = all_list_extra(Num),
|
|
||||||
FLUNames = [P#p_srvr.name || {P, _Dir} <- AllListE],
|
|
||||||
MgrNames = [{Name, machi_flu_psup:make_mgr_supname(Name)} || Name <- FLUNames],
|
|
||||||
Dict = orddict:from_list([{P#p_srvr.name, P} || {P, _Dir} <- AllListE]),
|
|
||||||
|
|
||||||
setup_chain(Seed, AllListE, FLUNames, MgrNames, Dict),
|
|
||||||
_ = setup_cpool(AllListE, FLUNames, Dict),
|
|
||||||
|
|
||||||
Target = #target{flu_names=FLUNames, mgr_names=MgrNames,
|
|
||||||
verbose=Verbose},
|
|
||||||
%% Don't wait for complete chain. Even partially completed, the chain
|
|
||||||
%% should work fine. Right?
|
|
||||||
wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames,
|
|
||||||
20, Verbose),
|
|
||||||
Target.
|
|
||||||
|
|
||||||
setup_chain(Seed, AllListE, FLUNames, MgrNames, Dict) ->
|
|
||||||
ok = shutdown_hard(),
|
|
||||||
[begin
|
|
||||||
machi_test_util:clean_up_dir(Dir),
|
|
||||||
filelib:ensure_dir(Dir ++ "/not-used")
|
|
||||||
end || {_P, Dir} <- AllListE],
|
|
||||||
[catch ets:delete(T) || T <- tabs()],
|
|
||||||
|
|
||||||
[ets:new(T, [set, public, named_table,
|
|
||||||
{write_concurrency, true}, {read_concurrency, true}]) ||
|
|
||||||
T <- tabs()],
|
|
||||||
{ok, _} = application:ensure_all_started(machi),
|
|
||||||
|
|
||||||
SimSpec = {part_sim,
|
|
||||||
{machi_partition_simulator, start_link, [{0,0,0}, 0, 100]},
|
|
||||||
permanent, 500, worker, []},
|
|
||||||
{ok, _PSimPid} = supervisor:start_child(machi_sup, SimSpec),
|
|
||||||
ok = machi_partition_simulator:set_seed(Seed),
|
|
||||||
_Partitions = machi_partition_simulator:get(FLUNames),
|
|
||||||
|
|
||||||
%% Start FLUs and setup the chain
|
|
||||||
FLUOpts = [{use_partition_simulator, true},
|
|
||||||
%% {private_write_verbose, true},
|
|
||||||
{active_mode, false},
|
|
||||||
{simulate_repair, false}],
|
|
||||||
[{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, FLUOpts) ||
|
|
||||||
{#p_srvr{name=Name, port=Port}, Dir} <- AllListE],
|
|
||||||
[machi_chain_manager1:set_chain_members(MgrName, Dict) || {_, MgrName} <- MgrNames],
|
|
||||||
ok.
|
|
||||||
|
|
||||||
setup_cpool(AllListE, FLUNames, Dict) ->
|
|
||||||
Num = length(AllListE),
|
|
||||||
FCList = [begin
|
|
||||||
{ok, PCPid} = machi_proxy_flu1_client:start_link(P),
|
|
||||||
{Name, PCPid}
|
|
||||||
end || {_, #p_srvr{name=Name}=P} <- Dict],
|
|
||||||
%% CR clients are pooled, each has a "name" which is interpreted as the "From"
|
|
||||||
%% side of simulated partition.
|
|
||||||
SimSelfNames = lists:append(lists:duplicate(cr_count(Num), FLUNames)),
|
|
||||||
CRList = [begin
|
|
||||||
{ok, C} = machi_cr_client:start_link(
|
|
||||||
[P || {_, P} <- Dict],
|
|
||||||
[{use_partition_simulator, true},
|
|
||||||
{simulator_self_name, SimSelfName},
|
|
||||||
{simulator_members, FLUNames}]),
|
|
||||||
{SimSelfName, C}
|
|
||||||
end || SimSelfName <- SimSelfNames],
|
|
||||||
catch ets:delete(cpool),
|
|
||||||
ets:new(cpool, [set, protected, named_table, {read_concurrency, true}]),
|
|
||||||
ets:insert(cpool, {fc_list, FCList}),
|
|
||||||
ets:insert(cpool, {cr_list, CRList}),
|
|
||||||
{CRList, FCList}.
|
|
||||||
|
|
||||||
fc_list() ->
|
|
||||||
[{fc_list, FCList}] = ets:lookup(cpool, fc_list),
|
|
||||||
FCList.
|
|
||||||
|
|
||||||
cr_list() ->
|
|
||||||
[{cr_list, CRList}] = ets:lookup(cpool, cr_list),
|
|
||||||
CRList.
|
|
||||||
|
|
||||||
%% Post run_commands
|
|
||||||
|
|
||||||
stabilize(0, _T) ->
|
|
||||||
ok;
|
|
||||||
stabilize(_CmdsLen, #target{flu_names=FLUNames, mgr_names=MgrNames,
|
|
||||||
verbose=Verbose}) ->
|
|
||||||
machi_partition_simulator:no_partitions(),
|
|
||||||
true = wait_until_stable(chain_state_all_ok(FLUNames), FLUNames, MgrNames,
|
|
||||||
100, Verbose),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
chain_state_all_ok(FLUNames) ->
|
|
||||||
[{FLUName, {FLUNames, [], []}} || FLUName <- FLUNames].
|
|
||||||
|
|
||||||
confirm_result(_T) ->
|
|
||||||
[{_, C} | _] = cr_list(),
|
|
||||||
[{written, _Written}, {accpt, Accpt},
|
|
||||||
{failed, Failed}, {critical, Critical}] = tab_counts(),
|
|
||||||
{OK, Dataloss} = confirm_written(C),
|
|
||||||
?V(" Written=~w, DATALOSS=~w, Acceptable=~w~n", [OK, Dataloss, Accpt]),
|
|
||||||
?V(" Failed=~w, Critical=~w~n~n", [Failed, Critical]),
|
|
||||||
DirBase = get_dir_base(),
|
|
||||||
Suffix = dump_file_suffix(),
|
|
||||||
case Failed of
|
|
||||||
0 -> ok;
|
|
||||||
_ ->
|
|
||||||
DumpFailed = filename:join(DirBase, "dump-failed-" ++ Suffix),
|
|
||||||
?V("Dump failed ETS tab to: ~s~n", [DumpFailed]),
|
|
||||||
ets:tab2file(?FAILED_TAB, DumpFailed)
|
|
||||||
end,
|
|
||||||
case Critical of
|
|
||||||
0 -> ok;
|
|
||||||
_ ->
|
|
||||||
DumpCritical = filename:join(DirBase, "dump-critical-" ++ Suffix),
|
|
||||||
?V("Dump critical ETS tab to: ~w~n", [DumpCritical]),
|
|
||||||
ets:tab2file(?CRITICAL_TAB, DumpCritical)
|
|
||||||
end,
|
|
||||||
{Dataloss, Critical}.
|
|
||||||
|
|
||||||
confirm_written(C) ->
|
|
||||||
ets:foldl(
|
|
||||||
fun({Key, Bin}, {OK, NG}) ->
|
|
||||||
case assert_chunk(C, Key, Bin) of
|
|
||||||
ok -> {OK+1, NG};
|
|
||||||
{error, _} -> {OK, NG+1}
|
|
||||||
end
|
|
||||||
end, {0, 0}, ?WRITTEN_TAB).
|
|
||||||
|
|
||||||
assert_chunk(C, {Off, Len, FileName}=Key, Bin) ->
|
|
||||||
%% TODO: This is probably a bug, read_chunk responds with filename of `string()' type
|
|
||||||
%% TODO : Use CSum instead of binary (after discussion about CSum has calmed down?)
|
|
||||||
NSInfo = undefined,
|
|
||||||
case (catch machi_cr_client:read_chunk(C, NSInfo, FileName, Off, Len, undefined, sec(3))) of
|
|
||||||
{ok, {[{FileName, Off, Bin, _}], []}} ->
|
|
||||||
ok;
|
|
||||||
{ok, Got} ->
|
|
||||||
?V("read_chunk got different binary for Key=~p~n", [Key]),
|
|
||||||
?V(" Expected: ~p~n", [{[{FileName, Off, Bin, <<"CSum-NYI">>}], []}]),
|
|
||||||
?V(" Got: ~p~n", [Got]),
|
|
||||||
{error, different_binary};
|
|
||||||
{error, Reason} ->
|
|
||||||
?V("read_chunk error for Key=~p: ~p~n", [Key, Reason]),
|
|
||||||
{error, Reason};
|
|
||||||
Other ->
|
|
||||||
?V("read_chunk other error for Key=~p: ~p~n", [Key, Other]),
|
|
||||||
{error, Other}
|
|
||||||
end.
|
|
||||||
|
|
||||||
cleanup(_Target) ->
|
|
||||||
[begin unlink(FC), catch exit(FC, kill) end || {_, FC} <- fc_list()],
|
|
||||||
[begin unlink(CR), catch exit(CR, kill) end || {_, CR} <- cr_list()],
|
|
||||||
_ = shutdown_hard().
|
|
||||||
|
|
||||||
%% Internal misc utilities
|
|
||||||
|
|
||||||
eqc_verbose() ->
|
|
||||||
os:getenv("EQC_VERBOSE") =:= "true".
|
|
||||||
|
|
||||||
eqc_timeout(Default) ->
|
|
||||||
PropTimeout = case os:getenv("EQC_TIME") of
|
|
||||||
false -> Default;
|
|
||||||
V -> list_to_integer(V)
|
|
||||||
end,
|
|
||||||
{PropTimeout, PropTimeout * 300}.
|
|
||||||
|
|
||||||
get_port_dir_base() ->
|
|
||||||
I = case os:getenv("EQC_BASE_PORT") of
|
|
||||||
false -> 0;
|
|
||||||
V -> list_to_integer(V)
|
|
||||||
end,
|
|
||||||
D = get_dir_base(),
|
|
||||||
{7400 + (I * 100), D ++ "/" ++ integer_to_list(I)}.
|
|
||||||
|
|
||||||
get_dir_base() ->
|
|
||||||
case os:getenv("EQC_BASE_DIR") of
|
|
||||||
false -> "./eqc";
|
|
||||||
DD -> DD
|
|
||||||
end.
|
|
||||||
|
|
||||||
shutdown_hard() ->
|
|
||||||
_STOP = application:stop(machi),
|
|
||||||
timer:sleep(100).
|
|
||||||
|
|
||||||
tick(#state{flu_names=FLUNames, mgr_names=MgrNames,
|
|
||||||
verbose=Verbose}) ->
|
|
||||||
tick(FLUNames, MgrNames, Verbose).
|
|
||||||
|
|
||||||
tick(FLUNames, MgrNames, Verbose) ->
|
|
||||||
tick(FLUNames, MgrNames, 2, 100, Verbose).
|
|
||||||
|
|
||||||
tick(FLUNames, MgrNames, Iter, SleepMax, Verbose) ->
|
|
||||||
TickFun = tick_fun(FLUNames, MgrNames, self()),
|
|
||||||
TickFun(Iter, 0, SleepMax),
|
|
||||||
FCList = fc_list(),
|
|
||||||
[?V("## Chain state after tick()=~w~n", [chain_state(FCList)]) || Verbose].
|
|
||||||
|
|
||||||
tick_fun(FLUNames, MgrNames, Parent) ->
|
|
||||||
fun(Iters, SleepMin, SleepMax) ->
|
|
||||||
%% ?V("^", []),
|
|
||||||
Trigger =
|
|
||||||
fun(FLUName, MgrName) ->
|
|
||||||
random:seed(now()),
|
|
||||||
[begin
|
|
||||||
erlang:yield(),
|
|
||||||
SleepMaxRand = random:uniform(SleepMax + 1),
|
|
||||||
%% io:format(user, "{t}", []),
|
|
||||||
Elapsed = machi_chain_manager1:sleep_ranked_order(
|
|
||||||
SleepMin, SleepMaxRand,
|
|
||||||
FLUName, FLUNames),
|
|
||||||
MgrName ! tick_check_environment,
|
|
||||||
%% Be more unfair by not sleeping here.
|
|
||||||
timer:sleep(max(SleepMax - Elapsed, 1)),
|
|
||||||
ok
|
|
||||||
end || _ <- lists:seq(1, Iters)],
|
|
||||||
Parent ! {done, self()}
|
|
||||||
end,
|
|
||||||
Pids = [{spawn(fun() -> Trigger(FLUName, MgrName) end), FLUName} ||
|
|
||||||
{FLUName, MgrName} <- MgrNames ],
|
|
||||||
[receive
|
|
||||||
{done, ThePid} ->
|
|
||||||
ok
|
|
||||||
after 120*1000 ->
|
|
||||||
exit({icky_timeout, M_name})
|
|
||||||
end || {ThePid, M_name} <- Pids]
|
|
||||||
end.
|
|
||||||
|
|
||||||
wait_until_stable(ExpectedChainState, FLUNames, MgrNames, Verbose) ->
|
|
||||||
wait_until_stable(ExpectedChainState, FLUNames, MgrNames, 20, Verbose).
|
|
||||||
|
|
||||||
wait_until_stable(ExpectedChainState, FLUNames, MgrNames, Retries, Verbose) ->
|
|
||||||
TickFun = tick_fun(FLUNames, MgrNames, self()),
|
|
||||||
FCList = fc_list(),
|
|
||||||
wait_until_stable1(ExpectedChainState, TickFun, FCList, Retries, Verbose).
|
|
||||||
|
|
||||||
wait_until_stable1(ExpectedChainState, _TickFun, FCList, 0, _Verbose) ->
|
|
||||||
?V(" [ERROR] _ExpectedChainState ~p\n", [ExpectedChainState]),
|
|
||||||
?V(" [ERROR] wait_until_stable failed.... : ~p~n", [chain_state(FCList)]),
|
|
||||||
?V(" [ERROR] norm.... : ~p~n", [normalize_chain_state(chain_state(FCList))]),
|
|
||||||
false;
|
|
||||||
wait_until_stable1(ExpectedChainState, TickFun, FCList, Reties, Verbose) ->
|
|
||||||
[TickFun(3, 0, 100) || _ <- lists:seq(1, 3)],
|
|
||||||
Normalized = normalize_chain_state(chain_state(FCList)),
|
|
||||||
case Normalized of
|
|
||||||
ExpectedChainState ->
|
|
||||||
[?V(" Got stable chain: ~w~n", [chain_state(FCList)]) || Verbose],
|
|
||||||
true;
|
|
||||||
_ ->
|
|
||||||
[?V(" NOT YET stable chain: ~w~n", [chain_state(FCList)]) || Verbose],
|
|
||||||
wait_until_stable1(ExpectedChainState, TickFun, FCList, Reties-1, Verbose)
|
|
||||||
end.
|
|
||||||
|
|
||||||
normalize_chain_state(ChainState) ->
|
|
||||||
lists:usort([{FLUName,
|
|
||||||
{lists:usort(UPI), lists:usort(Repairing), lists:usort(Down)}} ||
|
|
||||||
{FLUName, {_EpochNo, UPI, Repairing, Down}} <- ChainState]).
|
|
||||||
|
|
||||||
chain_state(FCList) ->
|
|
||||||
lists:usort(
|
|
||||||
[case (catch machi_proxy_flu1_client:read_latest_projection(C, private, sec(5))) of
|
|
||||||
{ok, #projection_v1{epoch_number=EpochNo, upi=UPI,
|
|
||||||
repairing=Repairing, down=Down}} ->
|
|
||||||
{FLUName, {EpochNo, UPI, Repairing, Down}};
|
|
||||||
Other ->
|
|
||||||
{FLUName, Other}
|
|
||||||
end || {FLUName, C} <- FCList]).
|
|
||||||
|
|
||||||
tabs() -> [?WRITTEN_TAB, ?ACCPT_TAB, ?FAILED_TAB, ?CRITICAL_TAB].
|
|
||||||
|
|
||||||
tab_counts() ->
|
|
||||||
[{T, ets:info(T, size)} || T <- tabs()].
|
|
||||||
|
|
||||||
sec(Sec) ->
|
|
||||||
timer:seconds(Sec).
|
|
||||||
|
|
||||||
commands_len({SeqCmds, ParCmdsList} = _Cmds) ->
|
|
||||||
lists:sum([length(SeqCmds) | [length(P) || P <- ParCmdsList]]);
|
|
||||||
commands_len(Cmds) ->
|
|
||||||
length(Cmds).
|
|
||||||
|
|
||||||
concurrency({_SeqCmds, ParCmdsList} = _Cmds) -> length(ParCmdsList);
|
|
||||||
concurrency(_) -> 1.
|
|
||||||
|
|
||||||
dump_file_suffix() ->
|
|
||||||
{{Year, Month, Day}, {Hour, Min, Sec}} = calendar:local_time(),
|
|
||||||
lists:flatten(
|
|
||||||
io_lib:format("~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0B.000Z",
|
|
||||||
[Year, Month, Day, Hour, Min, Sec])).
|
|
||||||
|
|
||||||
-endif. % EQC
|
|
||||||
-endif. % TEST
|
|
File diff suppressed because it is too large
Load diff
|
@ -81,7 +81,7 @@ unanimous_report(Epoch, Namez) ->
|
||||||
FLU_Projs = [{FLUName,
|
FLU_Projs = [{FLUName,
|
||||||
case ?FLU_PC:read_projection(FLU, private, Epoch) of
|
case ?FLU_PC:read_projection(FLU, private, Epoch) of
|
||||||
{ok, T} ->
|
{ok, T} ->
|
||||||
T;
|
machi_chain_manager1:inner_projection_or_self(T);
|
||||||
_Else ->
|
_Else ->
|
||||||
not_in_this_epoch
|
not_in_this_epoch
|
||||||
end} || {FLUName, FLU} <- Namez],
|
end} || {FLUName, FLU} <- Namez],
|
||||||
|
@ -230,7 +230,7 @@ prop_compare_legacy_with_v2_chain_transition_check(Style) ->
|
||||||
case Style of
|
case Style of
|
||||||
primitive ->
|
primitive ->
|
||||||
New_res = ?MGR:chain_state_transition_is_sane(
|
New_res = ?MGR:chain_state_transition_is_sane(
|
||||||
Author1, UPI1, Repair1, Author2, UPI2, Author2),
|
Author1, UPI1, Repair1, Author2, UPI2),
|
||||||
New_p = case New_res of true -> true;
|
New_p = case New_res of true -> true;
|
||||||
_ -> false
|
_ -> false
|
||||||
end;
|
end;
|
||||||
|
@ -273,80 +273,75 @@ make_prop_ets() ->
|
||||||
|
|
||||||
-endif. % EQC
|
-endif. % EQC
|
||||||
|
|
||||||
make_advance_fun(FitList, FLUList, MgrList, Num) ->
|
|
||||||
fun() ->
|
|
||||||
[begin
|
|
||||||
[catch machi_fitness:trigger_early_adjustment(Fit, Tgt) ||
|
|
||||||
Fit <- FitList,
|
|
||||||
Tgt <- FLUList ],
|
|
||||||
[catch ?MGR:trigger_react_to_env(Mgr) || Mgr <- MgrList],
|
|
||||||
ok
|
|
||||||
end || _ <- lists:seq(1, Num)]
|
|
||||||
end.
|
|
||||||
|
|
||||||
smoke0_test() ->
|
smoke0_test() ->
|
||||||
|
{ok, _} = machi_partition_simulator:start_link({1,2,3}, 50, 50),
|
||||||
|
Host = "localhost",
|
||||||
TcpPort = 6623,
|
TcpPort = 6623,
|
||||||
{[Pa], [M0], _Dirs} = machi_test_util:start_flu_packages(
|
{ok, FLUa} = machi_flu1:start_link([{a,TcpPort,"./data.a"}]),
|
||||||
1, TcpPort, "./data.", []),
|
Pa = #p_srvr{name=a, address=Host, port=TcpPort},
|
||||||
|
Members_Dict = machi_projection:make_members_dict([Pa]),
|
||||||
|
%% Egadz, more racing on startup, yay. TODO fix.
|
||||||
|
timer:sleep(1),
|
||||||
{ok, FLUaP} = ?FLU_PC:start_link(Pa),
|
{ok, FLUaP} = ?FLU_PC:start_link(Pa),
|
||||||
|
{ok, M0} = ?MGR:start_link(a, Members_Dict, [{active_mode, false}]),
|
||||||
|
_SockA = machi_util:connect(Host, TcpPort),
|
||||||
try
|
try
|
||||||
pong = ?MGR:ping(M0)
|
pong = ?MGR:ping(M0)
|
||||||
after
|
after
|
||||||
|
ok = ?MGR:stop(M0),
|
||||||
|
ok = machi_flu1:stop(FLUa),
|
||||||
ok = ?FLU_PC:quit(FLUaP),
|
ok = ?FLU_PC:quit(FLUaP),
|
||||||
machi_test_util:stop_flu_packages()
|
ok = machi_partition_simulator:stop()
|
||||||
end.
|
end.
|
||||||
|
|
||||||
smoke1_test_() ->
|
smoke1_test() ->
|
||||||
{timeout, 1*60, fun() -> smoke1_test2() end}.
|
machi_partition_simulator:start_link({1,2,3}, 100, 0),
|
||||||
|
|
||||||
smoke1_test2() ->
|
|
||||||
TcpPort = 62777,
|
TcpPort = 62777,
|
||||||
MgrOpts = [{active_mode,false}],
|
FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}, {c,TcpPort+2,"./data.c"}],
|
||||||
try
|
P_s = [#p_srvr{name=Name, address="localhost", port=Port} ||
|
||||||
{Ps, MgrNames, _Dirs} = machi_test_util:start_flu_packages(
|
{Name,Port,_Dir} <- FluInfo],
|
||||||
3, TcpPort, "./data.", MgrOpts),
|
|
||||||
MembersDict = machi_projection:make_members_dict(Ps),
|
|
||||||
[machi_chain_manager1:set_chain_members(M, MembersDict) || M <- MgrNames],
|
|
||||||
Ma = hd(MgrNames),
|
|
||||||
|
|
||||||
{ok, P1} = ?MGR:test_calc_projection(Ma, false),
|
[machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo],
|
||||||
|
FLUs = [element(2, machi_flu1:start_link([{Name,Port,Dir}])) ||
|
||||||
|
{Name,Port,Dir} <- FluInfo],
|
||||||
|
MembersDict = machi_projection:make_members_dict(P_s),
|
||||||
|
{ok, M0} = ?MGR:start_link(a, MembersDict, [{active_mode,false}]),
|
||||||
|
try
|
||||||
|
{ok, P1} = ?MGR:test_calc_projection(M0, false),
|
||||||
% DERP! Check for race with manager's proxy vs. proj listener
|
% DERP! Check for race with manager's proxy vs. proj listener
|
||||||
ok = lists:foldl(
|
case ?MGR:test_read_latest_public_projection(M0, false) of
|
||||||
fun(_, {_,{true,[{c,ok},{b,ok},{a,ok}]}}) ->
|
{error, partition} -> timer:sleep(500);
|
||||||
ok; % Short-circuit remaining attempts
|
_ -> ok
|
||||||
(_, ok) ->
|
end,
|
||||||
ok; % Skip remaining!
|
{local_write_result, ok,
|
||||||
(_, _Else) ->
|
{remote_write_results, [{b,ok},{c,ok}]}} =
|
||||||
timer:sleep(10),
|
?MGR:test_write_public_projection(M0, P1),
|
||||||
?MGR:test_write_public_projection(Ma, P1)
|
{unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(M0, false),
|
||||||
end, not_ok, lists:seq(1, 1000)),
|
|
||||||
%% Writing the exact same projection multiple times returns ok:
|
|
||||||
%% no change!
|
|
||||||
{_,{true,[{c,ok},{b,ok},{a,ok}]}} = ?MGR:test_write_public_projection(Ma, P1),
|
|
||||||
{unanimous, P1, Extra1} = ?MGR:test_read_latest_public_projection(Ma, false),
|
|
||||||
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_packages()
|
ok = ?MGR:stop(M0),
|
||||||
|
[ok = machi_flu1:stop(X) || X <- FLUs],
|
||||||
|
ok = machi_partition_simulator:stop()
|
||||||
end.
|
end.
|
||||||
|
|
||||||
nonunanimous_setup_and_fix_test_() ->
|
nonunanimous_setup_and_fix_test() ->
|
||||||
os:cmd("rm -f /tmp/moomoo.*"),
|
machi_partition_simulator:start_link({1,2,3}, 100, 0),
|
||||||
{timeout, 1*60, fun() -> nonunanimous_setup_and_fix_test2() end}.
|
|
||||||
|
|
||||||
nonunanimous_setup_and_fix_test2() ->
|
|
||||||
TcpPort = 62877,
|
TcpPort = 62877,
|
||||||
MgrOpts = [{active_mode,false}],
|
FluInfo = [{a,TcpPort+0,"./data.a"}, {b,TcpPort+1,"./data.b"}],
|
||||||
{Ps, [Ma,Mb,Mc], Dirs} = machi_test_util:start_flu_packages(
|
P_s = [#p_srvr{name=Name, address="localhost", port=Port} ||
|
||||||
3, TcpPort, "./data.", MgrOpts),
|
{Name,Port,_Dir} <- FluInfo],
|
||||||
MembersDict = machi_projection:make_members_dict(Ps),
|
|
||||||
ChainName = my_little_chain,
|
|
||||||
[machi_chain_manager1:set_chain_members(M, ChainName, 0, ap_mode,
|
|
||||||
MembersDict, []) || M <- [Ma, Mb]],
|
|
||||||
|
|
||||||
[Proxy_a, Proxy_b, Proxy_c] = Proxies =
|
|
||||||
[element(2, ?FLU_PC:start_link(P)) || P <- Ps],
|
|
||||||
|
|
||||||
|
[machi_flu1_test:clean_up_data_dir(Dir) || {_,_,Dir} <- FluInfo],
|
||||||
|
FLUs = [element(2, machi_flu1:start_link([{Name,Port,Dir}])) ||
|
||||||
|
{Name,Port,Dir} <- FluInfo],
|
||||||
|
[Proxy_a, Proxy_b] = Proxies =
|
||||||
|
[element(2,?FLU_PC:start_link(P)) || P <- P_s],
|
||||||
|
MembersDict = machi_projection:make_members_dict(P_s),
|
||||||
|
XX = [],
|
||||||
|
%% XX = [{private_write_verbose,true}],
|
||||||
|
{ok, Ma} = ?MGR:start_link(a, MembersDict, [{active_mode, false}]++XX),
|
||||||
|
{ok, Mb} = ?MGR:start_link(b, MembersDict, [{active_mode, false}]++XX),
|
||||||
try
|
try
|
||||||
{ok, P1} = ?MGR:test_calc_projection(Ma, false),
|
{ok, P1} = ?MGR:test_calc_projection(Ma, false),
|
||||||
|
|
||||||
|
@ -377,120 +372,24 @@ nonunanimous_setup_and_fix_test2() ->
|
||||||
{ok, P2pa} = ?FLU_PC:read_latest_projection(Proxy_a, private),
|
{ok, P2pa} = ?FLU_PC:read_latest_projection(Proxy_a, private),
|
||||||
P2 = P2pa#projection_v1{dbg2=[]},
|
P2 = P2pa#projection_v1{dbg2=[]},
|
||||||
|
|
||||||
%% Poke FLUb to react ... should be using the same private proj
|
%% %% FLUb should have nothing written to private because it hasn't
|
||||||
%% as FLUa.
|
%% %% reacted yet.
|
||||||
{now_using, _, EpochNum_a} = ?MGR:trigger_react_to_env(Mb),
|
%% {error, not_written} = ?FLU_PC:read_latest_projection(Proxy_b, private),
|
||||||
|
|
||||||
|
%% %% Poke FLUb to react ... should be using the same private proj
|
||||||
|
%% %% as FLUa.
|
||||||
|
%% {now_using, _, EpochNum_a} = ?MGR:trigger_react_to_env(Mb),
|
||||||
{ok, P2pb} = ?FLU_PC:read_latest_projection(Proxy_b, private),
|
{ok, P2pb} = ?FLU_PC:read_latest_projection(Proxy_b, private),
|
||||||
P2 = P2pb#projection_v1{dbg2=[]},
|
P2 = P2pb#projection_v1{dbg2=[]},
|
||||||
|
|
||||||
Mgrs = [a_chmgr, b_chmgr, c_chmgr],
|
timer:sleep(3000),
|
||||||
Advance = make_advance_fun([a_fitness,b_fitness,c_fitness],
|
|
||||||
[a,b,c],
|
|
||||||
Mgrs,
|
|
||||||
3),
|
|
||||||
Advance(),
|
|
||||||
{_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Ma),
|
|
||||||
{_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
{_, _, TheEpoch_3} = ?MGR:trigger_react_to_env(Mc),
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Remove 'a' from the chain.\n", []),
|
|
||||||
|
|
||||||
MembersDict4 = machi_projection:make_members_dict(tl(Ps)),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(
|
|
||||||
Mb, ChainName, TheEpoch_3, ap_mode, MembersDict4, []),
|
|
||||||
|
|
||||||
Advance(),
|
|
||||||
{ok, {true, _,_,_}} = ?FLU_PC:wedge_status(Proxy_a),
|
|
||||||
{_, _, TheEpoch_4} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
{_, _, TheEpoch_4} = ?MGR:trigger_react_to_env(Mc),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Add a to the chain again (a is running).\n", []),
|
|
||||||
|
|
||||||
MembersDict5 = machi_projection:make_members_dict(Ps),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(
|
|
||||||
Mb, ChainName, TheEpoch_4, ap_mode, MembersDict5, []),
|
|
||||||
|
|
||||||
Advance(),
|
|
||||||
{_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Ma),
|
|
||||||
{_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
{_, _, TheEpoch_5} = ?MGR:trigger_react_to_env(Mc),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[a]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- Proxies],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Stop a while a chain member, advance b&c.\n", []),
|
|
||||||
|
|
||||||
ok = machi_flu_psup:stop_flu_package(a),
|
|
||||||
Advance(),
|
|
||||||
{_, _, TheEpoch_6} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
{_, _, TheEpoch_6} = ?MGR:trigger_react_to_env(Mc),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Remove 'a' from the chain.\n", []),
|
|
||||||
|
|
||||||
MembersDict7 = machi_projection:make_members_dict(tl(Ps)),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(
|
|
||||||
Mb, ChainName, TheEpoch_6, ap_mode, MembersDict7, []),
|
|
||||||
|
|
||||||
Advance(),
|
|
||||||
{_, _, TheEpoch_7} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
{_, _, TheEpoch_7} = ?MGR:trigger_react_to_env(Mc),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Start a, advance.\n", []),
|
|
||||||
|
|
||||||
Opts = [{active_mode, false}, {initial_wedged, true}],
|
|
||||||
#p_srvr{name=NameA} = hd(Ps),
|
|
||||||
{ok,_}=machi_flu_psup:start_flu_package(NameA, TcpPort+1, hd(Dirs), Opts),
|
|
||||||
Advance(),
|
|
||||||
{ok, {true, _,_,_}} = ?FLU_PC:wedge_status(Proxy_a),
|
|
||||||
{ok, {false, EpochID_8,_,_}} = ?FLU_PC:wedge_status(Proxy_b),
|
|
||||||
{ok, {false, EpochID_8,_,_}} = ?FLU_PC:wedge_status(Proxy_c),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- tl(Proxies)],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Stop a, delete a's data, leave it stopped\n", []),
|
|
||||||
|
|
||||||
ok = machi_flu_psup:stop_flu_package(a),
|
|
||||||
Advance(),
|
|
||||||
machi_flu1_test:clean_up_data_dir(hd(Dirs)),
|
|
||||||
{ok, {false, _,_,_}} = ?FLU_PC:wedge_status(Proxy_b),
|
|
||||||
{ok, {false, _,_,_}} = ?FLU_PC:wedge_status(Proxy_c),
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Add a to the chain again (a is stopped).\n", []),
|
|
||||||
|
|
||||||
MembersDict9 = machi_projection:make_members_dict(Ps),
|
|
||||||
{_, _, TheEpoch_9} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(
|
|
||||||
Mb, ChainName, TheEpoch_9, ap_mode, MembersDict9, []),
|
|
||||||
Advance(),
|
|
||||||
{_, _, TheEpoch_9b} = ?MGR:trigger_react_to_env(Mb),
|
|
||||||
true = (TheEpoch_9b > TheEpoch_9),
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("STEP: Start a, and it joins like it ought to\n", []),
|
|
||||||
|
|
||||||
{ok,_}=machi_flu_psup:start_flu_package(NameA, TcpPort+1, hd(Dirs), Opts),
|
|
||||||
Advance(),
|
|
||||||
{ok, {false, {TheEpoch10,_},_,_}} = ?FLU_PC:wedge_status(Proxy_a),
|
|
||||||
{ok, {false, {TheEpoch10,_},_,_}} = ?FLU_PC:wedge_status(Proxy_b),
|
|
||||||
{ok, {false, {TheEpoch10,_},_,_}} = ?FLU_PC:wedge_status(Proxy_c),
|
|
||||||
[{ok, #projection_v1{upi=[b,c], repairing=[a]}} =
|
|
||||||
?FLU_PC:read_latest_projection(Pxy, private) || Pxy <- Proxies],
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
|
ok = ?MGR:stop(Ma),
|
||||||
|
ok = ?MGR:stop(Mb),
|
||||||
[ok = ?FLU_PC:quit(X) || X <- Proxies],
|
[ok = ?FLU_PC:quit(X) || X <- Proxies],
|
||||||
machi_test_util:stop_flu_packages()
|
[ok = machi_flu1:stop(X) || X <- FLUs],
|
||||||
|
ok = machi_partition_simulator:stop()
|
||||||
end.
|
end.
|
||||||
|
|
||||||
unanimous_report_test() ->
|
unanimous_report_test() ->
|
||||||
|
|
|
@ -1,68 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_cinfo_test).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
%% smoke_test() will try just dump cluster_info and call each functions
|
|
||||||
|
|
||||||
smoke_test_() ->
|
|
||||||
{setup,
|
|
||||||
fun setup/0,
|
|
||||||
fun cleanup/1,
|
|
||||||
[
|
|
||||||
fun() -> machi_cinfo:public_projection(a) end,
|
|
||||||
fun() -> machi_cinfo:private_projection(a) end,
|
|
||||||
fun() -> machi_cinfo:fitness(a) end,
|
|
||||||
fun() -> machi_cinfo:chain_manager(a) end,
|
|
||||||
fun() -> machi_cinfo:flu1(a) end,
|
|
||||||
fun() -> machi_cinfo:dump() end
|
|
||||||
]}.
|
|
||||||
|
|
||||||
setup() ->
|
|
||||||
machi_cinfo:register(),
|
|
||||||
Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}},
|
|
||||||
{b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}},
|
|
||||||
{c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}}
|
|
||||||
],
|
|
||||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
|
||||||
{ok, SupPid} = machi_sup:start_link(),
|
|
||||||
%% Only run a, don't run b & c so we have 100% failures talking to them
|
|
||||||
[begin
|
|
||||||
#p_srvr{name=Name, port=Port, props=Dir} = P,
|
|
||||||
{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, [])
|
|
||||||
end || {_,P} <- [hd(Ps)]],
|
|
||||||
machi_chain_manager1:set_chain_members(a_chmgr, orddict:from_list(Ps)),
|
|
||||||
{SupPid, Ps}.
|
|
||||||
|
|
||||||
cleanup({SupPid, Ps}) ->
|
|
||||||
exit(SupPid, normal),
|
|
||||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
|
||||||
machi_util:wait_for_death(SupPid, 100),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
-endif. % !PULSE
|
|
||||||
-endif. % TEST
|
|
|
@ -32,7 +32,6 @@ smoke_test_() -> {timeout, 1*60, fun() -> smoke_test2() end}.
|
||||||
|
|
||||||
setup_smoke_test(Host, PortBase, Os, Witness_list) ->
|
setup_smoke_test(Host, PortBase, Os, Witness_list) ->
|
||||||
os:cmd("rm -rf ./data.a ./data.b ./data.c"),
|
os:cmd("rm -rf ./data.a ./data.b ./data.c"),
|
||||||
{ok, _} = machi_util:wait_for_life(machi_flu_sup, 100),
|
|
||||||
|
|
||||||
F = fun(X) -> case lists:member(X, Witness_list) of
|
F = fun(X) -> case lists:member(X, Witness_list) of
|
||||||
true ->
|
true ->
|
||||||
|
@ -58,15 +57,9 @@ setup_smoke_test(Host, PortBase, Os, Witness_list) ->
|
||||||
%% 4. Wait until all others are using epoch id from #3.
|
%% 4. Wait until all others are using epoch id from #3.
|
||||||
%%
|
%%
|
||||||
%% Damn, this is a pain to make 100% deterministic, bleh.
|
%% Damn, this is a pain to make 100% deterministic, bleh.
|
||||||
CMode = if Witness_list == [] -> ap_mode;
|
ok = machi_chain_manager1:set_chain_members(a_chmgr, D, Witness_list),
|
||||||
Witness_list /= [] -> cp_mode
|
ok = machi_chain_manager1:set_chain_members(b_chmgr, D, Witness_list),
|
||||||
end,
|
ok = machi_chain_manager1:set_chain_members(c_chmgr, D, Witness_list),
|
||||||
ok = machi_chain_manager1:set_chain_members(a_chmgr, ch0, 0, CMode,
|
|
||||||
D, Witness_list),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(b_chmgr, ch0, 0, CMode,
|
|
||||||
D, Witness_list),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(c_chmgr, ch0, 0, CMode,
|
|
||||||
D, Witness_list),
|
|
||||||
run_ticks([a_chmgr,b_chmgr,c_chmgr]),
|
run_ticks([a_chmgr,b_chmgr,c_chmgr]),
|
||||||
%% Everyone is settled on the same damn epoch id.
|
%% Everyone is settled on the same damn epoch id.
|
||||||
{ok, EpochID} = machi_flu1_client:get_latest_epochid(Host, PortBase+0,
|
{ok, EpochID} = machi_flu1_client:get_latest_epochid(Host, PortBase+0,
|
||||||
|
@ -102,13 +95,11 @@ run_ticks(MgrList) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
smoke_test2() ->
|
smoke_test2() ->
|
||||||
{ok, SupPid} = machi_sup:start_link(),
|
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||||
error_logger:tty(false),
|
error_logger:tty(false),
|
||||||
try
|
try
|
||||||
Prefix = <<"pre">>,
|
Prefix = <<"pre">>,
|
||||||
Chunk1 = <<"yochunk">>,
|
Chunk1 = <<"yochunk">>,
|
||||||
NSInfo = undefined,
|
|
||||||
NoCSum = <<>>,
|
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
PortBase = 64454,
|
PortBase = 64454,
|
||||||
Os = [{ignore_stability_time, true}, {active_mode, false}],
|
Os = [{ignore_stability_time, true}, {active_mode, false}],
|
||||||
|
@ -116,119 +107,95 @@ smoke_test2() ->
|
||||||
|
|
||||||
%% Whew ... ok, now start some damn tests.
|
%% Whew ... ok, now start some damn tests.
|
||||||
{ok, C1} = machi_cr_client:start_link([P || {_,P}<-orddict:to_list(D)]),
|
{ok, C1} = machi_cr_client:start_link([P || {_,P}<-orddict:to_list(D)]),
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, NoCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1),
|
||||||
{ok, {Off1,Size1,File1}} =
|
{ok, {Off1,Size1,File1}} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, NoCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1),
|
||||||
BadCSum = {?CSUM_TAG_CLIENT_SHA, crypto:hash(sha, "foo")},
|
Chunk1_badcs = {<<?CSUM_TAG_CLIENT_SHA:8, 0:(8*20)>>, Chunk1},
|
||||||
{error, bad_checksum} =
|
{error, bad_checksum} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, BadCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1_badcs),
|
||||||
{ok, {[{_, Off1, Chunk1, _}], []}} =
|
{ok, Chunk1} = machi_cr_client:read_chunk(C1, File1, Off1, Size1),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, Off1, Size1, undefined),
|
|
||||||
{ok, PPP} = machi_flu1_client:read_latest_projection(Host, PortBase+0,
|
{ok, PPP} = machi_flu1_client:read_latest_projection(Host, PortBase+0,
|
||||||
private),
|
private),
|
||||||
%% Verify that the client's CR wrote to all of them.
|
%% Verify that the client's CR wrote to all of them.
|
||||||
[{ok, {[{_, Off1, Chunk1, _}], []}} =
|
[{ok, Chunk1} = machi_flu1_client:read_chunk(
|
||||||
machi_flu1_client:read_chunk(
|
Host, PortBase+X, EpochID, File1, Off1, Size1) ||
|
||||||
Host, PortBase+X, NSInfo, EpochID, File1, Off1, Size1, undefined) ||
|
|
||||||
X <- [0,1,2] ],
|
X <- [0,1,2] ],
|
||||||
|
|
||||||
%% Test read repair: Manually write to head, then verify that
|
%% Test read repair: Manually write to head, then verify that
|
||||||
%% read-repair fixes all.
|
%% read-repair fixes all.
|
||||||
FooOff1 = Off1 + (1024*1024),
|
FooOff1 = Off1 + (1024*1024),
|
||||||
[{error, not_written} = machi_flu1_client:read_chunk(
|
[{error, not_written} = machi_flu1_client:read_chunk(
|
||||||
Host, PortBase+X, NSInfo, EpochID,
|
Host, PortBase+X, EpochID,
|
||||||
File1, FooOff1, Size1, undefined) || X <- [0,1,2] ],
|
File1, FooOff1, Size1) || X <- [0,1,2] ],
|
||||||
ok = machi_flu1_client:write_chunk(Host, PortBase+0, NSInfo, EpochID,
|
ok = machi_flu1_client:write_chunk(Host, PortBase+0, EpochID,
|
||||||
File1, FooOff1, Chunk1, NoCSum),
|
File1, FooOff1, Chunk1),
|
||||||
{ok, {[{File1, FooOff1, Chunk1, _}=_YY], []}} =
|
{ok, Chunk1} = machi_cr_client:read_chunk(C1, File1, FooOff1, Size1),
|
||||||
machi_flu1_client:read_chunk(Host, PortBase+0, NSInfo, EpochID,
|
[{X,{ok, Chunk1}} = {X,machi_flu1_client:read_chunk(
|
||||||
File1, FooOff1, Size1, undefined),
|
Host, PortBase+X, EpochID,
|
||||||
{ok, {[{File1, FooOff1, Chunk1, _}], []}} =
|
File1, FooOff1, Size1)} || X <- [0,1,2] ],
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, FooOff1, Size1, undefined),
|
|
||||||
[?assertMatch({X,{ok, {[{_, FooOff1, Chunk1, _}], []}}},
|
|
||||||
{X,machi_flu1_client:read_chunk(
|
|
||||||
Host, PortBase+X, NSInfo, EpochID,
|
|
||||||
File1, FooOff1, Size1, undefined)})
|
|
||||||
|| X <- [0,1,2] ],
|
|
||||||
|
|
||||||
%% Test read repair: Manually write to middle, then same checking.
|
%% Test read repair: Manually write to middle, then same checking.
|
||||||
FooOff2 = Off1 + (2*1024*1024),
|
FooOff2 = Off1 + (2*1024*1024),
|
||||||
Chunk2 = <<"Middle repair chunk">>,
|
Chunk2 = <<"Middle repair chunk">>,
|
||||||
Size2 = size(Chunk2),
|
Size2 = size(Chunk2),
|
||||||
ok = machi_flu1_client:write_chunk(Host, PortBase+1, NSInfo, EpochID,
|
ok = machi_flu1_client:write_chunk(Host, PortBase+1, EpochID,
|
||||||
File1, FooOff2, Chunk2, NoCSum),
|
File1, FooOff2, Chunk2),
|
||||||
{ok, {[{File1, FooOff2, Chunk2, _}], []}} =
|
{ok, Chunk2} = machi_cr_client:read_chunk(C1, File1, FooOff2, Size2),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, FooOff2, Size2, undefined),
|
[{X,{ok, Chunk2}} = {X,machi_flu1_client:read_chunk(
|
||||||
[{X,{ok, {[{File1, FooOff2, Chunk2, _}], []}}} =
|
Host, PortBase+X, EpochID,
|
||||||
{X,machi_flu1_client:read_chunk(
|
File1, FooOff2, Size2)} || X <- [0,1,2] ],
|
||||||
Host, PortBase+X, NSInfo, EpochID,
|
|
||||||
File1, FooOff2, Size2, undefined)} || X <- [0,1,2] ],
|
|
||||||
|
|
||||||
%% Misc API smoke & minor regression checks
|
%% Misc API smoke & minor regression checks
|
||||||
{error, bad_arg} = machi_cr_client:read_chunk(C1, NSInfo, <<"no">>,
|
{error, not_written} = machi_cr_client:read_chunk(C1, <<"no">>,
|
||||||
999999999, 1, undefined),
|
999999999, 1),
|
||||||
{ok, {[{File1,Off1,Chunk1,_}, {File1,FooOff1,Chunk1,_}, {File1,FooOff2,Chunk2,_}],
|
{error, partial_read} = machi_cr_client:read_chunk(C1, File1,
|
||||||
[]}} =
|
Off1, 88888888),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, Off1, 88888888, undefined),
|
|
||||||
%% Checksum list return value is a primitive binary().
|
%% Checksum list return value is a primitive binary().
|
||||||
{ok, KludgeBin} = machi_cr_client:checksum_list(C1, File1),
|
{ok, KludgeBin} = machi_cr_client:checksum_list(C1, File1),
|
||||||
true = is_binary(KludgeBin),
|
true = is_binary(KludgeBin),
|
||||||
|
|
||||||
{error, bad_arg} = machi_cr_client:checksum_list(C1, <<"!!!!">>),
|
{error, no_such_file} = machi_cr_client:checksum_list(C1, <<"!!!!">>),
|
||||||
io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]),
|
%% Exactly one file right now
|
||||||
%% Exactly one file right now, e.g.,
|
|
||||||
%% {ok,[{2098202,<<"pre^b144ef13-db4d-4c9f-96e7-caff02dc754f^1">>}]}
|
|
||||||
{ok, [_]} = machi_cr_client:list_files(C1),
|
{ok, [_]} = machi_cr_client:list_files(C1),
|
||||||
|
|
||||||
%% Go back and test append_chunk() + extra and write_chunk()
|
%% Go back and test append_chunk_extra() and write_chunk()
|
||||||
Chunk10 = <<"It's a different chunk!">>,
|
Chunk10 = <<"It's a different chunk!">>,
|
||||||
Size10 = byte_size(Chunk10),
|
Size10 = byte_size(Chunk10),
|
||||||
Extra10 = 5,
|
Extra10 = 5,
|
||||||
Opts1 = #append_opts{chunk_extra=Extra10*Size10},
|
|
||||||
{ok, {Off10,Size10,File10}} =
|
{ok, {Off10,Size10,File10}} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk10,
|
machi_cr_client:append_chunk_extra(C1, Prefix, Chunk10,
|
||||||
NoCSum, Opts1),
|
Extra10 * Size10),
|
||||||
{ok, {[{_, Off10, Chunk10, _}], []}} =
|
{ok, Chunk10} = machi_cr_client:read_chunk(C1, File10, Off10, Size10),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File10, Off10, Size10, undefined),
|
|
||||||
[begin
|
[begin
|
||||||
Offx = Off10 + (Seq * Size10),
|
Offx = Off10 + (Seq * Size10),
|
||||||
%% TODO: uncomment written/not_written enforcement is available.
|
%% TODO: uncomment written/not_written enforcement is available.
|
||||||
%% {error,not_written} = machi_cr_client:read_chunk(C1, NSInfo, File10,
|
%% {error,not_written} = machi_cr_client:read_chunk(C1, File10,
|
||||||
%% Offx, Size10),
|
%% Offx, Size10),
|
||||||
{ok, {Offx,Size10,File10}} =
|
{ok, {Offx,Size10,File10}} =
|
||||||
machi_cr_client:write_chunk(C1, NSInfo, File10, Offx, Chunk10, NoCSum),
|
machi_cr_client:write_chunk(C1, File10, Offx, Chunk10),
|
||||||
{ok, {[{_, Offx, Chunk10, _}], []}} =
|
{ok, Chunk10} = machi_cr_client:read_chunk(C1, File10, Offx,
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File10, Offx, Size10, undefined)
|
Size10)
|
||||||
end || Seq <- lists:seq(1, Extra10)],
|
end || Seq <- lists:seq(1, Extra10)],
|
||||||
{ok, {Off11,Size11,File11}} =
|
{ok, {Off11,Size11,File11}} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk10, NoCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk10),
|
||||||
%% %% Double-check that our reserved extra bytes were really honored!
|
%% Double-check that our reserved extra bytes were really honored!
|
||||||
%% true = (Off11 > (Off10 + (Extra10 * Size10))),
|
true = (Off11 > (Off10 + (Extra10 * Size10))),
|
||||||
io:format(user, "\nFiles = ~p\n", [machi_cr_client:list_files(C1)]),
|
|
||||||
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
exit(SupPid, normal),
|
|
||||||
machi_util:wait_for_death(SupPid, 100),
|
|
||||||
error_logger:tty(true),
|
error_logger:tty(true),
|
||||||
catch application:stop(machi)
|
catch application:stop(machi),
|
||||||
|
exit(SupPid, normal)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
witness_smoke_test_() -> {timeout, 1*60, fun() -> witness_smoke_test2() end}.
|
witness_smoke_test_() -> {timeout, 1*60, fun() -> witness_smoke_test2() end}.
|
||||||
|
|
||||||
witness_smoke_test2() ->
|
witness_smoke_test2() ->
|
||||||
SupPid = case machi_sup:start_link() of
|
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||||
{ok, P} -> P;
|
error_logger:tty(false),
|
||||||
{error, {already_started, P1}} -> P1;
|
|
||||||
Other -> error(Other)
|
|
||||||
end,
|
|
||||||
%% TODO: I wonder why commenting this out makes this test pass
|
|
||||||
%% error_logger:tty(true),
|
|
||||||
try
|
try
|
||||||
Prefix = <<"pre">>,
|
Prefix = <<"pre">>,
|
||||||
Chunk1 = <<"yochunk">>,
|
Chunk1 = <<"yochunk">>,
|
||||||
NSInfo = undefined,
|
|
||||||
NoCSum = <<>>,
|
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
PortBase = 64444,
|
PortBase = 64444,
|
||||||
Os = [{ignore_stability_time, true}, {active_mode, false},
|
Os = [{ignore_stability_time, true}, {active_mode, false},
|
||||||
|
@ -238,46 +205,31 @@ witness_smoke_test2() ->
|
||||||
|
|
||||||
%% Whew ... ok, now start some damn tests.
|
%% Whew ... ok, now start some damn tests.
|
||||||
{ok, C1} = machi_cr_client:start_link([P || {_,P}<-orddict:to_list(D)]),
|
{ok, C1} = machi_cr_client:start_link([P || {_,P}<-orddict:to_list(D)]),
|
||||||
{ok, _} = machi_cr_client:append_chunk(C1, NSInfo, Prefix,
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1),
|
||||||
Chunk1, NoCSum),
|
|
||||||
{ok, {Off1,Size1,File1}} =
|
{ok, {Off1,Size1,File1}} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, NoCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1),
|
||||||
BadCSum = {?CSUM_TAG_CLIENT_SHA, crypto:hash(sha, "foo")},
|
Chunk1_badcs = {<<?CSUM_TAG_CLIENT_SHA:8, 0:(8*20)>>, Chunk1},
|
||||||
{error, bad_checksum} =
|
{error, bad_checksum} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, BadCSum),
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1_badcs),
|
||||||
{ok, {[{_, Off1, Chunk1, _}], []}} =
|
{ok, Chunk1} = machi_cr_client:read_chunk(C1, File1, Off1, Size1),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, Off1, Size1, undefined),
|
|
||||||
|
|
||||||
%% Stop 'b' and let the chain reset.
|
%% Stop 'b' and let the chain reset.
|
||||||
ok = machi_flu_psup:stop_flu_package(b),
|
ok = machi_flu_psup:stop_flu_package(b),
|
||||||
%% ok = machi_fitness:add_admin_down(a_fitness, admin_down_bogus_flu, [{why,because}]),
|
run_ticks([a_chmgr,c_chmgr]),
|
||||||
%% ok = machi_fitness:delete_admin_down(a_fitness, admin_down_bogus_flu),
|
|
||||||
%% Run ticks enough times to force auto-unwedge of both a & c.
|
|
||||||
[run_ticks([a_chmgr,c_chmgr]) || _ <- [1,2,3,4] ],
|
|
||||||
|
|
||||||
%% The chain should now be [a,c].
|
%% The chain should now be [a,c].
|
||||||
%% Let's wedge OurWitness and see what happens: timeout/partition.
|
%% Let's wedge OurWitness and see what happens: timeout/partition.
|
||||||
#p_srvr{name=WitName, address=WitA, port=WitP} =
|
#p_srvr{name=WitName, address=WitA, port=WitP} =
|
||||||
orddict:fetch(OurWitness, D),
|
orddict:fetch(OurWitness, D),
|
||||||
{ok, {false, EpochID2,_,_}} = machi_flu1_client:wedge_status(WitA, WitP),
|
{ok, {false, EpochID2}} = machi_flu1_client:wedge_status(WitA, WitP),
|
||||||
machi_flu1:wedge_myself(WitName, EpochID2),
|
machi_flu1:wedge_myself(WitName, EpochID2),
|
||||||
case machi_flu1_client:wedge_status(WitA, WitP) of
|
{ok, {true, EpochID2}} = machi_flu1_client:wedge_status(WitA, WitP),
|
||||||
{ok, {true, EpochID2,_,_}} ->
|
|
||||||
ok;
|
|
||||||
{ok, {false, EpochID2,_,_}} ->
|
|
||||||
%% This is racy. Work around it by sleeping a while.
|
|
||||||
timer:sleep(6*1000),
|
|
||||||
{ok, {true, EpochID2,_,_}} =
|
|
||||||
machi_flu1_client:wedge_status(WitA, WitP)
|
|
||||||
end,
|
|
||||||
|
|
||||||
%% Chunk1 is still readable: not affected by wedged witness head.
|
%% Chunk1 is still readable: not affected by wedged witness head.
|
||||||
{ok, {[{_, Off1, Chunk1, _}], []}} =
|
{ok, Chunk1} = machi_cr_client:read_chunk(C1, File1, Off1, Size1),
|
||||||
machi_cr_client:read_chunk(C1, NSInfo, File1, Off1, Size1, undefined),
|
|
||||||
%% But because the head is wedged, an append will fail.
|
%% But because the head is wedged, an append will fail.
|
||||||
{error, partition} =
|
{error, partition} =
|
||||||
machi_cr_client:append_chunk(C1, NSInfo, Prefix, Chunk1, NoCSum,
|
machi_cr_client:append_chunk(C1, Prefix, Chunk1, 1*1000),
|
||||||
#append_opts{}, 1*1000),
|
|
||||||
|
|
||||||
%% The witness's wedge status should cause timeout/partition
|
%% The witness's wedge status should cause timeout/partition
|
||||||
%% for write_chunk also.
|
%% for write_chunk also.
|
||||||
|
@ -286,7 +238,7 @@ witness_smoke_test2() ->
|
||||||
File10 = File1,
|
File10 = File1,
|
||||||
Offx = Off1 + (1 * Size10),
|
Offx = Off1 + (1 * Size10),
|
||||||
{error, partition} =
|
{error, partition} =
|
||||||
machi_cr_client:write_chunk(C1, NSInfo, File10, Offx, Chunk10, NoCSum, 1*1000),
|
machi_cr_client:write_chunk(C1, File10, Offx, Chunk10, 1*1000),
|
||||||
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
|
|
|
@ -1,131 +0,0 @@
|
||||||
-module(machi_csum_table_test).
|
|
||||||
-compile(export_all).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-define(HDR, {0, 1024, none}).
|
|
||||||
|
|
||||||
cleanup(Dir) ->
|
|
||||||
os:cmd("rm -rf " ++ Dir).
|
|
||||||
|
|
||||||
smoke_test() ->
|
|
||||||
Filename = "./temp-checksum-dumb-file",
|
|
||||||
_ = cleanup(Filename),
|
|
||||||
{ok, MC} = machi_csum_table:open(Filename, []),
|
|
||||||
?assertEqual([{1024, infinity}],
|
|
||||||
machi_csum_table:calc_unwritten_bytes(MC)),
|
|
||||||
Entry = {Offset, Size, Checksum} = {1064, 34, <<"deadbeef">>},
|
|
||||||
[] = machi_csum_table:find(MC, Offset, Size),
|
|
||||||
ok = machi_csum_table:write(MC, Offset, Size, Checksum),
|
|
||||||
[{1024, 40}, {1098, infinity}] = machi_csum_table:calc_unwritten_bytes(MC),
|
|
||||||
?assertEqual([Entry], machi_csum_table:find(MC, Offset, Size)),
|
|
||||||
ok = machi_csum_table:trim(MC, Offset, Size, undefined, undefined),
|
|
||||||
?assertEqual([{Offset, Size, trimmed}],
|
|
||||||
machi_csum_table:find(MC, Offset, Size)),
|
|
||||||
ok = machi_csum_table:close(MC),
|
|
||||||
ok = machi_csum_table:delete(MC).
|
|
||||||
|
|
||||||
close_test() ->
|
|
||||||
Filename = "./temp-checksum-dumb-file-2",
|
|
||||||
_ = cleanup(Filename),
|
|
||||||
{ok, MC} = machi_csum_table:open(Filename, []),
|
|
||||||
Entry = {Offset, Size, Checksum} = {1064, 34, <<"deadbeef">>},
|
|
||||||
[] = machi_csum_table:find(MC, Offset, Size),
|
|
||||||
ok = machi_csum_table:write(MC, Offset, Size, Checksum),
|
|
||||||
[Entry] = machi_csum_table:find(MC, Offset, Size),
|
|
||||||
ok = machi_csum_table:close(MC),
|
|
||||||
|
|
||||||
{ok, MC2} = machi_csum_table:open(Filename, []),
|
|
||||||
[Entry] = machi_csum_table:find(MC2, Offset, Size),
|
|
||||||
ok = machi_csum_table:trim(MC2, Offset, Size, undefined, undefined),
|
|
||||||
[{Offset, Size, trimmed}] = machi_csum_table:find(MC2, Offset, Size),
|
|
||||||
ok = machi_csum_table:delete(MC2).
|
|
||||||
|
|
||||||
smoke2_test() ->
|
|
||||||
Filename = "./temp-checksum-dumb-file-3",
|
|
||||||
_ = cleanup(Filename),
|
|
||||||
{ok, MC} = machi_csum_table:open(Filename, []),
|
|
||||||
Entry = {Offset, Size, Checksum} = {1025, 10, <<"deadbeef">>},
|
|
||||||
ok = machi_csum_table:write(MC, Offset, Size, Checksum),
|
|
||||||
?assertEqual([], machi_csum_table:find(MC, 0, 0)),
|
|
||||||
?assertEqual([?HDR], machi_csum_table:find(MC, 0, 1)),
|
|
||||||
[Entry] = machi_csum_table:find(MC, Offset, Size),
|
|
||||||
[?HDR] = machi_csum_table:find(MC, 1, 1024),
|
|
||||||
?assertEqual([?HDR, Entry],
|
|
||||||
machi_csum_table:find(MC, 1023, 1024)),
|
|
||||||
[Entry] = machi_csum_table:find(MC, 1024, 1024),
|
|
||||||
[Entry] = machi_csum_table:find(MC, 1025, 1024),
|
|
||||||
|
|
||||||
ok = machi_csum_table:trim(MC, Offset, Size, undefined, undefined),
|
|
||||||
[{Offset, Size, trimmed}] = machi_csum_table:find(MC, Offset, Size),
|
|
||||||
ok = machi_csum_table:close(MC),
|
|
||||||
ok = machi_csum_table:delete(MC).
|
|
||||||
|
|
||||||
smoke3_test() ->
|
|
||||||
Filename = "./temp-checksum-dumb-file-4",
|
|
||||||
_ = cleanup(Filename),
|
|
||||||
{ok, MC} = machi_csum_table:open(Filename, []),
|
|
||||||
Scenario =
|
|
||||||
[%% Command, {Offset, Size, Csum}, LeftNeighbor, RightNeighbor
|
|
||||||
{?LINE, write, {2000, 10, <<"heh">>}, undefined, undefined},
|
|
||||||
{?LINE, write, {3000, 10, <<"heh">>}, undefined, undefined},
|
|
||||||
{?LINE, write, {4000, 10, <<"heh2">>}, undefined, undefined},
|
|
||||||
{?LINE, write, {4000, 10, <<"heh2">>}, undefined, undefined},
|
|
||||||
{?LINE, write, {4005, 10, <<"heh3">>}, {4000, 5, <<"heh2">>}, undefined},
|
|
||||||
{?LINE, write, {4005, 10, <<"heh3">>}, undefined, undefined},
|
|
||||||
{?LINE, trim, {3005, 10, <<>>}, {3000, 5, <<"heh">>}, undefined},
|
|
||||||
{?LINE, trim, {2000, 10, <<>>}, undefined, undefined},
|
|
||||||
{?LINE, trim, {2005, 5, <<>>}, {2000, 5, trimmed}, undefined},
|
|
||||||
{?LINE, trim, {3000, 5, <<>>}, undefined, undefined},
|
|
||||||
{?LINE, trim, {4000, 10, <<>>}, undefined, {4010, 5, <<"heh3">>}},
|
|
||||||
{?LINE, trim, {4010, 5, <<>>}, undefined, undefined},
|
|
||||||
{?LINE, trim, {0, 1024, <<>>}, undefined, undefined}
|
|
||||||
],
|
|
||||||
[ begin
|
|
||||||
%% ?debugVal({_Line, Chunk}),
|
|
||||||
{Offset, Size, Csum} = Chunk,
|
|
||||||
?assertEqual(LeftN0,
|
|
||||||
machi_csum_table:find_leftneighbor(MC, Offset)),
|
|
||||||
?assertEqual(RightN0,
|
|
||||||
machi_csum_table:find_rightneighbor(MC, Offset+Size)),
|
|
||||||
LeftN = case LeftN0 of
|
|
||||||
{OffsL, SizeL, trimmed} -> {OffsL, SizeL, trimmed};
|
|
||||||
{OffsL, SizeL, _} -> {OffsL, SizeL, <<"boom">>};
|
|
||||||
OtherL -> OtherL
|
|
||||||
end,
|
|
||||||
RightN = case RightN0 of
|
|
||||||
{OffsR, SizeR, _} -> {OffsR, SizeR, <<"boot">>};
|
|
||||||
OtherR -> OtherR
|
|
||||||
end,
|
|
||||||
case Cmd of
|
|
||||||
write ->
|
|
||||||
ok = machi_csum_table:write(MC, Offset, Size, Csum,
|
|
||||||
LeftN, RightN);
|
|
||||||
trim ->
|
|
||||||
ok = machi_csum_table:trim(MC, Offset, Size,
|
|
||||||
LeftN, RightN)
|
|
||||||
end
|
|
||||||
end || {_Line, Cmd, Chunk, LeftN0, RightN0} <- Scenario ],
|
|
||||||
?assert(not machi_csum_table:all_trimmed(MC, 10000)),
|
|
||||||
machi_csum_table:trim(MC, 0, 10000, undefined, undefined),
|
|
||||||
?assert(machi_csum_table:all_trimmed(MC, 10000)),
|
|
||||||
|
|
||||||
ok = machi_csum_table:close(MC),
|
|
||||||
ok = machi_csum_table:delete(MC).
|
|
||||||
|
|
||||||
%% TODO: add quickcheck test here
|
|
||||||
|
|
||||||
%% Previous implementation
|
|
||||||
-spec all_trimmed2(machi_csum_table:table(),
|
|
||||||
non_neg_integer(), non_neg_integer()) -> boolean().
|
|
||||||
all_trimmed2(CsumT, Left, Right) ->
|
|
||||||
Chunks = machi_csum_table:find(CsumT, Left, Right),
|
|
||||||
runthru(Chunks, Left, Right).
|
|
||||||
|
|
||||||
%% @doc make sure all trimmed chunks are continuously chained
|
|
||||||
%% TODO: test with EQC
|
|
||||||
runthru([], Pos, Pos) -> true;
|
|
||||||
runthru([], Pos0, Pos) when Pos0 < Pos -> false;
|
|
||||||
runthru([{Offset0, Size0, trimmed}|T], Offset, Pos) when Offset0 =< Offset ->
|
|
||||||
runthru(T, Offset0+Size0, Pos);
|
|
||||||
runthru(_L, _O, _P) ->
|
|
||||||
false.
|
|
|
@ -1,478 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_file_proxy_eqc).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-ifdef(EQC).
|
|
||||||
-compile(export_all).
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include_lib("eqc/include/eqc.hrl").
|
|
||||||
-include_lib("eqc/include/eqc_statem.hrl").
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
-define(QC_OUT(P),
|
|
||||||
eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)).
|
|
||||||
|
|
||||||
-define(TESTDIR, "./eqc").
|
|
||||||
|
|
||||||
%% EUNIT TEST DEFINITION
|
|
||||||
eqc_test_() ->
|
|
||||||
PropTimeout = case os:getenv("EQC_TIME") of
|
|
||||||
false -> 30;
|
|
||||||
V -> list_to_integer(V)
|
|
||||||
end,
|
|
||||||
{timeout, PropTimeout*2 + 30,
|
|
||||||
{spawn,
|
|
||||||
[
|
|
||||||
?_assertEqual(true, eqc:quickcheck(eqc:testing_time(PropTimeout, ?QC_OUT(prop_ok()))))
|
|
||||||
]
|
|
||||||
}}.
|
|
||||||
|
|
||||||
%% SHELL HELPERS
|
|
||||||
test() ->
|
|
||||||
test(100).
|
|
||||||
|
|
||||||
test(N) ->
|
|
||||||
quickcheck(numtests(N, prop_ok())).
|
|
||||||
|
|
||||||
check() ->
|
|
||||||
check(prop_ok(), current_counterexample()).
|
|
||||||
|
|
||||||
%% GENERATORS
|
|
||||||
|
|
||||||
csum_type() ->
|
|
||||||
elements([?CSUM_TAG_NONE, ?CSUM_TAG_CLIENT_SHA, ?CSUM_TAG_SERVER_SHA]).
|
|
||||||
|
|
||||||
csum(Type, Binary) ->
|
|
||||||
case Type of
|
|
||||||
?CSUM_TAG_NONE -> <<>>;
|
|
||||||
_ -> machi_util:checksum_chunk(Binary)
|
|
||||||
end.
|
|
||||||
|
|
||||||
position(P) ->
|
|
||||||
?LET(O, offset(), P + O).
|
|
||||||
|
|
||||||
offset() ->
|
|
||||||
?SUCHTHAT(X, int(), X >= 0).
|
|
||||||
|
|
||||||
offset_base() ->
|
|
||||||
elements([4096, 6144, 7168, 8192, 20480, 100000, 1000000]).
|
|
||||||
|
|
||||||
big_offset() ->
|
|
||||||
?LET(P, int(), ?LET(X, offset_base(), P+X)).
|
|
||||||
|
|
||||||
len() ->
|
|
||||||
?SUCHTHAT(X, int(), X >= 1).
|
|
||||||
|
|
||||||
data_with_csum() ->
|
|
||||||
?LET({B,T},{eqc_gen:largebinary(), csum_type()}, {B,T, csum(T, B)}).
|
|
||||||
%?LET({B,T},{eqc_gen:binary(), csum_type()}, {B,T, csum(T, B)}).
|
|
||||||
|
|
||||||
data_with_csum(Limit) ->
|
|
||||||
%?LET({B,T},{?LET(S, Limit, eqc_gen:largebinary(S)), csum_type()}, {B,T, csum(T, B)}).
|
|
||||||
?LET({B,T},{?LET(S, Limit, eqc_gen:binary(S)), csum_type()}, {B,T, csum(T, B)}).
|
|
||||||
|
|
||||||
intervals([]) ->
|
|
||||||
[];
|
|
||||||
intervals([N]) ->
|
|
||||||
[{N, choose(1,1)}];
|
|
||||||
intervals([A,B|T]) ->
|
|
||||||
[{A, oneof([choose(1, B-A), B-A])}|intervals([B|T])].
|
|
||||||
|
|
||||||
interval_list() ->
|
|
||||||
?LET(L,
|
|
||||||
oneof([list(choose(1025, 1033)), list(choose(1024, 4096))]),
|
|
||||||
intervals(lists:usort(L))).
|
|
||||||
|
|
||||||
shuffle_interval() ->
|
|
||||||
?LET(L, interval_list(), shuffle(L)).
|
|
||||||
|
|
||||||
get_written_interval(L) ->
|
|
||||||
?LET({O, Ln}, elements(L), {O+1, Ln-1}).
|
|
||||||
|
|
||||||
%% INITIALIZATION
|
|
||||||
|
|
||||||
-record(state, {pid, prev_extra = 0,
|
|
||||||
filename = undefined,
|
|
||||||
planned_writes=[],
|
|
||||||
planned_trims=[],
|
|
||||||
written=[],
|
|
||||||
trimmed=[]}).
|
|
||||||
|
|
||||||
initial_state() ->
|
|
||||||
{_, _, MS} = os:timestamp(),
|
|
||||||
Filename = test_server:temp_name("eqc_data") ++ "." ++ integer_to_list(MS),
|
|
||||||
#state{filename=Filename, written=[{0,1024}]}.
|
|
||||||
|
|
||||||
initial_state(I, T) ->
|
|
||||||
S=initial_state(),
|
|
||||||
S#state{written=[{0,1024}],
|
|
||||||
planned_writes=I,
|
|
||||||
planned_trims=T}.
|
|
||||||
|
|
||||||
weight(_S, rewrite) -> 1;
|
|
||||||
weight(_S, _) -> 2.
|
|
||||||
|
|
||||||
%% HELPERS
|
|
||||||
|
|
||||||
get_overlaps(_Offset, _Len, [], Acc) -> lists:reverse(Acc);
|
|
||||||
get_overlaps(Offset, Len, [{Pos, Sz} = Ck|T], Acc0)
|
|
||||||
%% Overlap judgement is different from the one in machi_csum_table
|
|
||||||
%% [a=Offset, b), [x=Pos, y) ...
|
|
||||||
when
|
|
||||||
%% a =< x && x < b && b =< y
|
|
||||||
(Offset =< Pos andalso Pos < Offset + Len andalso Offset + Len =< Pos + Sz) orelse
|
|
||||||
%% a =< x && y < b
|
|
||||||
(Offset =< Pos andalso Pos + Sz < Offset + Len) orelse
|
|
||||||
%% x < a && a < y && y =< b
|
|
||||||
(Pos < Offset andalso Offset < Pos + Sz andalso Pos + Sz =< Offset + Len) orelse
|
|
||||||
%% x < a && b < y
|
|
||||||
(Pos < Offset + Len andalso Offset + Len < Pos + Sz) ->
|
|
||||||
get_overlaps(Offset, Len, T, [Ck|Acc0]);
|
|
||||||
get_overlaps(Offset, Len, [_Ck|T], Acc0) ->
|
|
||||||
get_overlaps(Offset, Len, T, Acc0).
|
|
||||||
|
|
||||||
%% Inefficient but simple code that is easy to verify by eye - returns all
|
|
||||||
%% bytes that fit in (Offset, Len)
|
|
||||||
chop(Offset, Len, List) ->
|
|
||||||
ChopLeft = fun({Pos, Sz}) when Pos < Offset andalso Offset =< Pos + Sz ->
|
|
||||||
{Offset, Sz + Pos - Offset};
|
|
||||||
({Pos, Sz}) when Offset =< Pos andalso Pos + Sz < Offset + Len ->
|
|
||||||
{Pos, Sz};
|
|
||||||
({Pos, _Sz}) when Offset =< Pos ->
|
|
||||||
{Pos, Offset + Len - Pos}
|
|
||||||
end,
|
|
||||||
ChopRight = fun({Pos, Sz}) when Offset + Len < Pos + Sz ->
|
|
||||||
{Pos, Offset + Len - Pos};
|
|
||||||
({Pos, Sz}) ->
|
|
||||||
{Pos, Sz}
|
|
||||||
end,
|
|
||||||
Filter0 = fun({_, 0}) -> false;
|
|
||||||
(Other) -> {true, Other} end,
|
|
||||||
lists:filtermap(fun(E) -> Filter0(ChopRight(ChopLeft(E))) end,
|
|
||||||
List).
|
|
||||||
|
|
||||||
%% Returns all bytes that are at the left side of Offset
|
|
||||||
chopped_left(_Offset, []) -> undefined;
|
|
||||||
chopped_left(Offset, [{Pos,_Sz}|_]) when Pos < Offset ->
|
|
||||||
{Pos, Offset - Pos};
|
|
||||||
chopped_left(_, _) ->
|
|
||||||
undefined.
|
|
||||||
|
|
||||||
chopped_right(_Offset, []) -> undefined;
|
|
||||||
chopped_right(Offset, List) ->
|
|
||||||
{Pos, Sz} = lists:last(List),
|
|
||||||
if Offset < Pos + Sz ->
|
|
||||||
{Offset, Pos + Sz - Offset};
|
|
||||||
true ->
|
|
||||||
undefined
|
|
||||||
end.
|
|
||||||
|
|
||||||
cleanup_chunk(Offset, Length, ChunkList) ->
|
|
||||||
Overlaps = get_overlaps(Offset, Length, ChunkList, []),
|
|
||||||
NewCL0 = lists:foldl(fun lists:delete/2,
|
|
||||||
ChunkList, Overlaps),
|
|
||||||
NewCL1 = case chopped_left(Offset, Overlaps) of
|
|
||||||
undefined -> NewCL0;
|
|
||||||
LeftRemain -> [LeftRemain|NewCL0]
|
|
||||||
end,
|
|
||||||
NewCL2 = case chopped_right(Offset+Length, Overlaps) of
|
|
||||||
undefined -> NewCL1;
|
|
||||||
RightRemain -> [RightRemain|NewCL1]
|
|
||||||
end,
|
|
||||||
lists:sort(NewCL2).
|
|
||||||
|
|
||||||
is_error({error, _}) -> true;
|
|
||||||
is_error({error, _, _}) -> true;
|
|
||||||
is_error(Other) -> {expected_ERROR, Other}.
|
|
||||||
|
|
||||||
is_ok({ok, _, _}) -> true;
|
|
||||||
is_ok(ok) -> true;
|
|
||||||
is_ok(Other) -> {expected_OK, Other}.
|
|
||||||
|
|
||||||
get_offset({ok, _Filename, Offset}) -> Offset;
|
|
||||||
get_offset(_) -> error(badarg).
|
|
||||||
|
|
||||||
last_byte([]) -> 0;
|
|
||||||
last_byte(L0) ->
|
|
||||||
L1 = lists:map(fun({Pos, Sz}) -> Pos + Sz end, L0),
|
|
||||||
lists:last(lists:sort(L1)).
|
|
||||||
|
|
||||||
cleanup() ->
|
|
||||||
[begin
|
|
||||||
Fs = filelib:wildcard(?TESTDIR ++ Glob),
|
|
||||||
[file:delete(F) || F <- Fs],
|
|
||||||
[file:del_dir(F) || F <- Fs]
|
|
||||||
end || Glob <- ["*/*/*/*", "*/*/*", "*/*", "*"] ],
|
|
||||||
_ = file:del_dir(?TESTDIR),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
%% start
|
|
||||||
|
|
||||||
start_pre(S) ->
|
|
||||||
S#state.pid =:= undefined.
|
|
||||||
|
|
||||||
start_command(S) ->
|
|
||||||
{call, ?MODULE, start, [S]}.
|
|
||||||
|
|
||||||
start(#state{filename=File}) ->
|
|
||||||
{ok, Pid} = machi_file_proxy:start_link(some_flu, File, ?TESTDIR),
|
|
||||||
unlink(Pid),
|
|
||||||
Pid.
|
|
||||||
|
|
||||||
start_next(S, Pid, _) ->
|
|
||||||
S#state{pid = Pid}.
|
|
||||||
|
|
||||||
%% read
|
|
||||||
|
|
||||||
read_pre(S) ->
|
|
||||||
S#state.pid /= undefined.
|
|
||||||
|
|
||||||
read_args(S) ->
|
|
||||||
[S#state.pid, oneof([offset(), big_offset()]), len()].
|
|
||||||
|
|
||||||
read_post(S, [_Pid, Off, L], Res) ->
|
|
||||||
Written = get_overlaps(Off, L, S#state.written, []),
|
|
||||||
Chopped = chop(Off, L, Written),
|
|
||||||
Trimmed = get_overlaps(Off, L, S#state.trimmed, []),
|
|
||||||
Eof = lists:max([Pos+Sz||{Pos,Sz}<-S#state.written]),
|
|
||||||
case Res of
|
|
||||||
{ok, {Written0, Trimmed0}} ->
|
|
||||||
Written1 = lists:map(fun({_, Pos, Chunk, _}) ->
|
|
||||||
{Pos, iolist_size(Chunk)}
|
|
||||||
end, Written0),
|
|
||||||
Trimmed1 = lists:map(fun({_, Pos, Sz}) -> {Pos, Sz} end, Trimmed0),
|
|
||||||
Chopped =:= Written1
|
|
||||||
andalso Trimmed =:= Trimmed1;
|
|
||||||
%% TODO: such responses are ugly, rethink the SPEC
|
|
||||||
{error, not_written} when Eof < Off + L ->
|
|
||||||
true;
|
|
||||||
{error, not_written} when Chopped =:= [] andalso Trimmed =:= [] ->
|
|
||||||
true;
|
|
||||||
_Other ->
|
|
||||||
is_error(Res)
|
|
||||||
end.
|
|
||||||
|
|
||||||
read_next(S, _Res, _Args) -> S.
|
|
||||||
|
|
||||||
read(Pid, Offset, Length) ->
|
|
||||||
machi_file_proxy:read(Pid, Offset, Length, [{needs_trimmed, true}]).
|
|
||||||
|
|
||||||
%% write
|
|
||||||
|
|
||||||
write_pre(S) ->
|
|
||||||
S#state.pid /= undefined andalso S#state.planned_writes /= [].
|
|
||||||
|
|
||||||
%% do not allow writes with empty data
|
|
||||||
write_pre(_S, [_Pid, _Extra, {<<>>, _Tag, _Csum}]) ->
|
|
||||||
?assert(false),
|
|
||||||
false;
|
|
||||||
write_pre(_S, _Args) ->
|
|
||||||
true.
|
|
||||||
|
|
||||||
write_args(S) ->
|
|
||||||
{Off, Len} = hd(S#state.planned_writes),
|
|
||||||
[S#state.pid, Off, data_with_csum(Len)].
|
|
||||||
|
|
||||||
write_post(S, [_Pid, Off, {Bin, _Tag, _Csum}] = _Args, Res) ->
|
|
||||||
Size = iolist_size(Bin),
|
|
||||||
case {get_overlaps(Off, Size, S#state.written, []),
|
|
||||||
get_overlaps(Off, Size, S#state.trimmed, [])} of
|
|
||||||
{[], []} ->
|
|
||||||
%% No overlap with either written ranges or trimmed
|
|
||||||
%% ranges; OK to write things.
|
|
||||||
eq(Res, ok);
|
|
||||||
{_, _} ->
|
|
||||||
%% overlap found in either or both of the written and
|
|
||||||
%% trimmed ranges; can't write.
|
|
||||||
is_error(Res)
|
|
||||||
end.
|
|
||||||
|
|
||||||
write_next(S, Res, [_Pid, Offset, {Bin, _Tag, _Csum}]) ->
|
|
||||||
S0 = case is_ok(Res) of
|
|
||||||
true ->
|
|
||||||
S#state{written = lists:sort(S#state.written ++ [{Offset, iolist_size(Bin)}]) };
|
|
||||||
_ ->
|
|
||||||
S
|
|
||||||
end,
|
|
||||||
S0#state{prev_extra = 0, planned_writes=tl(S0#state.planned_writes)}.
|
|
||||||
|
|
||||||
|
|
||||||
write(Pid, Offset, {Bin, Tag, Csum}) ->
|
|
||||||
Meta = [{client_csum_tag, Tag},
|
|
||||||
{client_csum, Csum}],
|
|
||||||
machi_file_proxy:write(Pid, Offset, Meta, Bin).
|
|
||||||
|
|
||||||
%% append
|
|
||||||
|
|
||||||
append_pre(S) ->
|
|
||||||
S#state.pid /= undefined.
|
|
||||||
|
|
||||||
%% do not allow appends with empty binary data
|
|
||||||
append_pre(_S, [_Pid, _Extra, {<<>>, _Tag, _Csum}]) ->
|
|
||||||
false;
|
|
||||||
append_pre(_S, _Args) ->
|
|
||||||
true.
|
|
||||||
|
|
||||||
append_args(S) ->
|
|
||||||
[S#state.pid, default(0, len()), data_with_csum()].
|
|
||||||
|
|
||||||
append(Pid, Extra, {Bin, Tag, Csum}) ->
|
|
||||||
Meta = [{client_csum_tag, Tag},
|
|
||||||
{client_csum, Csum}],
|
|
||||||
machi_file_proxy:append(Pid, Meta, Extra, Bin).
|
|
||||||
|
|
||||||
append_next(S, Res, [_Pid, Extra, {Bin, _Tag, _Csum}]) ->
|
|
||||||
case is_ok(Res) of
|
|
||||||
true ->
|
|
||||||
Offset = get_offset(Res),
|
|
||||||
S#state{prev_extra = Extra,
|
|
||||||
written = lists:sort(S#state.written ++ [{Offset, iolist_size(Bin)}])};
|
|
||||||
_Other ->
|
|
||||||
S
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% appends should always succeed unless the disk is full
|
|
||||||
%% or there's a hardware failure.
|
|
||||||
append_post(S, _Args, Res) ->
|
|
||||||
case is_ok(Res) of
|
|
||||||
true ->
|
|
||||||
Offset = get_offset(Res),
|
|
||||||
case erlang:max(last_byte(S#state.written),
|
|
||||||
last_byte(S#state.trimmed)) + S#state.prev_extra of
|
|
||||||
Offset ->
|
|
||||||
true;
|
|
||||||
UnexpectedByte ->
|
|
||||||
{wrong_offset_after_append,
|
|
||||||
{Offset, UnexpectedByte},
|
|
||||||
{S#state.written, S#state.prev_extra}}
|
|
||||||
end;
|
|
||||||
Error ->
|
|
||||||
Error
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% rewrite
|
|
||||||
|
|
||||||
rewrite_pre(S) ->
|
|
||||||
S#state.pid /= undefined andalso
|
|
||||||
(S#state.written ++ S#state.trimmed) /= [] .
|
|
||||||
|
|
||||||
rewrite_args(S) ->
|
|
||||||
?LET({Off, Len},
|
|
||||||
get_written_interval(S#state.written ++ S#state.trimmed),
|
|
||||||
[S#state.pid, Off, data_with_csum(Len)]).
|
|
||||||
|
|
||||||
rewrite(Pid, Offset, {Bin, Tag, Csum}) ->
|
|
||||||
Meta = [{client_csum_tag, Tag},
|
|
||||||
{client_csum, Csum}],
|
|
||||||
machi_file_proxy:write(Pid, Offset, Meta, Bin).
|
|
||||||
|
|
||||||
rewrite_post(_S, _Args, Res) ->
|
|
||||||
is_error(Res).
|
|
||||||
|
|
||||||
rewrite_next(S, _Res, _Args) ->
|
|
||||||
S#state{prev_extra = 0}.
|
|
||||||
|
|
||||||
%% trim
|
|
||||||
|
|
||||||
trim_pre(S) ->
|
|
||||||
S#state.pid /= undefined andalso S#state.planned_trims /= [].
|
|
||||||
|
|
||||||
trim_args(S) ->
|
|
||||||
{Offset, Length} = hd(S#state.planned_trims),
|
|
||||||
[S#state.pid, Offset, Length].
|
|
||||||
|
|
||||||
trim(Pid, Offset, Length) ->
|
|
||||||
machi_file_proxy:trim(Pid, Offset, Length, false).
|
|
||||||
|
|
||||||
trim_post(_S, [_Pid, _Offset, _Length], ok) ->
|
|
||||||
true;
|
|
||||||
trim_post(_S, [_Pid, _Offset, _Length], _Res) ->
|
|
||||||
false.
|
|
||||||
|
|
||||||
trim_next(S, Res, [_Pid, Offset, Length]) ->
|
|
||||||
S1 = case is_ok(Res) of
|
|
||||||
true ->
|
|
||||||
NewWritten = cleanup_chunk(Offset, Length, S#state.written),
|
|
||||||
Trimmed1 = cleanup_chunk(Offset, Length, S#state.trimmed),
|
|
||||||
NewTrimmed = lists:sort([{Offset, Length}|Trimmed1]),
|
|
||||||
S#state{trimmed=NewTrimmed,
|
|
||||||
written=NewWritten};
|
|
||||||
_Other ->
|
|
||||||
S
|
|
||||||
end,
|
|
||||||
S1#state{prev_extra=0,
|
|
||||||
planned_trims=tl(S#state.planned_trims)}.
|
|
||||||
|
|
||||||
stop_pre(S) ->
|
|
||||||
S#state.pid /= undefined.
|
|
||||||
|
|
||||||
stop_args(S) ->
|
|
||||||
[S#state.pid].
|
|
||||||
|
|
||||||
stop(Pid) ->
|
|
||||||
catch machi_file_proxy:stop(Pid).
|
|
||||||
|
|
||||||
stop_post(_, _, _) -> true.
|
|
||||||
|
|
||||||
stop_next(S, _, _) ->
|
|
||||||
S#state{pid=undefined, prev_extra=0}.
|
|
||||||
|
|
||||||
%% Property
|
|
||||||
|
|
||||||
prop_ok() ->
|
|
||||||
cleanup(),
|
|
||||||
?FORALL({I, T},
|
|
||||||
{shuffle_interval(), shuffle_interval()},
|
|
||||||
?FORALL(Cmds, parallel_commands(?MODULE, initial_state(I, T)),
|
|
||||||
begin
|
|
||||||
{H, S, Res} = run_parallel_commands(?MODULE, Cmds),
|
|
||||||
cleanup(),
|
|
||||||
pretty_commands(?MODULE, Cmds, {H, S, Res},
|
|
||||||
aggregate(command_names(Cmds), Res == ok))
|
|
||||||
end)).
|
|
||||||
|
|
||||||
%% Test for tester functions
|
|
||||||
chopper_test_() ->
|
|
||||||
[?_assertEqual([{0, 1024}],
|
|
||||||
get_overlaps(1, 1, [{0, 1024}], [])),
|
|
||||||
?_assertEqual([],
|
|
||||||
get_overlaps(10, 5, [{9, 1}, {15, 1}], [])),
|
|
||||||
?_assertEqual([{9,2},{14,1}],
|
|
||||||
get_overlaps(10, 5, [{9, 2}, {14, 1}], [])),
|
|
||||||
?_assertEqual([], chop(0, 0, [{0,2}])),
|
|
||||||
?_assertEqual([{0, 1}], chop(0, 1, [{0,2}])),
|
|
||||||
?_assertEqual([], chop(1, 0, [{0,2}])),
|
|
||||||
?_assertEqual([{1, 1}], chop(1, 1, [{0,2}])),
|
|
||||||
?_assertEqual([{1, 1}], chop(1, 2, [{0,2}])),
|
|
||||||
?_assertEqual([], chop(2, 1, [{0,2}])),
|
|
||||||
?_assertEqual([], chop(2, 2, [{0,2}])),
|
|
||||||
?_assertEqual([{1, 1}], chop(1, 3, [{0,2}])),
|
|
||||||
?_assertError(_, chop(3, 1, [{0,2}])),
|
|
||||||
?_assertEqual([], chop(2, 3, [{0,2}])),
|
|
||||||
?_assertEqual({0, 1}, chopped_left(1, [{0, 1024}])),
|
|
||||||
?_assertEqual([{0, 1}, {2, 1022}], cleanup_chunk(1, 1, [{0, 1024}])),
|
|
||||||
?_assertEqual([{2, 1022}], cleanup_chunk(0, 2, [{0, 1}, {2, 1022}])),
|
|
||||||
?_assert(true)
|
|
||||||
].
|
|
||||||
|
|
||||||
-endif. % EQC
|
|
||||||
-endif. % TEST
|
|
|
@ -1,142 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_file_proxy_test).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-compile(export_all).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-include("machi.hrl").
|
|
||||||
|
|
||||||
clean_up_data_dir(DataDir) ->
|
|
||||||
[begin
|
|
||||||
Fs = filelib:wildcard(DataDir ++ Glob),
|
|
||||||
[file:delete(F) || F <- Fs],
|
|
||||||
[file:del_dir(F) || F <- Fs]
|
|
||||||
end || Glob <- ["*/*/*/*", "*/*/*", "*/*", "*"] ],
|
|
||||||
_ = file:del_dir(DataDir),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
-define(TESTDIR, "./t").
|
|
||||||
-define(HYOOGE, 75 * 1024 * 1024). % 75 MBytes
|
|
||||||
|
|
||||||
random_binary_single() ->
|
|
||||||
%% OK, I guess it's not that random...
|
|
||||||
<<"Four score and seven years ago our fathers brought forth on this
|
|
||||||
continent a new nation, conceived in liberty, and dedicated to the
|
|
||||||
proposition that all men are created equal.
|
|
||||||
|
|
||||||
Now we are engaged in a great civil war, testing whether that nation, or any
|
|
||||||
nation so conceived and so dedicated, can long endure. We are met on a great
|
|
||||||
battlefield of that war. We have come to dedicate a portion of that field, as a
|
|
||||||
final resting place for those who here gave their lives that that nation
|
|
||||||
might live. It is altogether fitting and proper that we should do this.
|
|
||||||
|
|
||||||
But, in a larger sense, we can not dedicate, we can not consecrate, we can not
|
|
||||||
hallow this ground. The brave men, living and dead, who struggled here, have
|
|
||||||
consecrated it, far above our poor power to add or detract. The world will
|
|
||||||
little note, nor long remember what we say here, but it can never forget what
|
|
||||||
they did here. It is for us the living, rather, to be dedicated here to the
|
|
||||||
unfinished work which they who fought here have thus far so nobly advanced. It
|
|
||||||
is rather for us to be here dedicated to the great task remaining before us—
|
|
||||||
that from these honored dead we take increased devotion to that cause for which
|
|
||||||
they gave the last full measure of devotion— that we here highly resolve that
|
|
||||||
these dead shall not have died in vain— that this nation, under God, shall have
|
|
||||||
a new birth of freedom— and that government of the people, by the people, for
|
|
||||||
the people, shall not perish from the earth.">>.
|
|
||||||
|
|
||||||
random_binary(Start, End) ->
|
|
||||||
Size = byte_size(random_binary_single()) - 1,
|
|
||||||
case End > Size of
|
|
||||||
true ->
|
|
||||||
Copies = ( End div Size ) + 1,
|
|
||||||
D0 = binary:copy(random_binary_single(), Copies),
|
|
||||||
binary:part(<<D0/binary>>, Start, End);
|
|
||||||
false ->
|
|
||||||
binary:part(random_binary_single(), Start, End)
|
|
||||||
end.
|
|
||||||
|
|
||||||
setup() ->
|
|
||||||
{ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR),
|
|
||||||
Pid.
|
|
||||||
|
|
||||||
teardown(Pid) ->
|
|
||||||
catch machi_file_proxy:stop(Pid).
|
|
||||||
|
|
||||||
machi_file_proxy_test_() ->
|
|
||||||
clean_up_data_dir(?TESTDIR),
|
|
||||||
{setup,
|
|
||||||
fun setup/0,
|
|
||||||
fun teardown/1,
|
|
||||||
fun(Pid) ->
|
|
||||||
[
|
|
||||||
?_assertEqual({error, bad_arg}, machi_file_proxy:read(Pid, -1, -1)),
|
|
||||||
?_assertEqual({error, bad_arg}, machi_file_proxy:write(Pid, -1, <<"yo">>)),
|
|
||||||
?_assertEqual({error, bad_arg}, machi_file_proxy:append(Pid, [], -1, <<"krep">>)),
|
|
||||||
?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1)),
|
|
||||||
?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, 1)),
|
|
||||||
?_assertMatch({ok, {_, []}}, machi_file_proxy:read(Pid, 1, 1024)),
|
|
||||||
?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, 1024, ?HYOOGE)),
|
|
||||||
?_assertEqual({error, not_written}, machi_file_proxy:read(Pid, ?HYOOGE, 1)),
|
|
||||||
{timeout, 10,
|
|
||||||
?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, random_binary(0, ?HYOOGE)))},
|
|
||||||
?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))),
|
|
||||||
?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1024, <<"fail">>)),
|
|
||||||
?_assertEqual({error, written}, machi_file_proxy:write(Pid, 1, <<"fail">>)),
|
|
||||||
?_assertMatch({ok, {[{_, _, _, _}], []}}, machi_file_proxy:read(Pid, 1025, 1000)),
|
|
||||||
?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, [], 1024, <<"mind the gap">>)),
|
|
||||||
?_assertEqual(ok, machi_file_proxy:write(Pid, 2060, [], random_binary(0, 1024)))
|
|
||||||
]
|
|
||||||
end}.
|
|
||||||
|
|
||||||
multiple_chunks_read_test_() ->
|
|
||||||
clean_up_data_dir(?TESTDIR),
|
|
||||||
{setup,
|
|
||||||
fun setup/0,
|
|
||||||
fun teardown/1,
|
|
||||||
fun(Pid) ->
|
|
||||||
[
|
|
||||||
?_assertEqual(ok, machi_file_proxy:trim(Pid, 0, 1, false)),
|
|
||||||
?_assertMatch({ok, {[], [{"test", 0, 1}]}},
|
|
||||||
machi_file_proxy:read(Pid, 0, 1,
|
|
||||||
#read_opts{needs_trimmed=true})),
|
|
||||||
?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))),
|
|
||||||
?_assertEqual(ok, machi_file_proxy:write(Pid, 10000, <<"fail">>)),
|
|
||||||
?_assertEqual(ok, machi_file_proxy:write(Pid, 20000, <<"fail">>)),
|
|
||||||
?_assertEqual(ok, machi_file_proxy:write(Pid, 30000, <<"fail">>)),
|
|
||||||
%% Freeza
|
|
||||||
?_assertEqual(ok, machi_file_proxy:write(Pid, 530000, <<"fail">>)),
|
|
||||||
?_assertMatch({ok, {[{"test", 1024, _, _},
|
|
||||||
{"test", 10000, <<"fail">>, _},
|
|
||||||
{"test", 20000, <<"fail">>, _},
|
|
||||||
{"test", 30000, <<"fail">>, _},
|
|
||||||
{"test", 530000, <<"fail">>, _}], []}},
|
|
||||||
machi_file_proxy:read(Pid, 1024, 530000)),
|
|
||||||
?_assertMatch({ok, {[{"test", 1, _, _}], [{"test", 0, 1}]}},
|
|
||||||
machi_file_proxy:read(Pid, 0, 1024,
|
|
||||||
#read_opts{needs_trimmed=true}))
|
|
||||||
]
|
|
||||||
end}.
|
|
||||||
|
|
||||||
-endif. % !PULSE
|
|
||||||
-endif. % TEST.
|
|
|
@ -30,22 +30,6 @@
|
||||||
-define(FLU, machi_flu1).
|
-define(FLU, machi_flu1).
|
||||||
-define(FLU_C, machi_flu1_client).
|
-define(FLU_C, machi_flu1_client).
|
||||||
|
|
||||||
get_env_vars(App, Ks) ->
|
|
||||||
Raw = [application:get_env(App, K) || K <- Ks],
|
|
||||||
Old = lists:zip(Ks, Raw),
|
|
||||||
{App, Old}.
|
|
||||||
|
|
||||||
clean_up_env_vars({App, Old}) ->
|
|
||||||
[case Res of
|
|
||||||
undefined ->
|
|
||||||
application:unset_env(App, K);
|
|
||||||
{ok, V} ->
|
|
||||||
application:set_env(App, K, V)
|
|
||||||
end || {K, Res} <- Old].
|
|
||||||
|
|
||||||
filter_env_var({ok, V}) -> V;
|
|
||||||
filter_env_var(Else) -> Else.
|
|
||||||
|
|
||||||
clean_up_data_dir(DataDir) ->
|
clean_up_data_dir(DataDir) ->
|
||||||
[begin
|
[begin
|
||||||
Fs = filelib:wildcard(DataDir ++ Glob),
|
Fs = filelib:wildcard(DataDir ++ Glob),
|
||||||
|
@ -55,10 +39,10 @@ clean_up_data_dir(DataDir) ->
|
||||||
_ = file:del_dir(DataDir),
|
_ = file:del_dir(DataDir),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
start_flu_package(RegName, TcpPort, DataDir) ->
|
setup_test_flu(RegName, TcpPort, DataDir) ->
|
||||||
start_flu_package(RegName, TcpPort, DataDir, []).
|
setup_test_flu(RegName, TcpPort, DataDir, []).
|
||||||
|
|
||||||
start_flu_package(RegName, TcpPort, DataDir, Props) ->
|
setup_test_flu(RegName, TcpPort, DataDir, Props) ->
|
||||||
case proplists:get_value(save_data_dir, Props) of
|
case proplists:get_value(save_data_dir, Props) of
|
||||||
true ->
|
true ->
|
||||||
ok;
|
ok;
|
||||||
|
@ -66,89 +50,67 @@ start_flu_package(RegName, TcpPort, DataDir, Props) ->
|
||||||
clean_up_data_dir(DataDir)
|
clean_up_data_dir(DataDir)
|
||||||
end,
|
end,
|
||||||
|
|
||||||
maybe_start_sup(),
|
{ok, FLU1} = ?FLU:start_link([{RegName, TcpPort, DataDir}|Props]),
|
||||||
machi_flu_psup:start_flu_package(RegName, TcpPort, DataDir, Props).
|
%% TODO the process structuring/racy-ness of the various processes
|
||||||
|
%% of the FLU needs to be deterministic to remove this sleep race
|
||||||
stop_flu_package(FluName) ->
|
%% "prevention".
|
||||||
machi_flu_psup:stop_flu_package(FluName),
|
|
||||||
Pid = whereis(machi_sup),
|
|
||||||
exit(Pid, normal),
|
|
||||||
machi_util:wait_for_death(Pid, 100).
|
|
||||||
|
|
||||||
maybe_start_sup() ->
|
|
||||||
case whereis(machi_sup) of
|
|
||||||
undefined ->
|
|
||||||
machi_sup:start_link(),
|
|
||||||
%% evil but we have to let stuff start up
|
|
||||||
timer:sleep(10),
|
timer:sleep(10),
|
||||||
maybe_start_sup();
|
FLU1.
|
||||||
Pid -> Pid
|
|
||||||
end.
|
|
||||||
|
|
||||||
-ifndef(PULSE).
|
-ifndef(PULSE).
|
||||||
|
|
||||||
flu_smoke_test() ->
|
flu_smoke_test() ->
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
TcpPort = 12957,
|
TcpPort = 32957,
|
||||||
DataDir = "./data",
|
DataDir = "./data",
|
||||||
NSInfo = undefined,
|
|
||||||
NoCSum = <<>>,
|
|
||||||
Prefix = <<"prefix!">>,
|
Prefix = <<"prefix!">>,
|
||||||
BadPrefix = BadFile = "no/good",
|
BadPrefix = BadFile = "no/good",
|
||||||
|
|
||||||
W_props = [{initial_wedged, false}],
|
W_props = [{initial_wedged, false}],
|
||||||
{_, _, _} = machi_test_util:start_flu_package(smoke_flu, TcpPort, DataDir, W_props),
|
FLU1 = setup_test_flu(smoke_flu, TcpPort, DataDir, W_props),
|
||||||
try
|
try
|
||||||
Msg = "Hello, world!",
|
Msg = "Hello, world!",
|
||||||
Msg = ?FLU_C:echo(Host, TcpPort, Msg),
|
Msg = ?FLU_C:echo(Host, TcpPort, Msg),
|
||||||
{error, bad_arg} = ?FLU_C:checksum_list(Host, TcpPort,"does-not-exist"),
|
{error, no_such_file} = ?FLU_C:checksum_list(Host, TcpPort,
|
||||||
{error, bad_arg} = ?FLU_C:checksum_list(Host, TcpPort, BadFile),
|
?DUMMY_PV1_EPOCH,
|
||||||
|
"does-not-exist"),
|
||||||
|
{error, bad_arg} = ?FLU_C:checksum_list(Host, TcpPort,
|
||||||
|
?DUMMY_PV1_EPOCH, BadFile),
|
||||||
|
|
||||||
{ok, []} = ?FLU_C:list_files(Host, TcpPort, ?DUMMY_PV1_EPOCH),
|
{ok, []} = ?FLU_C:list_files(Host, TcpPort, ?DUMMY_PV1_EPOCH),
|
||||||
{ok, {false, _,_,_}} = ?FLU_C:wedge_status(Host, TcpPort),
|
{ok, {false, _}} = ?FLU_C:wedge_status(Host, TcpPort),
|
||||||
|
|
||||||
Chunk1 = <<"yo!">>,
|
Chunk1 = <<"yo!">>,
|
||||||
{ok, {Off1,Len1,File1}} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo,
|
{ok, {Off1,Len1,File1}} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
Prefix, Chunk1, NoCSum),
|
Prefix, Chunk1),
|
||||||
{ok, {[{_, Off1, Chunk1, _}], _}} = ?FLU_C:read_chunk(Host, TcpPort,
|
{ok, Chunk1} = ?FLU_C:read_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH,
|
||||||
NSInfo, ?DUMMY_PV1_EPOCH,
|
File1, Off1, Len1),
|
||||||
File1, Off1, Len1,
|
{ok, KludgeBin} = ?FLU_C:checksum_list(Host, TcpPort,
|
||||||
noopt),
|
?DUMMY_PV1_EPOCH, File1),
|
||||||
{ok, KludgeBin} = ?FLU_C:checksum_list(Host, TcpPort, File1),
|
|
||||||
true = is_binary(KludgeBin),
|
true = is_binary(KludgeBin),
|
||||||
{error, bad_arg} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo,
|
{error, bad_arg} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
BadPrefix, Chunk1, NoCSum),
|
BadPrefix, Chunk1),
|
||||||
{ok, [{_,File1}]} = ?FLU_C:list_files(Host, TcpPort, ?DUMMY_PV1_EPOCH),
|
{ok, [{_,File1}]} = ?FLU_C:list_files(Host, TcpPort, ?DUMMY_PV1_EPOCH),
|
||||||
Len1 = size(Chunk1),
|
Len1 = size(Chunk1),
|
||||||
{error, not_written} = ?FLU_C:read_chunk(Host, TcpPort,
|
{error, not_written} = ?FLU_C:read_chunk(Host, TcpPort,
|
||||||
NSInfo, ?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
File1, Off1*983829323, Len1,
|
File1, Off1*983829323, Len1),
|
||||||
noopt),
|
{error, partial_read} = ?FLU_C:read_chunk(Host, TcpPort,
|
||||||
%% XXX FIXME
|
?DUMMY_PV1_EPOCH,
|
||||||
%%
|
File1, Off1, Len1*9999),
|
||||||
%% This is failing because the read extends past the end of the file.
|
|
||||||
%% I guess the semantic here is that we should consider any read which
|
|
||||||
%% *starts* at a valid offset to be a partial read, even if the length
|
|
||||||
%% of the read will cause it to fail.
|
|
||||||
%%
|
|
||||||
%% {error, partial_read} = ?FLU_C:read_chunk(Host, TcpPort,
|
|
||||||
%% NSInfo, ?DUMMY_PV1_EPOCH,
|
|
||||||
%% File1, Off1, Len1*9999),
|
|
||||||
|
|
||||||
{ok, {Off1b,Len1b,File1b}} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo,
|
{ok, {Off1b,Len1b,File1b}} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
Prefix, Chunk1,NoCSum),
|
Prefix, Chunk1),
|
||||||
Extra = 42,
|
Extra = 42,
|
||||||
Opts1 = #append_opts{chunk_extra=Extra},
|
{ok, {Off1c,Len1c,File1c}} = ?FLU_C:append_chunk_extra(Host, TcpPort,
|
||||||
{ok, {Off1c,Len1c,File1c}} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo,
|
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
Prefix, Chunk1, NoCSum,
|
Prefix, Chunk1, Extra),
|
||||||
Opts1, infinity),
|
|
||||||
{ok, {Off1d,Len1d,File1d}} = ?FLU_C:append_chunk(Host, TcpPort,
|
{ok, {Off1d,Len1d,File1d}} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
NSInfo,
|
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
Prefix, Chunk1,NoCSum),
|
Prefix, Chunk1),
|
||||||
if File1b == File1c, File1c == File1d ->
|
if File1b == File1c, File1c == File1d ->
|
||||||
true = (Off1c == Off1b + Len1b),
|
true = (Off1c == Off1b + Len1b),
|
||||||
true = (Off1d == Off1c + Len1c + Extra);
|
true = (Off1d == Off1c + Len1c + Extra);
|
||||||
|
@ -156,44 +118,27 @@ flu_smoke_test() ->
|
||||||
exit(not_mandatory_but_test_expected_same_file_fixme)
|
exit(not_mandatory_but_test_expected_same_file_fixme)
|
||||||
end,
|
end,
|
||||||
|
|
||||||
|
Chunk1_cs = {<<?CSUM_TAG_NONE:8, 0:(8*20)>>, Chunk1},
|
||||||
|
{ok, {Off1e,Len1e,File1e}} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
|
?DUMMY_PV1_EPOCH,
|
||||||
|
Prefix, Chunk1_cs),
|
||||||
|
|
||||||
Chunk2 = <<"yo yo">>,
|
Chunk2 = <<"yo yo">>,
|
||||||
Len2 = byte_size(Chunk2),
|
Len2 = byte_size(Chunk2),
|
||||||
Off2 = ?MINIMUM_OFFSET + 77,
|
Off2 = ?MINIMUM_OFFSET + 77,
|
||||||
File2 = "smoke-whole-file^^0^1^1",
|
File2 = "smoke-whole-file",
|
||||||
ok = ?FLU_C:write_chunk(Host, TcpPort, NSInfo, ?DUMMY_PV1_EPOCH,
|
ok = ?FLU_C:write_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH,
|
||||||
File2, Off2, Chunk2, NoCSum),
|
File2, Off2, Chunk2),
|
||||||
{error, bad_arg} = ?FLU_C:write_chunk(Host, TcpPort, NSInfo, ?DUMMY_PV1_EPOCH,
|
{error, bad_arg} = ?FLU_C:write_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH,
|
||||||
BadFile, Off2, Chunk2, NoCSum),
|
BadFile, Off2, Chunk2),
|
||||||
{ok, {[{_, Off2, Chunk2, _}], _}} =
|
{ok, Chunk2} = ?FLU_C:read_chunk(Host, TcpPort, ?DUMMY_PV1_EPOCH,
|
||||||
?FLU_C:read_chunk(Host, TcpPort, NSInfo, ?DUMMY_PV1_EPOCH, File2, Off2, Len2, noopt),
|
File2, Off2, Len2),
|
||||||
|
{error, not_written} = ?FLU_C:read_chunk(Host, TcpPort,
|
||||||
|
?DUMMY_PV1_EPOCH,
|
||||||
|
"no!!", Off2, Len2),
|
||||||
{error, bad_arg} = ?FLU_C:read_chunk(Host, TcpPort,
|
{error, bad_arg} = ?FLU_C:read_chunk(Host, TcpPort,
|
||||||
NSInfo, ?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
"no!!", Off2, Len2, noopt),
|
BadFile, Off2, Len2),
|
||||||
{error, bad_arg} = ?FLU_C:read_chunk(Host, TcpPort,
|
|
||||||
NSInfo, ?DUMMY_PV1_EPOCH,
|
|
||||||
BadFile, Off2, Len2, noopt),
|
|
||||||
|
|
||||||
%% Make a connected socket.
|
|
||||||
Sock1 = ?FLU_C:connect(#p_srvr{address=Host, port=TcpPort}),
|
|
||||||
|
|
||||||
%% Let's test some cluster version enforcement.
|
|
||||||
Good_EpochNum = 0,
|
|
||||||
Good_NSVersion = 0,
|
|
||||||
Good_NS = <<>>,
|
|
||||||
{ok, {false, {Good_EpochNum,_}, Good_NSVersion, GoodNS}} =
|
|
||||||
?FLU_C:wedge_status(Sock1),
|
|
||||||
NS_good = #ns_info{version=Good_NSVersion, name=Good_NS},
|
|
||||||
{ok, {[{_, Off2, Chunk2, _}], _}} =
|
|
||||||
?FLU_C:read_chunk(Sock1, NS_good, ?DUMMY_PV1_EPOCH,
|
|
||||||
File2, Off2, Len2, noopt),
|
|
||||||
NS_bad_version = #ns_info{version=1, name=Good_NS},
|
|
||||||
NS_bad_name = #ns_info{version=Good_NSVersion, name= <<"foons">>},
|
|
||||||
{error, bad_epoch} =
|
|
||||||
?FLU_C:read_chunk(Sock1, NS_bad_version, ?DUMMY_PV1_EPOCH,
|
|
||||||
File2, Off2, Len2, noopt),
|
|
||||||
{error, bad_arg} =
|
|
||||||
?FLU_C:read_chunk(Sock1, NS_bad_name, ?DUMMY_PV1_EPOCH,
|
|
||||||
File2, Off2, Len2, noopt),
|
|
||||||
|
|
||||||
%% We know that File1 still exists. Pretend that we've done a
|
%% We know that File1 still exists. Pretend that we've done a
|
||||||
%% migration and exercise the delete_migration() API.
|
%% migration and exercise the delete_migration() API.
|
||||||
|
@ -210,76 +155,72 @@ flu_smoke_test() ->
|
||||||
{error, bad_arg} = ?FLU_C:trunc_hack(Host, TcpPort,
|
{error, bad_arg} = ?FLU_C:trunc_hack(Host, TcpPort,
|
||||||
?DUMMY_PV1_EPOCH, BadFile),
|
?DUMMY_PV1_EPOCH, BadFile),
|
||||||
|
|
||||||
ok = ?FLU_C:quit(Sock1)
|
ok = ?FLU_C:quit(?FLU_C:connect(#p_srvr{address=Host,
|
||||||
|
port=TcpPort}))
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_package()
|
ok = ?FLU:stop(FLU1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
flu_projection_smoke_test() ->
|
flu_projection_smoke_test() ->
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
TcpPort = 12959,
|
TcpPort = 32959,
|
||||||
DataDir = "./data.projst",
|
DataDir = "./data",
|
||||||
{_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir),
|
|
||||||
|
FLU1 = setup_test_flu(projection_test_flu, TcpPort, DataDir),
|
||||||
try
|
try
|
||||||
[ok = flu_projection_common(Host, TcpPort, T) ||
|
[ok = flu_projection_common(Host, TcpPort, T) ||
|
||||||
T <- [public, private] ]
|
T <- [public, private] ]
|
||||||
%% , {ok, {false, EpochID1,_,_}} = ?FLU_C:wedge_status(Host, TcpPort),
|
%% , {ok, {false, EpochID1}} = ?FLU_C:wedge_status(Host, TcpPort),
|
||||||
%% io:format(user, "EpochID1 ~p\n", [EpochID1])
|
%% io:format(user, "EpochID1 ~p\n", [EpochID1])
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_package()
|
ok = ?FLU:stop(FLU1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
flu_projection_common(Host, TcpPort, T) ->
|
flu_projection_common(Host, TcpPort, T) ->
|
||||||
{ok, {0,_}} = ?FLU_C:get_latest_epochid(Host, TcpPort, T),
|
{ok, {0,_}} = ?FLU_C:get_latest_epochid(Host, TcpPort, T),
|
||||||
{ok, #projection_v1{epoch_number=0}} =
|
{error, not_written} =
|
||||||
?FLU_C:read_latest_projection(Host, TcpPort, T),
|
?FLU_C:read_latest_projection(Host, TcpPort, T),
|
||||||
{ok, [0]} = ?FLU_C:list_all_projections(Host, TcpPort, T),
|
{ok, []} = ?FLU_C:list_all_projections(Host, TcpPort, T),
|
||||||
{ok, [#projection_v1{epoch_number=0}]} =
|
{ok, []} = ?FLU_C:get_all_projections(Host, TcpPort, T),
|
||||||
?FLU_C:get_all_projections(Host, TcpPort, T),
|
|
||||||
|
|
||||||
P_a = #p_srvr{name=a, address="localhost", port=4321},
|
P_a = #p_srvr{name=a, address="localhost", port=4321},
|
||||||
P1 = machi_projection:new(1, a, [P_a], [], [a], [], []),
|
P1 = machi_projection:new(1, a, [P_a], [], [a], [], []),
|
||||||
ok = ?FLU_C:write_projection(Host, TcpPort, T, P1),
|
ok = ?FLU_C:write_projection(Host, TcpPort, T, P1),
|
||||||
case ?FLU_C:write_projection(Host, TcpPort, T, P1) of
|
{error, written} = ?FLU_C:write_projection(Host, TcpPort, T, P1),
|
||||||
{error, written} when T == public -> ok;
|
|
||||||
ok when T == private -> ok
|
|
||||||
end,
|
|
||||||
{ok, P1} = ?FLU_C:read_projection(Host, TcpPort, T, 1),
|
{ok, P1} = ?FLU_C:read_projection(Host, TcpPort, T, 1),
|
||||||
{ok, {1,_}} = ?FLU_C:get_latest_epochid(Host, TcpPort, T),
|
{ok, {1,_}} = ?FLU_C:get_latest_epochid(Host, TcpPort, T),
|
||||||
{ok, P1} = ?FLU_C:read_latest_projection(Host, TcpPort, T),
|
{ok, P1} = ?FLU_C:read_latest_projection(Host, TcpPort, T),
|
||||||
{ok, [0,1]} = ?FLU_C:list_all_projections(Host, TcpPort, T),
|
{ok, [1]} = ?FLU_C:list_all_projections(Host, TcpPort, T),
|
||||||
{ok, [_,P1]} = ?FLU_C:get_all_projections(Host, TcpPort, T),
|
{ok, [P1]} = ?FLU_C:get_all_projections(Host, TcpPort, T),
|
||||||
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort, T, 2),
|
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort, T, 2),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
bad_checksum_test() ->
|
bad_checksum_test() ->
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
TcpPort = 12960,
|
TcpPort = 32960,
|
||||||
DataDir = "./data.bct",
|
DataDir = "./data",
|
||||||
|
|
||||||
Opts = [{initial_wedged, false}],
|
Opts = [{initial_wedged, false}],
|
||||||
{_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir, Opts),
|
FLU1 = setup_test_flu(projection_test_flu, TcpPort, DataDir, Opts),
|
||||||
NSInfo = undefined,
|
|
||||||
try
|
try
|
||||||
Prefix = <<"some prefix">>,
|
Prefix = <<"some prefix">>,
|
||||||
Chunk1 = <<"yo yo yo">>,
|
Chunk1 = <<"yo yo yo">>,
|
||||||
BadCSum = {?CSUM_TAG_CLIENT_SHA, crypto:hash(sha, ".................")},
|
Chunk1_badcs = {<<?CSUM_TAG_CLIENT_SHA:8, 0:(8*20)>>, Chunk1},
|
||||||
{error, bad_checksum} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo,
|
{error, bad_checksum} = ?FLU_C:append_chunk(Host, TcpPort,
|
||||||
?DUMMY_PV1_EPOCH,
|
?DUMMY_PV1_EPOCH,
|
||||||
Prefix,
|
Prefix, Chunk1_badcs),
|
||||||
Chunk1, BadCSum),
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_package()
|
ok = ?FLU:stop(FLU1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
witness_test() ->
|
witness_test() ->
|
||||||
Host = "localhost",
|
Host = "localhost",
|
||||||
TcpPort = 12961,
|
TcpPort = 32961,
|
||||||
DataDir = "./data.witness",
|
DataDir = "./data",
|
||||||
|
|
||||||
Opts = [{initial_wedged, false}, {witness_mode, true}],
|
Opts = [{initial_wedged, false}, {witness_mode, true}],
|
||||||
{_,_,_} = machi_test_util:start_flu_package(projection_test_flu, TcpPort, DataDir, Opts),
|
FLU1 = setup_test_flu(projection_test_flu, TcpPort, DataDir, Opts),
|
||||||
NSInfo = undefined,
|
|
||||||
NoCSum = <<>>,
|
|
||||||
try
|
try
|
||||||
Prefix = <<"some prefix">>,
|
Prefix = <<"some prefix">>,
|
||||||
Chunk1 = <<"yo yo yo">>,
|
Chunk1 = <<"yo yo yo">>,
|
||||||
|
@ -292,14 +233,15 @@ witness_test() ->
|
||||||
{ok, EpochID1} = ?FLU_C:get_latest_epochid(Host, TcpPort, private),
|
{ok, EpochID1} = ?FLU_C:get_latest_epochid(Host, TcpPort, private),
|
||||||
|
|
||||||
%% Witness-protected ops all fail
|
%% Witness-protected ops all fail
|
||||||
{error, bad_arg} = ?FLU_C:append_chunk(Host, TcpPort, NSInfo, EpochID1,
|
{error, bad_arg} = ?FLU_C:append_chunk(Host, TcpPort, EpochID1,
|
||||||
Prefix, Chunk1, NoCSum),
|
Prefix, Chunk1),
|
||||||
File = <<"foofile">>,
|
File = <<"foofile">>,
|
||||||
{error, bad_arg} = ?FLU_C:read_chunk(Host, TcpPort, NSInfo, EpochID1,
|
{error, bad_arg} = ?FLU_C:read_chunk(Host, TcpPort, EpochID1,
|
||||||
File, 9999, 9999, noopt),
|
File, 9999, 9999),
|
||||||
{error, bad_arg} = ?FLU_C:checksum_list(Host, TcpPort, File),
|
{error, bad_arg} = ?FLU_C:checksum_list(Host, TcpPort, EpochID1,
|
||||||
|
File),
|
||||||
{error, bad_arg} = ?FLU_C:list_files(Host, TcpPort, EpochID1),
|
{error, bad_arg} = ?FLU_C:list_files(Host, TcpPort, EpochID1),
|
||||||
{ok, {false, EpochID1,_,_}} = ?FLU_C:wedge_status(Host, TcpPort),
|
{ok, {false, EpochID1}} = ?FLU_C:wedge_status(Host, TcpPort),
|
||||||
{ok, _} = ?FLU_C:get_latest_epochid(Host, TcpPort, public),
|
{ok, _} = ?FLU_C:get_latest_epochid(Host, TcpPort, public),
|
||||||
{ok, _} = ?FLU_C:read_latest_projection(Host, TcpPort, public),
|
{ok, _} = ?FLU_C:read_latest_projection(Host, TcpPort, public),
|
||||||
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort,
|
{error, not_written} = ?FLU_C:read_projection(Host, TcpPort,
|
||||||
|
@ -310,7 +252,7 @@ witness_test() ->
|
||||||
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_package()
|
ok = ?FLU:stop(FLU1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% The purpose of timing_pb_encoding_test_ and timing_bif_encoding_test_ is
|
%% The purpose of timing_pb_encoding_test_ and timing_bif_encoding_test_ is
|
||||||
|
@ -348,7 +290,7 @@ timing_pb_encoding_test2() ->
|
||||||
RUN2 = timer:tc(fun() -> begin [_ = DoIt2() || _ <- XX], ok end end),
|
RUN2 = timer:tc(fun() -> begin [_ = DoIt2() || _ <- XX], ok end end),
|
||||||
erlang:garbage_collect(),
|
erlang:garbage_collect(),
|
||||||
Factor = (element(1, RUN1) / element(1, RUN2)),
|
Factor = (element(1, RUN1) / element(1, RUN2)),
|
||||||
io:format(" speed factor=~.2f ", [Factor]),
|
io:format(user, " speed factor=~.2f ", [Factor]),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
-endif. % !PULSE
|
-endif. % !PULSE
|
||||||
|
|
|
@ -38,12 +38,12 @@ smoke_test_() ->
|
||||||
{timeout, 5*60, fun() -> smoke_test2() end}.
|
{timeout, 5*60, fun() -> smoke_test2() end}.
|
||||||
|
|
||||||
smoke_test2() ->
|
smoke_test2() ->
|
||||||
Ps = [{a,#p_srvr{name=a, address="localhost", port=5550, props="./data.a"}},
|
Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}},
|
||||||
{b,#p_srvr{name=b, address="localhost", port=5551, props="./data.b"}},
|
{b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}},
|
||||||
{c,#p_srvr{name=c, address="localhost", port=5552, props="./data.c"}}
|
{c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}}
|
||||||
],
|
],
|
||||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
||||||
{ok, SupPid} = machi_sup:start_link(),
|
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||||
try
|
try
|
||||||
%% Only run a, don't run b & c so we have 100% failures talking to them
|
%% Only run a, don't run b & c so we have 100% failures talking to them
|
||||||
[begin
|
[begin
|
||||||
|
@ -66,15 +66,15 @@ partial_stop_restart_test_() ->
|
||||||
{timeout, 5*60, fun() -> partial_stop_restart2() end}.
|
{timeout, 5*60, fun() -> partial_stop_restart2() end}.
|
||||||
|
|
||||||
partial_stop_restart2() ->
|
partial_stop_restart2() ->
|
||||||
Ps = [{a,#p_srvr{name=a, address="localhost", port=5560, props="./data.a"}},
|
Ps = [{a,#p_srvr{name=a, address="localhost", port=5555, props="./data.a"}},
|
||||||
{b,#p_srvr{name=b, address="localhost", port=5561, props="./data.b"}},
|
{b,#p_srvr{name=b, address="localhost", port=5556, props="./data.b"}},
|
||||||
{c,#p_srvr{name=c, address="localhost", port=5562, props="./data.c"}}
|
{c,#p_srvr{name=c, address="localhost", port=5557, props="./data.c"}}
|
||||||
],
|
],
|
||||||
ChMgrs = [machi_flu_psup:make_mgr_supname(P#p_srvr.name) || {_,P} <-Ps],
|
ChMgrs = [machi_flu_psup:make_mgr_supname(P#p_srvr.name) || {_,P} <-Ps],
|
||||||
PStores = [machi_flu_psup:make_proj_supname(P#p_srvr.name) || {_,P} <-Ps],
|
PStores = [machi_flu_psup:make_proj_supname(P#p_srvr.name) || {_,P} <-Ps],
|
||||||
Dict = orddict:from_list(Ps),
|
Dict = orddict:from_list(Ps),
|
||||||
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
[os:cmd("rm -rf " ++ P#p_srvr.props) || {_,P} <- Ps],
|
||||||
{ok, SupPid} = machi_sup:start_link(),
|
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||||
DbgProps = [{initial_wedged, true}],
|
DbgProps = [{initial_wedged, true}],
|
||||||
Start = fun({_,P}) ->
|
Start = fun({_,P}) ->
|
||||||
#p_srvr{name=Name, port=Port, props=Dir} = P,
|
#p_srvr{name=Name, port=Port, props=Dir} = P,
|
||||||
|
@ -84,23 +84,20 @@ partial_stop_restart2() ->
|
||||||
WedgeStatus = fun({_,#p_srvr{address=Addr, port=TcpPort}}) ->
|
WedgeStatus = fun({_,#p_srvr{address=Addr, port=TcpPort}}) ->
|
||||||
machi_flu1_client:wedge_status(Addr, TcpPort)
|
machi_flu1_client:wedge_status(Addr, TcpPort)
|
||||||
end,
|
end,
|
||||||
NSInfo = undefined,
|
|
||||||
Append = fun({_,#p_srvr{address=Addr, port=TcpPort}}, EpochID) ->
|
Append = fun({_,#p_srvr{address=Addr, port=TcpPort}}, EpochID) ->
|
||||||
NoCSum = <<>>,
|
|
||||||
machi_flu1_client:append_chunk(Addr, TcpPort,
|
machi_flu1_client:append_chunk(Addr, TcpPort,
|
||||||
NSInfo, EpochID,
|
EpochID,
|
||||||
<<"prefix">>,
|
<<"prefix">>, <<"data">>)
|
||||||
<<"data">>, NoCSum)
|
|
||||||
end,
|
end,
|
||||||
try
|
try
|
||||||
[Start(P) || P <- Ps],
|
[Start(P) || P <- Ps],
|
||||||
[{ok, {true, _,_,_}} = WedgeStatus(P) || P <- Ps], % all are wedged
|
[{ok, {true, _}} = WedgeStatus(P) || P <- Ps], % all are wedged
|
||||||
[{error,wedged} = Append(P, ?DUMMY_PV1_EPOCH) || P <- Ps], % all are wedged
|
[{error,wedged} = Append(P, ?DUMMY_PV1_EPOCH) || P <- Ps], % all are wedged
|
||||||
|
|
||||||
[machi_chain_manager1:set_chain_members(ChMgr, Dict) ||
|
[machi_chain_manager1:set_chain_members(ChMgr, Dict) ||
|
||||||
ChMgr <- ChMgrs ],
|
ChMgr <- ChMgrs ],
|
||||||
{ok, {false, EpochID1,_,_}} = WedgeStatus(hd(Ps)),
|
{ok, {false, EpochID1}} = WedgeStatus(hd(Ps)),
|
||||||
[{ok, {false, EpochID1,_,_}} = WedgeStatus(P) || P <- Ps], % *not* wedged
|
[{ok, {false, EpochID1}} = WedgeStatus(P) || P <- Ps], % *not* wedged
|
||||||
[{ok,_} = Append(P, EpochID1) || P <- Ps], % *not* wedged
|
[{ok,_} = Append(P, EpochID1) || P <- Ps], % *not* wedged
|
||||||
{ok, {_,_,File1}} = Append(hd(Ps), EpochID1),
|
{ok, {_,_,File1}} = Append(hd(Ps), EpochID1),
|
||||||
|
|
||||||
|
@ -126,9 +123,9 @@ partial_stop_restart2() ->
|
||||||
Epoch_m = Proj_m#projection_v1.epoch_number,
|
Epoch_m = Proj_m#projection_v1.epoch_number,
|
||||||
%% Confirm that all FLUs are *not* wedged, with correct proj & epoch
|
%% Confirm that all FLUs are *not* wedged, with correct proj & epoch
|
||||||
Proj_mCSum = Proj_m#projection_v1.epoch_csum,
|
Proj_mCSum = Proj_m#projection_v1.epoch_csum,
|
||||||
[{ok, {false, {Epoch_m, Proj_mCSum},_,_}} = WedgeStatus(P) || % *not* wedged
|
[{ok, {false, {Epoch_m, Proj_mCSum}}} = WedgeStatus(P) || % *not* wedged
|
||||||
P <- Ps],
|
P <- Ps],
|
||||||
{ok, {false, EpochID1,_,_}} = WedgeStatus(hd(Ps)),
|
{ok, {false, EpochID1}} = WedgeStatus(hd(Ps)),
|
||||||
[{ok,_} = Append(P, EpochID1) || P <- Ps], % *not* wedged
|
[{ok,_} = Append(P, EpochID1) || P <- Ps], % *not* wedged
|
||||||
|
|
||||||
%% Stop all but 'a'.
|
%% Stop all but 'a'.
|
||||||
|
@ -138,20 +135,19 @@ partial_stop_restart2() ->
|
||||||
{FluName_a, _} = hd(Ps),
|
{FluName_a, _} = hd(Ps),
|
||||||
ok = machi_flu_psup:stop_flu_package(FluName_a),
|
ok = machi_flu_psup:stop_flu_package(FluName_a),
|
||||||
{ok, _} = Start(hd(Ps)),
|
{ok, _} = Start(hd(Ps)),
|
||||||
timer:sleep(123), % TODO fix server socket available race condition in better way
|
|
||||||
%% Remember: 'a' is not in active mode.
|
%% Remember: 'a' is not in active mode.
|
||||||
{ok, Proj_m3} = machi_projection_store:read_latest_projection(
|
{ok, Proj_m3} = machi_projection_store:read_latest_projection(
|
||||||
hd(PStores), private),
|
hd(PStores), private),
|
||||||
true = (machi_projection:update_dbg2(Proj_m, []) ==
|
true = (machi_projection:update_dbg2(Proj_m, []) ==
|
||||||
machi_projection:update_dbg2(Proj_m3, [])),
|
machi_projection:update_dbg2(Proj_m, [])),
|
||||||
%% Confirm that 'a' is wedged
|
%% Confirm that 'a' is wedged
|
||||||
{error, wedged} = Append(hd(Ps), EpochID1),
|
{error, wedged} = Append(hd(Ps), EpochID1),
|
||||||
{_, #p_srvr{address=Addr_a, port=TcpPort_a}} = hd(Ps),
|
{_, #p_srvr{address=Addr_a, port=TcpPort_a}} = hd(Ps),
|
||||||
{error, wedged} = machi_flu1_client:read_chunk(
|
{error, wedged} = machi_flu1_client:read_chunk(
|
||||||
Addr_a, TcpPort_a, NSInfo, ?DUMMY_PV1_EPOCH,
|
Addr_a, TcpPort_a, ?DUMMY_PV1_EPOCH,
|
||||||
<<>>, 99999999, 1, undefined),
|
<<>>, 99999999, 1),
|
||||||
{error, bad_arg} = machi_flu1_client:checksum_list(
|
{error, wedged} = machi_flu1_client:checksum_list(
|
||||||
Addr_a, TcpPort_a, <<>>),
|
Addr_a, TcpPort_a, ?DUMMY_PV1_EPOCH, <<>>),
|
||||||
%% list_files() is permitted despite wedged status
|
%% list_files() is permitted despite wedged status
|
||||||
{ok, _} = machi_flu1_client:list_files(
|
{ok, _} = machi_flu1_client:list_files(
|
||||||
Addr_a, TcpPort_a, ?DUMMY_PV1_EPOCH),
|
Addr_a, TcpPort_a, ?DUMMY_PV1_EPOCH),
|
||||||
|
@ -160,7 +156,7 @@ partial_stop_restart2() ->
|
||||||
{now_using,_,Epoch_n} = machi_chain_manager1:trigger_react_to_env(
|
{now_using,_,Epoch_n} = machi_chain_manager1:trigger_react_to_env(
|
||||||
hd(ChMgrs)),
|
hd(ChMgrs)),
|
||||||
true = (Epoch_n > Epoch_m),
|
true = (Epoch_n > Epoch_m),
|
||||||
{ok, {false, EpochID3,_,_}} = WedgeStatus(hd(Ps)),
|
{ok, {false, EpochID3}} = WedgeStatus(hd(Ps)),
|
||||||
%% The file we're assigned should be different with the epoch change.
|
%% The file we're assigned should be different with the epoch change.
|
||||||
{ok, {_,_,File3}} = Append(hd(Ps), EpochID3),
|
{ok, {_,_,File3}} = Append(hd(Ps), EpochID3),
|
||||||
true = (File1 /= File3),
|
true = (File1 /= File3),
|
||||||
|
@ -176,19 +172,6 @@ partial_stop_restart2() ->
|
||||||
ok
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
p_srvr_rec_test() ->
|
|
||||||
P = #p_srvr{name=a, address="localhost", port=1024, props=[yo]},
|
|
||||||
[P] = machi_flu_sup:sanitize_p_srvr_records([P]),
|
|
||||||
[P] = machi_flu_sup:sanitize_p_srvr_records([P,P]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([nope]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{proto_mod=does_not_exist}]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{proto_mod="lists"}]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{address=7}]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{port=5}]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{port=foo}]),
|
|
||||||
[] = machi_flu_sup:sanitize_p_srvr_records([#p_srvr{props=foo}]),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
-endif. % !PULSE
|
-endif. % !PULSE
|
||||||
-endif. % TEST
|
-endif. % TEST
|
||||||
|
|
||||||
|
|
|
@ -1,307 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2014 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_lifecycle_mgr_test).
|
|
||||||
-compile(export_all).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
-define(MGR, machi_chain_manager1).
|
|
||||||
|
|
||||||
setup() ->
|
|
||||||
catch application:stop(machi),
|
|
||||||
{ok, SupPid} = machi_sup:start_link(),
|
|
||||||
error_logger:tty(false),
|
|
||||||
Dir = "./" ++ atom_to_list(?MODULE) ++ ".datadir",
|
|
||||||
machi_flu1_test:clean_up_data_dir(Dir ++ "/*/*"),
|
|
||||||
machi_flu1_test:clean_up_data_dir(Dir),
|
|
||||||
Envs = [{flu_data_dir, Dir ++ "/data/flu"},
|
|
||||||
{flu_config_dir, Dir ++ "/etc/flu-config"},
|
|
||||||
{chain_config_dir, Dir ++ "/etc/chain-config"},
|
|
||||||
{platform_data_dir, Dir ++ "/data"},
|
|
||||||
{platform_etc_dir, Dir ++ "/etc"},
|
|
||||||
{not_used_pending, Dir ++ "/etc/pending"}
|
|
||||||
],
|
|
||||||
EnvKeys = [K || {K,_V} <- Envs],
|
|
||||||
undefined = application:get_env(machi, yo),
|
|
||||||
Cleanup = machi_flu1_test:get_env_vars(machi, EnvKeys ++ [yo]),
|
|
||||||
[begin
|
|
||||||
filelib:ensure_dir(V ++ "/unused"),
|
|
||||||
application:set_env(machi, K, V)
|
|
||||||
end || {K, V} <- Envs],
|
|
||||||
{SupPid, Dir, Cleanup}.
|
|
||||||
|
|
||||||
cleanup({SupPid, Dir, Cleanup}) ->
|
|
||||||
exit(SupPid, normal),
|
|
||||||
machi_util:wait_for_death(SupPid, 100),
|
|
||||||
error_logger:tty(true),
|
|
||||||
catch application:stop(machi),
|
|
||||||
machi_flu1_test:clean_up_data_dir(Dir ++ "/*/*"),
|
|
||||||
machi_flu1_test:clean_up_data_dir(Dir),
|
|
||||||
machi_flu1_test:clean_up_env_vars(Cleanup),
|
|
||||||
undefined = application:get_env(machi, yo),
|
|
||||||
ok.
|
|
||||||
|
|
||||||
smoke_test_() ->
|
|
||||||
{timeout, 60, fun() -> smoke_test2() end}.
|
|
||||||
|
|
||||||
smoke_test2() ->
|
|
||||||
YoCleanup = setup(),
|
|
||||||
try
|
|
||||||
Prefix = <<"pre">>,
|
|
||||||
Chunk1 = <<"yochunk">>,
|
|
||||||
Host = "localhost",
|
|
||||||
PortBase = 60120,
|
|
||||||
|
|
||||||
Pa = #p_srvr{name=a,address="localhost",port=PortBase+0},
|
|
||||||
Pb = #p_srvr{name=b,address="localhost",port=PortBase+1},
|
|
||||||
Pc = #p_srvr{name=c,address="localhost",port=PortBase+2},
|
|
||||||
%% Pstore_a = machi_flu1:make_projection_server_regname(a),
|
|
||||||
%% Pstore_b = machi_flu1:make_projection_server_regname(b),
|
|
||||||
%% Pstore_c = machi_flu1:make_projection_server_regname(c),
|
|
||||||
Pstores = [Pstore_a, Pstore_b, Pstore_c] =
|
|
||||||
[machi_flu1:make_projection_server_regname(a),
|
|
||||||
machi_flu1:make_projection_server_regname(b),
|
|
||||||
machi_flu1:make_projection_server_regname(c)],
|
|
||||||
ChMgrs = [ChMgr_a, ChMgr_b, ChMgr_c] =
|
|
||||||
[machi_chain_manager1:make_chmgr_regname(a),
|
|
||||||
machi_chain_manager1:make_chmgr_regname(b),
|
|
||||||
machi_chain_manager1:make_chmgr_regname(c)],
|
|
||||||
Fits = [Fit_a, Fit_b, Fit_c] =
|
|
||||||
[machi_flu_psup:make_fitness_regname(a),
|
|
||||||
machi_flu_psup:make_fitness_regname(b),
|
|
||||||
machi_flu_psup:make_fitness_regname(c)],
|
|
||||||
Advance = machi_chain_manager1_test:make_advance_fun(
|
|
||||||
Fits, [a,b,c], ChMgrs, 3),
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("\nSTEP: Start 3 FLUs, no chain.\n", []),
|
|
||||||
|
|
||||||
[machi_lifecycle_mgr:make_pending_config(P) || P <- [Pa,Pb,Pc] ],
|
|
||||||
{[_,_,_],[]} = machi_lifecycle_mgr:process_pending(),
|
|
||||||
[{ok, #projection_v1{epoch_number=0}} =
|
|
||||||
machi_projection_store:read_latest_projection(PSTORE, private)
|
|
||||||
|| PSTORE <- Pstores],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("\nSTEP: Start chain = [a,b,c]\n", []),
|
|
||||||
|
|
||||||
C1 = #chain_def_v1{name=cx, mode=ap_mode, full=[Pa,Pb,Pc],
|
|
||||||
local_run=[a,b,c]},
|
|
||||||
machi_lifecycle_mgr:make_pending_config(C1),
|
|
||||||
{[],[_]} = machi_lifecycle_mgr:process_pending(),
|
|
||||||
Advance(),
|
|
||||||
[{ok, #projection_v1{all_members=[a,b,c]}} =
|
|
||||||
machi_projection_store:read_latest_projection(PSTORE, private)
|
|
||||||
|| PSTORE <- Pstores],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("\nSTEP: Reset chain = [b,c]\n", []),
|
|
||||||
|
|
||||||
C2 = #chain_def_v1{name=cx, mode=ap_mode, full=[Pb,Pc],
|
|
||||||
old_full=[a,b,c], old_witnesses=[],
|
|
||||||
local_stop=[a], local_run=[b,c]},
|
|
||||||
machi_lifecycle_mgr:make_pending_config(C2),
|
|
||||||
{[],[_]} = machi_lifecycle_mgr:process_pending(),
|
|
||||||
Advance(),
|
|
||||||
%% a should be down
|
|
||||||
{'EXIT', _} = (catch machi_projection_store:read_latest_projection(
|
|
||||||
hd(Pstores), private)),
|
|
||||||
[{ok, #projection_v1{all_members=[b,c]}} =
|
|
||||||
machi_projection_store:read_latest_projection(PSTORE, private)
|
|
||||||
|| PSTORE <- tl(Pstores)],
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
io:format("\nSTEP: Reset chain = []\n", []),
|
|
||||||
|
|
||||||
C3 = #chain_def_v1{name=cx, mode=ap_mode, full=[],
|
|
||||||
old_full=[b,c], old_witnesses=[],
|
|
||||||
local_stop=[b,c], local_run=[]},
|
|
||||||
machi_lifecycle_mgr:make_pending_config(C3),
|
|
||||||
{[],[_]} = machi_lifecycle_mgr:process_pending(),
|
|
||||||
Advance(),
|
|
||||||
%% a,b,c should be down
|
|
||||||
[{'EXIT', _} = (catch machi_projection_store:read_latest_projection(
|
|
||||||
PSTORE, private))
|
|
||||||
|| PSTORE <- Pstores],
|
|
||||||
|
|
||||||
ok
|
|
||||||
after
|
|
||||||
cleanup(YoCleanup)
|
|
||||||
end.
|
|
||||||
|
|
||||||
ast_tuple_syntax_test() ->
|
|
||||||
T = fun(L) -> machi_lifecycle_mgr:check_ast_tuple_syntax(L) end,
|
|
||||||
Canon1 = [ {host, "localhost", []},
|
|
||||||
{host, "localhost", [{client_interface, "1.2.3.4"},
|
|
||||||
{admin_interface, "5.6.7.8"}]},
|
|
||||||
{flu, 'fx', "foohost", 4000, []},
|
|
||||||
switch_old_and_new,
|
|
||||||
{chain, 'cy', ['fx', 'fy'], [{foo,"yay"},{bar,baz}]} ],
|
|
||||||
|
|
||||||
{_Good,[]=_Bad} = T(Canon1),
|
|
||||||
Canon1_norm = machi_lifecycle_mgr:normalize_ast_tuple_syntax(Canon1),
|
|
||||||
true = (length(Canon1) == length(Canon1_norm)),
|
|
||||||
{Canon1_norm_b, []} = T(Canon1_norm),
|
|
||||||
true = (length(Canon1_norm) == length(Canon1_norm_b)),
|
|
||||||
|
|
||||||
{[],[_,_,_,_]} =
|
|
||||||
T([ {host, 'localhost', []},
|
|
||||||
{host, 'localhost', yo},
|
|
||||||
{host, "localhost", [{client_interface, 77.88293829832}]},
|
|
||||||
{host, "localhost", [{client_interface, "1.2.3.4"},
|
|
||||||
{bummer, "5.6.7.8"}]} ]),
|
|
||||||
{[],[_,_,_,_,_,_]} =
|
|
||||||
T([ {flu, 'fx', 'foohost', 4000, []},
|
|
||||||
{flu, 'fx', <<"foohost">>, 4000, []},
|
|
||||||
{flu, 'fx', "foohost", -4000, []},
|
|
||||||
{flu, 'fx', "foohost", 40009999, []},
|
|
||||||
{flu, 'fx', "foohost", 4000, gack},
|
|
||||||
{flu, 'fx', "foohost", 4000, [22]} ]),
|
|
||||||
{[],[_,_,_]} =
|
|
||||||
T([ {chain, 'cy', ["fx", "fy"], [foo,{bar,baz}]},
|
|
||||||
yoloyolo,
|
|
||||||
{chain, "cy", ["fx", 27], oops,arity,way,way,way,too,big,x}
|
|
||||||
]).
|
|
||||||
|
|
||||||
ast_run_test() ->
|
|
||||||
PortBase = 20300,
|
|
||||||
R1 = [
|
|
||||||
{host, "localhost", "localhost", "localhost", []},
|
|
||||||
{flu, 'f0', "localhost", PortBase+0, []},
|
|
||||||
{flu, 'f1', "localhost", PortBase+1, []},
|
|
||||||
{chain, 'ca', ['f0'], []},
|
|
||||||
{chain, 'cb', ['f1'], []},
|
|
||||||
switch_old_and_new,
|
|
||||||
{flu, 'f2', "localhost", PortBase+2, []},
|
|
||||||
{flu, 'f3', "localhost", PortBase+3, []},
|
|
||||||
{flu, 'f4', "localhost", PortBase+4, []},
|
|
||||||
{chain, 'ca', ['f0', 'f2'], []},
|
|
||||||
{chain, 'cc', ['f3', 'f4'], []}
|
|
||||||
],
|
|
||||||
|
|
||||||
{ok, Env1} = machi_lifecycle_mgr:run_ast(R1),
|
|
||||||
%% Uncomment to examine the Env trees.
|
|
||||||
%% Y1 = {lists:sort(gb_trees:to_list(element(1, Env1))),
|
|
||||||
%% lists:sort(gb_trees:to_list(element(2, Env1))),
|
|
||||||
%% element(3, Env1)},
|
|
||||||
%% io:format(user, "\nY1 ~p\n", [Y1]),
|
|
||||||
|
|
||||||
Negative_after_R1 =
|
|
||||||
[
|
|
||||||
{host, "localhost", "foo", "foo", []}, % dupe host
|
|
||||||
{flu, 'f1', "other", PortBase+9999999, []}, % bogus port # (syntax)
|
|
||||||
{flu, 'f1', "other", PortBase+888, []}, % dupe flu name
|
|
||||||
{flu, 'f7', "localhost", PortBase+1, []}, % dupe host+port
|
|
||||||
{chain, 'ca', ['f7'], []}, % unknown flu
|
|
||||||
{chain, 'cc', ['f0'], []}, % flu previously assigned
|
|
||||||
{chain, 'ca', cp_mode, ['f0', 'f1', 'f2'], [], []} % mode change
|
|
||||||
],
|
|
||||||
[begin
|
|
||||||
%% io:format(user, "dbg: Neg ~p\n", [Neg]),
|
|
||||||
{error, _} = machi_lifecycle_mgr:run_ast(R1 ++ [Neg])
|
|
||||||
end || Neg <- Negative_after_R1],
|
|
||||||
|
|
||||||
%% The 'run' phase doesn't blow smoke. What about 'diff'?
|
|
||||||
{X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, "localhost"),
|
|
||||||
%% There's only one host, "localhost", so 'all' should be exactly equal.
|
|
||||||
{X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, all),
|
|
||||||
%% io:format(user, "X1b: ~p\n", [X1b]),
|
|
||||||
|
|
||||||
%% Append to the R1 scenario: for chain cc: add f5, remove f4
|
|
||||||
%% Expect: see pattern matching below on X2b.
|
|
||||||
R2 = (R1 -- [switch_old_and_new]) ++
|
|
||||||
[switch_old_and_new,
|
|
||||||
{flu, 'f5', "localhost", PortBase+5, []},
|
|
||||||
{chain, 'cc', ['f3','f5'], []}],
|
|
||||||
{ok, Env2} = machi_lifecycle_mgr:run_ast(R2),
|
|
||||||
{_X2a, X2b} = machi_lifecycle_mgr:diff_env(Env2, "localhost"),
|
|
||||||
%% io:format(user, "X2b: ~p\n", [X2b]),
|
|
||||||
F5_port = PortBase+5,
|
|
||||||
[#p_srvr{name='f5',address="localhost",port=F5_port},
|
|
||||||
#chain_def_v1{name='cc',
|
|
||||||
full=[#p_srvr{name='f3'},#p_srvr{name='f5'}], witnesses=[],
|
|
||||||
old_full=[f3,f4], old_witnesses=[],
|
|
||||||
local_run=[f5], local_stop=[f4]}] = X2b,
|
|
||||||
|
|
||||||
ok.
|
|
||||||
|
|
||||||
ast_then_apply_test_() ->
|
|
||||||
{timeout, 60, fun() -> ast_then_apply_test2() end}.
|
|
||||||
|
|
||||||
ast_then_apply_test2() ->
|
|
||||||
YoCleanup = setup(),
|
|
||||||
try
|
|
||||||
PortBase = 20400,
|
|
||||||
NumChains = 4,
|
|
||||||
ChainLen = 3,
|
|
||||||
FLU_num = NumChains * ChainLen,
|
|
||||||
FLU_defs = [{flu, list_to_atom("f"++integer_to_list(X)),
|
|
||||||
"localhost", PortBase+X, []} || X <- lists:seq(1,FLU_num)],
|
|
||||||
FLU_names = [FLU || {flu,FLU,_,_,_} <- FLU_defs],
|
|
||||||
Ch_defs = [{chain, list_to_atom("c"++integer_to_list(X)),
|
|
||||||
lists:sublist(FLU_names, X, 3),
|
|
||||||
[]} || X <- lists:seq(1, FLU_num, 3)],
|
|
||||||
|
|
||||||
R1 = [switch_old_and_new,
|
|
||||||
{host, "localhost", "localhost", "localhost", []}]
|
|
||||||
++ FLU_defs ++ Ch_defs,
|
|
||||||
{ok, Env1} = machi_lifecycle_mgr:run_ast(R1),
|
|
||||||
{_X1a, X1b} = machi_lifecycle_mgr:diff_env(Env1, "localhost"),
|
|
||||||
%% io:format(user, "X1b ~p\n", [X1b]),
|
|
||||||
[machi_lifecycle_mgr:make_pending_config(X) || X <- X1b],
|
|
||||||
{PassFLUs, PassChains} = machi_lifecycle_mgr:process_pending(),
|
|
||||||
true = (length(PassFLUs) == length(FLU_defs)),
|
|
||||||
true = (length(PassChains) == length(Ch_defs)),
|
|
||||||
|
|
||||||
%% Kick the chain managers into doing something useful right now.
|
|
||||||
Pstores = [list_to_atom(atom_to_list(X) ++ "_pstore") || X <- FLU_names],
|
|
||||||
Fits = [list_to_atom(atom_to_list(X) ++ "_fitness") || X <- FLU_names],
|
|
||||||
ChMgrs = [list_to_atom(atom_to_list(X) ++ "_chmgr") || X <- FLU_names],
|
|
||||||
Advance = machi_chain_manager1_test:make_advance_fun(
|
|
||||||
Fits, FLU_names, ChMgrs, 3),
|
|
||||||
Advance(),
|
|
||||||
|
|
||||||
%% Sanity check: everyone is configured properly.
|
|
||||||
[begin
|
|
||||||
{ok, #projection_v1{epoch_number=Epoch, all_members=All,
|
|
||||||
chain_name=ChainName, upi=UPI}} =
|
|
||||||
machi_projection_store:read_latest_projection(PStore, private),
|
|
||||||
%% io:format(user, "~p: epoch ~p all ~p\n", [PStore, Epoch, All]),
|
|
||||||
true = Epoch > 0,
|
|
||||||
ChainLen = length(All),
|
|
||||||
true = (length(UPI) > 0),
|
|
||||||
{chain, _, Full, []} = lists:keyfind(ChainName, 2, Ch_defs),
|
|
||||||
true = lists:sort(Full) == lists:sort(All)
|
|
||||||
end || PStore <- Pstores],
|
|
||||||
|
|
||||||
ok
|
|
||||||
after
|
|
||||||
cleanup(YoCleanup)
|
|
||||||
end.
|
|
||||||
|
|
||||||
-endif. % !PULSE
|
|
||||||
-endif. % TEST
|
|
|
@ -1,200 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_merkle_tree_test).
|
|
||||||
-compile([export_all]).
|
|
||||||
|
|
||||||
-include("machi_merkle_tree.hrl").
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
-include_lib("kernel/include/file.hrl").
|
|
||||||
|
|
||||||
-define(GAP_CHANCE, 0.10).
|
|
||||||
|
|
||||||
%% unit tests
|
|
||||||
basic_test() ->
|
|
||||||
random:seed(os:timestamp()),
|
|
||||||
Fsz = choose_size() * 1024,
|
|
||||||
Filesize = max(Fsz, 10*1024*1024),
|
|
||||||
ChunkSize = max(1048576, Filesize div 100),
|
|
||||||
N = make_leaf_nodes(Filesize),
|
|
||||||
D0 = #naive{ leaves = N, chunk_size = ChunkSize, recalc = true },
|
|
||||||
T1 = machi_merkle_tree:build_tree(D0),
|
|
||||||
|
|
||||||
D1 = #naive{ leaves = tl(N), chunk_size = ChunkSize, recalc = true },
|
|
||||||
T2 = machi_merkle_tree:build_tree(D1),
|
|
||||||
|
|
||||||
?assertNotEqual(T1#naive.root, T2#naive.root),
|
|
||||||
?assertEqual(true, length(machi_merkle_tree:naive_diff(T1, T2)) == 1
|
|
||||||
orelse
|
|
||||||
Filesize > ChunkSize).
|
|
||||||
|
|
||||||
|
|
||||||
make_leaf_nodes(Filesize) ->
|
|
||||||
lists:reverse(
|
|
||||||
lists:foldl(fun(T, Acc) -> machi_merkle_tree:update_acc(T, Acc) end,
|
|
||||||
[],
|
|
||||||
generate_offsets(Filesize, 1024, []))
|
|
||||||
).
|
|
||||||
|
|
||||||
choose_int(Factor) ->
|
|
||||||
random:uniform(1024*Factor).
|
|
||||||
|
|
||||||
small_int() ->
|
|
||||||
choose_int(10).
|
|
||||||
|
|
||||||
medium_int() ->
|
|
||||||
choose_int(1024).
|
|
||||||
|
|
||||||
large_int() ->
|
|
||||||
choose_int(4096).
|
|
||||||
|
|
||||||
generate_offsets(Filesize, Current, Acc) when Current < Filesize ->
|
|
||||||
Length0 = choose_size(),
|
|
||||||
|
|
||||||
Length = case Length0 + Current > Filesize of
|
|
||||||
false -> Length0;
|
|
||||||
true -> Filesize - Current
|
|
||||||
end,
|
|
||||||
Data = term_to_binary(os:timestamp()),
|
|
||||||
Checksum = machi_util:make_tagged_csum(client_sha, machi_util:checksum_chunk(Data)),
|
|
||||||
Gap = maybe_gap(random:uniform()),
|
|
||||||
generate_offsets(Filesize, Current + Length + Gap, [ {Current, Length, Checksum} | Acc ]);
|
|
||||||
generate_offsets(_Filesize, _Current, Acc) ->
|
|
||||||
lists:reverse(Acc).
|
|
||||||
|
|
||||||
|
|
||||||
random_from_list(L) ->
|
|
||||||
N = random:uniform(length(L)),
|
|
||||||
lists:nth(N, L).
|
|
||||||
|
|
||||||
choose_size() ->
|
|
||||||
F = random_from_list([fun small_int/0, fun medium_int/0, fun large_int/0]),
|
|
||||||
F().
|
|
||||||
|
|
||||||
maybe_gap(Chance) when Chance < ?GAP_CHANCE ->
|
|
||||||
choose_size();
|
|
||||||
maybe_gap(_) -> 0.
|
|
||||||
|
|
||||||
%% Define or remove these ifdefs if benchmarking is desired.
|
|
||||||
-ifdef(BENCH).
|
|
||||||
generate_offsets(FH, Filesize, Current, Acc) when Current < Filesize ->
|
|
||||||
Length0 = choose_size(),
|
|
||||||
|
|
||||||
Length = case Length0 + Current > Filesize of
|
|
||||||
false -> Length0;
|
|
||||||
true -> Filesize - Current
|
|
||||||
end,
|
|
||||||
{ok, Data} = file:pread(FH, Current, Length),
|
|
||||||
Checksum = machi_util:make_tagged_csum(client_sha, machi_util:checksum_chunk(Data)),
|
|
||||||
Gap = maybe_gap(random:uniform()),
|
|
||||||
generate_offsets(FH, Filesize, Current + Length + Gap, [ {Current, Length, Checksum} | Acc ]);
|
|
||||||
generate_offsets(_FH, _Filesize, _Current, Acc) ->
|
|
||||||
lists:reverse(Acc).
|
|
||||||
|
|
||||||
make_offsets_from_file(Filename) ->
|
|
||||||
{ok, Info} = file:read_file_info(Filename),
|
|
||||||
Filesize = Info#file_info.size,
|
|
||||||
{ok, FH} = file:open(Filename, [read, raw, binary]),
|
|
||||||
Offsets = generate_offsets(FH, Filesize, 1024, []),
|
|
||||||
file:close(FH),
|
|
||||||
Offsets.
|
|
||||||
|
|
||||||
choose_filename() ->
|
|
||||||
random_from_list([
|
|
||||||
"def^c5ea7511-d649-47d6-a8c3-2b619379c237^1",
|
|
||||||
"jkl^b077eff7-b2be-4773-a73f-fea4acb8a732^1",
|
|
||||||
"stu^553fa47a-157c-4fac-b10f-2252c7d8c37a^1",
|
|
||||||
"vwx^ae015d68-7689-4c9f-9677-926c6664f513^1",
|
|
||||||
"yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1"
|
|
||||||
]).
|
|
||||||
|
|
||||||
|
|
||||||
make_csum_file(DataDir, Filename, Offsets) ->
|
|
||||||
Path = machi_util:make_checksum_filename(DataDir, Filename),
|
|
||||||
filelib:ensure_dir(Path),
|
|
||||||
{ok, MC} = machi_csum_table:open(Path, []),
|
|
||||||
lists:foreach(fun({Offset, Size, Checksum}) ->
|
|
||||||
machi_csum_table:write(MC, Offset, Size, Checksum) end,
|
|
||||||
Offsets),
|
|
||||||
machi_csum_table:close(MC).
|
|
||||||
|
|
||||||
|
|
||||||
test() ->
|
|
||||||
test(100).
|
|
||||||
|
|
||||||
test(N) ->
|
|
||||||
{ok, F} = file:open("results.txt", [raw, write]),
|
|
||||||
lists:foreach(fun(X) -> format_and_store(F, run_test(X)) end, lists:seq(1, N)).
|
|
||||||
|
|
||||||
format_and_store(F, {OffsetNum, {MTime, MSize}, {NTime, NSize}}) ->
|
|
||||||
S = io_lib:format("~w\t~w\t~w\t~w\t~w\n", [OffsetNum, MTime, MSize, NTime, NSize]),
|
|
||||||
ok = file:write(F, S).
|
|
||||||
|
|
||||||
run_test(C) ->
|
|
||||||
random:seed(os:timestamp()),
|
|
||||||
OffsetFn = "test/" ++ choose_filename(),
|
|
||||||
O = make_offsets_from_file(OffsetFn),
|
|
||||||
Fn = "csum_" ++ integer_to_list(C),
|
|
||||||
make_csum_file(".", Fn, O),
|
|
||||||
|
|
||||||
Osize = length(O),
|
|
||||||
|
|
||||||
{MTime, {ok, M}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", merklet) end),
|
|
||||||
{NTime, {ok, N}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", naive) end),
|
|
||||||
|
|
||||||
?assertEqual(Fn, machi_merkle_tree:filename(M)),
|
|
||||||
?assertEqual(Fn, machi_merkle_tree:filename(N)),
|
|
||||||
|
|
||||||
MTree = machi_merkle_tree:tree(M),
|
|
||||||
MSize = byte_size(term_to_binary(MTree)),
|
|
||||||
|
|
||||||
NTree = machi_merkle_tree:tree(N),
|
|
||||||
NSize = byte_size(term_to_binary(NTree)),
|
|
||||||
|
|
||||||
?assertEqual(same, machi_merkle_tree:diff(N, N)),
|
|
||||||
?assertEqual(same, machi_merkle_tree:diff(M, M)),
|
|
||||||
{Osize, {MTime, MSize}, {NTime, NSize}}.
|
|
||||||
|
|
||||||
torture_test(C) ->
|
|
||||||
Results = [ run_torture_test() || _ <- lists:seq(1, C) ],
|
|
||||||
{ok, F} = file:open("torture_results.txt", [raw, write]),
|
|
||||||
lists:foreach(fun({MSize, MTime, NSize, NTime}) ->
|
|
||||||
file:write(F, io_lib:format("~p\t~p\t~p\t~p\n",
|
|
||||||
[MSize, MTime, NSize, NTime]))
|
|
||||||
end, Results),
|
|
||||||
ok = file:close(F).
|
|
||||||
|
|
||||||
run_torture_test() ->
|
|
||||||
{NTime, N} = timer:tc(fun() -> naive_torture() end),
|
|
||||||
|
|
||||||
MSize = byte_size(term_to_binary(M)),
|
|
||||||
NSize = byte_size(term_to_binary(N)),
|
|
||||||
|
|
||||||
{MSize, MTime, NSize, NTime}.
|
|
||||||
|
|
||||||
naive_torture() ->
|
|
||||||
N = lists:foldl(fun(T, Acc) -> machi_merkle_tree:update_acc(T, Acc) end, [], torture_generator()),
|
|
||||||
T = #naive{ leaves = lists:reverse(N), chunk_size = 10010, recalc = true },
|
|
||||||
machi_merkle_tree:build_tree(T).
|
|
||||||
|
|
||||||
torture_generator() ->
|
|
||||||
[ {O, 1, crypto:hash(sha, term_to_binary(now()))} || O <- lists:seq(1024, 1000000) ].
|
|
||||||
-endif. % BENCH
|
|
|
@ -24,7 +24,6 @@
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
-ifndef(PULSE).
|
-ifndef(PULSE).
|
||||||
|
|
||||||
-include("machi.hrl").
|
|
||||||
-include("machi_pb.hrl").
|
-include("machi_pb.hrl").
|
||||||
-include("machi_projection.hrl").
|
-include("machi_projection.hrl").
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
@ -35,15 +34,20 @@ smoke_test_() ->
|
||||||
{timeout, 5*60, fun() -> smoke_test2() end}.
|
{timeout, 5*60, fun() -> smoke_test2() end}.
|
||||||
|
|
||||||
smoke_test2() ->
|
smoke_test2() ->
|
||||||
PortBase = 5720,
|
Port = 5720,
|
||||||
ok = application:set_env(machi, max_file_size, 1024*1024),
|
Ps = [#p_srvr{name=a, address="localhost", port=Port, props="./data.a"}
|
||||||
try
|
],
|
||||||
{Ps, MgrNames, Dirs} = machi_test_util:start_flu_packages(
|
|
||||||
1, PortBase, "./data.", []),
|
|
||||||
D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]),
|
D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]),
|
||||||
M0 = hd(MgrNames),
|
|
||||||
ok = machi_chain_manager1:set_chain_members(M0, D),
|
[os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps],
|
||||||
[machi_chain_manager1:trigger_react_to_env(M0) || _ <-lists:seq(1,5)],
|
{ok, SupPid} = machi_flu_sup:start_link(),
|
||||||
|
try
|
||||||
|
[begin
|
||||||
|
#p_srvr{name=Name, port=Port, props=Dir} = P,
|
||||||
|
{ok, _} = machi_flu_psup:start_flu_package(Name, Port, Dir, [])
|
||||||
|
end || P <- Ps],
|
||||||
|
ok = machi_chain_manager1:set_chain_members(a_chmgr, D),
|
||||||
|
[machi_chain_manager1:trigger_react_to_env(a_chmgr) || _ <-lists:seq(1,5)],
|
||||||
|
|
||||||
{ok, Clnt} = ?C:start_link(Ps),
|
{ok, Clnt} = ?C:start_link(Ps),
|
||||||
try
|
try
|
||||||
|
@ -56,18 +60,16 @@ smoke_test2() ->
|
||||||
%% a separate test module? Or separate test func?
|
%% a separate test module? Or separate test func?
|
||||||
{error, _} = ?C:auth(Clnt, "foo", "bar"),
|
{error, _} = ?C:auth(Clnt, "foo", "bar"),
|
||||||
|
|
||||||
|
PK = <<>>,
|
||||||
Prefix = <<"prefix">>,
|
Prefix = <<"prefix">>,
|
||||||
Chunk1 = <<"Hello, chunk!">>,
|
Chunk1 = <<"Hello, chunk!">>,
|
||||||
NS = "",
|
|
||||||
NoCSum = <<>>,
|
|
||||||
Opts1 = #append_opts{},
|
|
||||||
{ok, {Off1, Size1, File1}} =
|
{ok, {Off1, Size1, File1}} =
|
||||||
?C:append_chunk(Clnt, NS, Prefix, Chunk1, NoCSum, Opts1),
|
?C:append_chunk(Clnt, PK, Prefix, Chunk1, none, 0),
|
||||||
true = is_binary(File1),
|
true = is_binary(File1),
|
||||||
Chunk2 = "It's another chunk",
|
Chunk2 = "It's another chunk",
|
||||||
CSum2 = {client_sha, machi_util:checksum_chunk(Chunk2)},
|
CSum2 = {client_sha, machi_util:checksum_chunk(Chunk2)},
|
||||||
{ok, {Off2, Size2, File2}} =
|
{ok, {Off2, Size2, File2}} =
|
||||||
?C:append_chunk(Clnt, NS, Prefix, Chunk2, CSum2, Opts1),
|
?C:append_chunk(Clnt, PK, Prefix, Chunk2, CSum2, 1024),
|
||||||
Chunk3 = ["This is a ", <<"test,">>, 32, [["Hello, world!"]]],
|
Chunk3 = ["This is a ", <<"test,">>, 32, [["Hello, world!"]]],
|
||||||
File3 = File2,
|
File3 = File2,
|
||||||
Off3 = Off2 + iolist_size(Chunk2),
|
Off3 = Off2 + iolist_size(Chunk2),
|
||||||
|
@ -78,9 +80,7 @@ smoke_test2() ->
|
||||||
{iolist_to_binary(Chunk2), File2, Off2, Size2},
|
{iolist_to_binary(Chunk2), File2, Off2, Size2},
|
||||||
{iolist_to_binary(Chunk3), File3, Off3, Size3}],
|
{iolist_to_binary(Chunk3), File3, Off3, Size3}],
|
||||||
[begin
|
[begin
|
||||||
File = Fl,
|
{ok, Ch} = ?C:read_chunk(Clnt, Fl, Off, Sz)
|
||||||
?assertMatch({ok, {[{File, Off, Ch, _}], []}},
|
|
||||||
?C:read_chunk(Clnt, Fl, Off, Sz, undefined))
|
|
||||||
end || {Ch, Fl, Off, Sz} <- Reads],
|
end || {Ch, Fl, Off, Sz} <- Reads],
|
||||||
|
|
||||||
{ok, KludgeBin} = ?C:checksum_list(Clnt, File1),
|
{ok, KludgeBin} = ?C:checksum_list(Clnt, File1),
|
||||||
|
@ -88,56 +88,15 @@ smoke_test2() ->
|
||||||
{ok, [{File1Size,File1}]} = ?C:list_files(Clnt),
|
{ok, [{File1Size,File1}]} = ?C:list_files(Clnt),
|
||||||
true = is_integer(File1Size),
|
true = is_integer(File1Size),
|
||||||
|
|
||||||
File1Bin = binary_to_list(File1),
|
|
||||||
[begin
|
|
||||||
#p_srvr{name=Name, props=Props} = P,
|
|
||||||
Dir = proplists:get_value(data_dir, Props),
|
|
||||||
?assertEqual({ok, [File1Bin]},
|
|
||||||
file:list_dir(filename:join([Dir, "data"]))),
|
|
||||||
FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]),
|
|
||||||
{ok, Plist} = machi_plist:open(FileListFileName, []),
|
|
||||||
?assertEqual([], machi_plist:all(Plist))
|
|
||||||
end || P <- Ps],
|
|
||||||
|
|
||||||
[begin
|
|
||||||
ok = ?C:trim_chunk(Clnt, Fl, Off, Sz)
|
|
||||||
end || {_Ch, Fl, Off, Sz} <- Reads],
|
|
||||||
[begin
|
|
||||||
{ok, {[], Trimmed}} =
|
|
||||||
?C:read_chunk(Clnt, Fl, Off, Sz, #read_opts{needs_trimmed=true}),
|
|
||||||
Filename = Fl,
|
|
||||||
?assertEqual([{Filename, Off, Sz}], Trimmed)
|
|
||||||
end || {_Ch, Fl, Off, Sz} <- Reads],
|
|
||||||
|
|
||||||
LargeBytes = binary:copy(<<"x">>, 1024*1024),
|
|
||||||
LBCsum = {client_sha, machi_util:checksum_chunk(LargeBytes)},
|
|
||||||
{ok, {Offx, Sizex, Filex}} =
|
|
||||||
?C:append_chunk(Clnt, NS,
|
|
||||||
Prefix, LargeBytes, LBCsum, Opts1),
|
|
||||||
ok = ?C:trim_chunk(Clnt, Filex, Offx, Sizex),
|
|
||||||
|
|
||||||
%% Make sure everything was trimmed
|
|
||||||
File = binary_to_list(Filex),
|
|
||||||
[begin
|
|
||||||
#p_srvr{name=Name, props=Props} = P,
|
|
||||||
Dir = proplists:get_value(data_dir, Props),
|
|
||||||
?assertEqual({ok, []},
|
|
||||||
file:list_dir(filename:join([Dir, "data"]))),
|
|
||||||
FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]),
|
|
||||||
{ok, Plist} = machi_plist:open(FileListFileName, []),
|
|
||||||
?assertEqual([File], machi_plist:all(Plist))
|
|
||||||
end || P <- Ps],
|
|
||||||
|
|
||||||
[begin
|
|
||||||
{error, trimmed} =
|
|
||||||
?C:read_chunk(Clnt, Fl, Off, Sz, undefined)
|
|
||||||
end || {_Ch, Fl, Off, Sz} <- Reads],
|
|
||||||
ok
|
ok
|
||||||
after
|
after
|
||||||
(catch ?C:quit(Clnt))
|
(catch ?C:quit(Clnt))
|
||||||
end
|
end
|
||||||
after
|
after
|
||||||
machi_test_util:stop_flu_packages()
|
exit(SupPid, normal),
|
||||||
|
[os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps],
|
||||||
|
machi_util:wait_for_death(SupPid, 100),
|
||||||
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-endif. % !PULSE
|
-endif. % !PULSE
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
-module(machi_plist_test).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
open_close_test() ->
|
|
||||||
FileName = "bark-bark-one",
|
|
||||||
file:delete(FileName),
|
|
||||||
{ok, PList0} = machi_plist:open(FileName, []),
|
|
||||||
{ok, PList1} = machi_plist:add(PList0, "boomar"),
|
|
||||||
?assertEqual(["boomar"], machi_plist:all(PList1)),
|
|
||||||
ok = machi_plist:close(PList1),
|
|
||||||
|
|
||||||
{ok, PList2} = machi_plist:open(FileName, []),
|
|
||||||
?assertEqual(["boomar"], machi_plist:all(PList2)),
|
|
||||||
ok = machi_plist:close(PList2),
|
|
||||||
file:delete(FileName),
|
|
||||||
ok.
|
|
|
@ -1,65 +0,0 @@
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
%%
|
|
||||||
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
|
|
||||||
%%
|
|
||||||
%% This file is provided to you under the Apache License,
|
|
||||||
%% Version 2.0 (the "License"); you may not use this file
|
|
||||||
%% except in compliance with the License. You may obtain
|
|
||||||
%% a copy of the License at
|
|
||||||
%%
|
|
||||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
%%
|
|
||||||
%% Unless required by applicable law or agreed to in writing,
|
|
||||||
%% software distributed under the License is distributed on an
|
|
||||||
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
%% KIND, either express or implied. See the License for the
|
|
||||||
%% specific language governing permissions and limitations
|
|
||||||
%% under the License.
|
|
||||||
%%
|
|
||||||
%% -------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(machi_projection_store_test).
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-ifndef(PULSE).
|
|
||||||
|
|
||||||
-compile(export_all).
|
|
||||||
-define(PS, machi_projection_store).
|
|
||||||
|
|
||||||
-include("machi_projection.hrl").
|
|
||||||
|
|
||||||
smoke_test() ->
|
|
||||||
PortBase = 64820,
|
|
||||||
Dir = "./data.a",
|
|
||||||
Os = [{ignore_stability_time, true}, {active_mode, false}],
|
|
||||||
os:cmd("rm -rf " ++ Dir),
|
|
||||||
machi_test_util:start_flu_package(a, PortBase, "./data.a", Os),
|
|
||||||
|
|
||||||
try
|
|
||||||
P1 = machi_projection:new(1, a, [], [], [], [], []),
|
|
||||||
ok = ?PS:write(a_pstore, public, P1),
|
|
||||||
{error, written} = ?PS:write(a_pstore, public, P1),
|
|
||||||
|
|
||||||
Pbad = P1#projection_v1{epoch_number=99238}, % break checksum
|
|
||||||
{error, bad_arg} = ?PS:write(a_pstore, public, Pbad),
|
|
||||||
|
|
||||||
ok = ?PS:write(a_pstore, private, P1),
|
|
||||||
P1a = machi_projection:update_checksum(P1#projection_v1{dbg=[diff_yo]}),
|
|
||||||
{error, written} = ?PS:write(a_pstore, private, P1a),
|
|
||||||
|
|
||||||
P1b = P1#projection_v1{dbg2=[version_b]},
|
|
||||||
ok = ?PS:write(a_pstore, private, P1b),
|
|
||||||
P1c = P1#projection_v1{dbg2=[version_c]},
|
|
||||||
ok = ?PS:write(a_pstore, private, P1c),
|
|
||||||
{error, written} = ?PS:write(a_pstore, private, P1a),
|
|
||||||
|
|
||||||
ok = ?PS:set_consistency_mode(a_pstore, ap_mode),
|
|
||||||
ok = ?PS:set_consistency_mode(a_pstore, cp_mode),
|
|
||||||
|
|
||||||
ok
|
|
||||||
after
|
|
||||||
machi_test_util:stop_flu_package()
|
|
||||||
end.
|
|
||||||
|
|
||||||
-endif. % !PULSE
|
|
||||||
-endif. % TEST
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue