Merge branch 'merge/tango-prototype'

This commit is contained in:
Scott Lystig Fritchie 2015-03-02 20:07:25 +09:00
commit 370f70303d
33 changed files with 5279 additions and 0 deletions

7
prototype/tango/.gitignore vendored Normal file

@@ -0,0 +1,7 @@
.eunit
.eqc-info
current_counterexample.eqc
deps
ebin/*.beam
ebin/*.app
erl_crash.dump

178
prototype/tango/LICENSE Normal file

@@ -0,0 +1,178 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

39
prototype/tango/Makefile Normal file

@@ -0,0 +1,39 @@
REBAR_BIN := $(shell which rebar)
ifeq ($(REBAR_BIN),)
REBAR_BIN = ./rebar
endif
.PHONY: rel deps package pkgclean
all: deps compile
compile:
	$(REBAR_BIN) compile
deps:
	$(REBAR_BIN) get-deps
clean:
	$(REBAR_BIN) clean
test: deps compile eunit
eunit:
	$(REBAR_BIN) -v skip_deps=true eunit
pulse: compile
	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \
       xmerl webtool snmp public_key mnesia eunit syntax_tools compiler
PLT = $(HOME)/.dbms_dialyzer_plt
build_plt: deps compile
	dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
dialyzer: deps compile
	dialyzer -Wno_return --plt $(PLT) ebin
clean_plt:
	rm $(PLT)

33
prototype/tango/README.md Normal file

@@ -0,0 +1,33 @@
Tango prototype TODO list
=========================
__ The current checkpoint implementation is fundamentally broken and
needs a rewrite, or else.
This issue is not mentioned at all in the Tango paper.
option 1: fix checkpoint to be 100% correct
option 2: checkpointing is for the weak and the memory-constrained, so
don't bother. Instead, rip out the current checkpoint code,
period.
option 3: other
xx Checkpoint fix option #1: history splicing within the same OID?
xx Checkpoint fix option #2: checkpoint to a new OID, history writes to both
OIDs during the CP, then a marker in the old OID
to switch over to the new OID?
History splicing has a flaw that I believe makes it unworkable. The switch to a
new OID has problems with updates written to the old OID before the
new checkpoint has finished.
I believe that a checkpoint where:
* all Tango writes, checkpoint and non-checkpoint alike, are noted with
a checkpoint number.
* that checkpoint number is strictly increasing
* a new checkpoint has a new checkpoint number
* scans ignore blocks with checkpoint numbers larger than the current
active checkpoint #, until the checkpoint is complete.
... ought to work correctly.
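A minimal sketch of that scan-side rule, in Erlang, assuming a
hypothetical entry shape `{CpNum, Payload}` and an `ActiveCP` argument
holding the number of the last *completed* checkpoint (both names are
invented here for illustration; this is not code from the prototype):

    %% Keep only entries whose checkpoint number is at or below the
    %% currently completed checkpoint; entries tagged with a larger
    %% (still in-progress) checkpoint number are skipped until that
    %% checkpoint finishes.
    visible_entries(Entries, ActiveCP) ->
        [E || {CpNum, _Payload} = E <- Entries, CpNum =< ActiveCP].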


@@ -0,0 +1,191 @@
## CORFU papers
I recommend the "5 pages" paper below first, to give a flavor of
what CORFU is about. When Scott first read the CORFU paper
back in 2011 (and the Hyder paper), he thought it was insanity.
He recommends waiting before judging quite so hastily. :-)
After that, perhaps take a step back and skim over the
Hyder paper. Hyder started before CORFU, but since CORFU, the
Hyder folks at Microsoft have rewritten Hyder to use CORFU as
the shared log underneath it. But the Hyder paper has lots of
interesting bits about how you'd go about creating a distributed
DB where the transaction log *is* the DB.
### "CORFU: A Distributed Shared LogCORFU: A Distributed Shared Log"
MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN
PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI,
University of California, San Diego, TED WOBBER, Microsoft Research
Silicon Valley
Long version of introduction to CORFU (~30 pages)
http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf
### "CORFU: A Shared Log Design for Flash Clusters"
Same authors as above
Short version of introduction to CORFU paper above (~12 pages)
http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf
### "From Paxos to CORFU: A Flash-Speed Shared Log"
Same authors as above
5 pages, a short summary of CORFU basics and some trial applications
that have been implemented on top of it.
http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf
### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware"
Wei, Davis, Wobber, Balakrishnan, Malkhi
Summary report of implementing the CORFU server-side in
FPGA-style hardware. (~11 pages)
http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf
### "Tango: Distributed Data Structures over a Shared Log"
Balakrishnan, Malkhi, Wobber, Wu, Prabhakaran, Wei, Davis, Rao, Zou, Zuck
Describes a framework for developing data structures that reside
persistently within a CORFU log: the log *is* the database/data
structure store.
http://www.snookles.com/scottmp/corfu/Tango.pdf
### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service"
Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber
The ZooKeeper inter-server communication is replaced with CORFU.
Faster, fewer lines of code than ZK, and more features than the
original ZK code base.
http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf
### "Hyder A Transactional Record Manager for Shared Flash"
Bernstein, Reid, Das
Describes a distributed log-based DB system where the txn log is
treated quite oddly: a "txn intent" record is written to a
shared common log. All participants read the shared log in
parallel and make commit/abort decisions in parallel, based on
what conflicts (or not) they see in the log. Scott's first
reading was "No way, wacky" ... but he has since changed his mind.
http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf
pages 9-20
## Fiddling with PULSE
Do the following:
make clean
make
make pulse
... then watch the dots go across the screen for 60 seconds. If you
wish, you can press `Control-c` to interrupt the test. We're really
interested in the build artifacts. Then start an Erlang shell and run the property:
erl -pz .eunit deps/*/ebin
eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
This will run the PULSE test for 5 seconds. Feel free to adjust for
as many seconds as you wish.
Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
Eshell V5.10.3 (abort with ^G)
1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
Starting Quviq QuickCheck version 1.30.4
(compiled at {{2014,2,7},{9,19,50}})
Licence for Basho reserved until {{2014,2,17},{1,41,39}}
......................................................................................
OK, passed 86 tests
schedule: Count: 86 Min: 2 Max: 1974 Avg: 3.2e+2 Total: 27260
true
2>
REPL interactive work can be done via:
1. Edit code, e.g. `corfurl_pulse.erl`.
2. Run `env USE_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP`
to compile.
3. Reload any recompiled modules, e.g. `l(corfurl_pulse).`
4. Resume QuickCheck activities.
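For example, steps 3 and 4 of that loop might look like the following
in the shell (the prompt numbers will differ from run to run):

    3> l(corfurl_pulse).
    {module,corfurl_pulse}
    4> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).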
## Seeing a PULSE scheduler interleaving failure in action
1. Edit `corfurl_pulse:check_trace()` to uncomment the
use of `conjunction()` that mentions `bogus_order_check_do_not_use_me`
and comment out the real `conjunction()` call below it.
2. Recompile & reload.
3. Check.
For example:
9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
.........Failed! After 9 tests.
Sweet! The first tuple below holds the `?FORALL()` values, the 2nd is
the list of commands,
`{SequentialCommands, ListofParallelCommandLists}`, and the 3rd is the
seed used to perturb the PULSE scheduler.
In this case, `SequentialCommands` makes a single call (to `setup()`),
and there are two parallel procs: one makes 1 call to `append()` and
the other makes 2 calls to `append()`.
{2,2,9}
{{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}],
[[{set,{var,3},
{call,corfurl_pulse,append,
[{var,1},<<231,149,226,203,10,105,54,223,147>>]}}],
[{set,{var,2},
{call,corfurl_pulse,append,
[{var,1},<<7,206,146,75,249,13,154,238,110>>]}},
{set,{var,4},
{call,corfurl_pulse,append,
[{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]},
{27492,46961,4884}}
Here are our results:
simple_result: passed
errors: passed
events: failed
identity: passed
bogus_order_check_do_not_use_me: failed
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
Our (bogus!) order expectation was violated. Shrinking!
simple_result: passed
errors: passed
events: failed
identity: passed
bogus_order_check_do_not_use_me: failed
[{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
Shrinking was able to remove one `append()` call and to shrink the
size of the pages from 9 bytes down to 1 byte.
Shrinking........(8 times)
{1,1,1}
{{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}],
[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],
[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]},
{27492,46961,4884}}
events: failed
bogus_order_check_do_not_use_me: failed
[{ok,2},{ok,1}] /= [{ok,1},{ok,2}]
false


@@ -0,0 +1,35 @@
msc {
client1, FLU1, FLU2, client2, client3;
client1 box client3 [label="Epoch #1: chain = FLU1 -> FLU2"];
client1 -> FLU1 [label="{write,epoch1,<<Page YYY>>}"];
client1 <- FLU1 [label="ok"];
client1 box client1 [label="Client crash", textcolour="red"];
FLU1 box FLU1 [label="FLU crash", textcolour="red"];
client1 box client3 [label="Epoch #2: chain = FLU2"];
client2 -> FLU2 [label="{write,epoch2,<<Page ZZZ>>}"];
client2 <- FLU2 [label="ok"];
client3 box client3 [label="Read repair starts", textbgcolour="aqua"];
client3 -> FLU2 [label="{read,epoch2}"];
client3 <- FLU2 [label="{ok,<<Page ZZZ>>}"];
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>}"];
FLU1 box FLU1 [label="What do we do here? Our current value is <<Page YYY>>.", textcolour="red"] ;
FLU1 box FLU1 [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ;
FLU1 box FLU1 [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ;
FLU1 -> client3 [label="I'm sorry, Dave, I cannot do that."];
FLU1 box FLU1 [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ;
client3 -> FLU1 [label="{write,epoch2,<<Page ZZZ>>,repair,witnesses=[FLU2]}", textbgcolour="silver"];
FLU1 box FLU1 [label="Start an async process to ask the witness list to corroborate this repair."];
FLU1 -> FLU2 [label="{read,epoch2}", textbgcolour="aqua"];
FLU1 <- FLU2 [label="{ok,<<Page ZZ>>}", textbgcolour="aqua"];
FLU1 box FLU1 [label="Overwrite local storage with repair page.", textbgcolour="silver"];
client3 <- FLU1 [label="Async proc replies: ok", textbgcolour="silver"];
}


@@ -0,0 +1,92 @@
## read-repair-race.1.
First attempt at using "mscgen" to make a Message Sequence
Chart (MSC) for a race found at commit 087c2605ab.
## read-repair-race.2.
Second attempt. This is almost exactly the trace that is
generated by this failing test case at commit 087c2605ab:
C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]].
eqc:check(corfurl_pulse:prop_pulse(), C2).
## read-repair-race.2b.*
Same basic condition as read-repair-race.2, but edited
substantially to make it clearer what is happening.
Also for commit 087c2605ab.
I believe that I have a fix for the silver-colored
`error_overwritten` case ... and it was indeed added to the code soon
afterward, but it turns out that it doesn't solve the entire problem
of "two clients try to write the exact same data at the same time to
the same LPN".
## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN"
This situation is something that CORFU cannot protect against, IMO.
I have been struggling for a while to find a way for CORFU clients
to *always* know when there is a conflict with another
writer. It usually works: the basic nature of write-once registers is
very powerful. However, in the case where two clients are trying to
write the same page data to the same LPN, it looks impossible to
resolve.
How do you tell the difference between:
1. A race between a client A writing page P at address LPN and
read-repair fixing P. P *is* A's data and no other's, so this race
doesn't confuse anyone.
1. A race between a client A writing page P at address LPN and client
B writing the exact same page data P at the same LPN.
A's page P = B's page P, but clients A & B don't know that.
If CORFU tells both A & B that they were successful, A & B assume
that the CORFU log has two new pages appended to it, but in truth
only one new page was appended.
If we try to solve this by always avoiding the same LPN address
conflict, we are deluding ourselves. If we assume that the sequencer
is 100% correct in that it never assigns the same LPN twice, and if we
assume that a client must never write a block without an assignment
from the sequencer, then the problem is solved. But that solution has a
_heavy_ price: the log is only available when the sequencer is
available, and only when no more than one sequencer is running at a
time.
The CORFU base system promises correct operation, even if:
* Zero sequencers are running, and clients might choose the same LPN
to write to.
* Two or more sequencers are running, and different sequencers
assign the same LPN to two different clients.
But CORFU's "correct" behavior does not include detecting the same
page at the same LPN. The papers don't specifically say it, alas.
But IMO it's impossible to guarantee, so all docs ought to explicitly
say that it's impossible and that clients must not assume it.
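To see why the writer cannot tell those cases apart, here is a
condensed, hedged sketch of the double-check that the prototype's
`write_page_to_chain/7` does when a mid-chain write returns
`error_overwritten` (the function name below is invented; the
read-back call is the prototype's `corfurl_flu:read/3`):

    %% A byte-identical page written by *another* client passes this
    %% check exactly as well as our own page that read repair copied
    %% ahead of us, so the two cases look the same to the writer.
    looks_like_our_own_write(FluPid, Epoch, LPN, Page) ->
        case corfurl_flu:read(FluPid, Epoch, LPN) of
            {ok, AlreadyThere} -> AlreadyThere =:= Page;
            _Else              -> false
        end.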
See also
* two-clients-race.1.png
## A scenario of chain repair & write-once registers
See:
* 2014-02-27.chain-repair-write-twice.png
... for a scenario where write-once registers that are truly only
write-once-ever-for-the-rest-of-the-future are "inconvenient" when it
comes to chain repair. Client 3 is attempting to do chain repair ops,
bringing FLU1 back into sync with FLU2.
The diagram proposes one possible idea for making overwriting a
write-once register a bit safer: ask another node in the chain to
verify that the page you've been asked to repair is exactly the same
as that other FLU's page.
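A hedged sketch of that verification step, reusing the prototype's
`corfurl_flu:read/3` call (the function name `verify_repair_page/4`
and the explicit witness argument are invented here for illustration):

    %% Before overwriting an already-written page on the FLU being
    %% repaired, ask a witness FLU elsewhere in the chain whether the
    %% page we were told to repair is exactly the page it holds.
    verify_repair_page(WitnessFluPid, Epoch, LPN, RepairPage) ->
        case corfurl_flu:read(WitnessFluPid, Epoch, LPN) of
            {ok, Page} when Page =:= RepairPage ->
                ok;                 % identical: safe to overwrite
            {ok, _Different} ->
                error_mismatch;     % disagreement: do not overwrite
            Else ->
                Else                % e.g. error_trimmed, error_badepoch
        end.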


@@ -0,0 +1,49 @@
msc {
"<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"];
"<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
"<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ;
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"];
"<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
"<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ;
"<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ;
"<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ;
"<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
"<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ;
"<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ;
"<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"];
"<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
"<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ;
"<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ;
"<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ;
"<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ;
"<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ;
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ;
"<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
"<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ;
"<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ;
"<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ;
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ;
"<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
"<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
"<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ;
"<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
"<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ;
"<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ;
"<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ;
}


@@ -0,0 +1,60 @@
msc {
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ;
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
"<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"];
}


@@ -0,0 +1,57 @@
msc {
"<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"];
"<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"],
"<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"],
"<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"];
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
"<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
"<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ;
# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ;
# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ;
"<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ;
"<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ;
"<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ;
"<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ;
"<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ;
"<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ;
"<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ;
"<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ;
"<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"];
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ;
"<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ;
"<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ;
"<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ;
"<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"];
}


@@ -0,0 +1,33 @@
msc {
client1, FLU1, FLU2, client2, client3;
client1 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
client1 <- FLU1 [label="ok"];
client3 -> FLU2 [label="{seal,epoch1}"];
client3 <- FLU2 [label="{ok,...}"];
client3 -> FLU1 [label="{seal,epoch1}"];
client3 <- FLU1 [label="{ok,...}"];
client2 -> FLU1 [label="{write,epoch1,<<Not unique page>>}"];
client2 <- FLU1 [label="error_epoch"];
client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"];
client2 -> FLU1 [label="{write,epoch2,<<Not unique page>>}"];
client2 <- FLU1 [label="error_overwritten"];
client1 -> FLU2 [label="{write,epoch1,<<Not unique page>>}"];
client1 <- FLU2 [label="error_epoch"];
client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"];
client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ;
client3 -> FLU1 [label="{read,epoch2}"];
client3 <- FLU1 [label="{ok,<<Not unique page>>}"];
client3 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
client3 <- FLU2 [label="ok"];
client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ;
client3 abox client3 [ label = "We saw <<Not unique page>>", textbgcolour="silver"] ;
client1 -> FLU2 [label="{write,epoch2,<<Not unique page>>}"];
client1 <- FLU2 [label="error_overwritten"];
}


@@ -0,0 +1,47 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-type flu_name() :: atom().
-type flu() :: pid() | flu_name().
-type flu_chain() :: [flu()].
-type seq_name() :: {'undefined' | pid(), atom(), atom()}.
-record(range, {
pn_start :: non_neg_integer(), % start page number
pn_end :: non_neg_integer(), % end page number
%% chains :: [flu_chain()]
chains :: tuple()
}).
-record(proj, { % Projection
dir :: string(),
page_size :: non_neg_integer(),
epoch :: non_neg_integer(),
seq :: 'undefined' | seq_name(),
r :: [#range{}]
}).
%% 1 byte @ offset 0: 0=unwritten, 1=written, 2=trimmed, 255=corrupt? TODO
%% 8 bytes @ offset 1: logical page number
%% P bytes @ offset 9: page data
%% 1 byte @ offset 9+P: 0=unwritten, 1=written
-define(PAGE_OVERHEAD, (1 + 8 + 1)).
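%% Illustration (added commentary, not part of the prototype's header):
%% for a page payload of PageSize bytes, the layout described above
%% corresponds to a binary of the form
%%
%%     <<Status:8, LPN:64, PageData:PageSize/binary, Trailer:8>>
%%
%% where Status and Trailer are both 1 for a fully written page, which
%% is why ?PAGE_OVERHEAD is 1 + 8 + 1 bytes.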


@@ -0,0 +1,55 @@
PulseBuild = case os:getenv("USE_PULSE") of
false ->
false;
_ ->
true
end,
case PulseBuild of
true ->
PulseOpts =
[{pulse_no_side_effect,
[{erlang,display,1}
]},
{pulse_side_effect,
[ {corfurl_sequencer, get, '_'}
, {corfurl_flu, write, '_'}
, {corfurl_flu, read, '_'}
, {corfurl_flu, seal, '_'}
, {corfurl_flu, trim, '_'}
, {corfurl_flu, fill, '_'}
, {corfurl, read_projection, '_'}
, {corfurl, save_projection, '_'}
, {prim_file, '_', '_'}
, {file, '_', '_'}
, {filelib, '_', '_'}
, {os, '_', '_'} ]},
{pulse_replace_module,
[ {gen_server, pulse_gen_server}
, {application, pulse_application}
, {supervisor, pulse_supervisor} ]}
],
PulseCFlags = [{"CFLAGS", "$CFLAGS -DPULSE"}],
UpdConfig = case lists:keysearch(eunit_compile_opts, 1, CONFIG) of
{value, {eunit_compile_opts, Opts}} ->
lists:keyreplace(eunit_compile_opts,
1,
CONFIG,
{eunit_compile_opts, Opts ++ PulseOpts});
_ ->
[{eunit_compile_opts, PulseOpts} | CONFIG]
end,
case lists:keysearch(port_env, 1, UpdConfig) of
{value, {port_env, PortEnv}} ->
lists:keyreplace(port_env,
1,
UpdConfig,
{port_env, PortEnv ++ PulseCFlags});
_ ->
[{port_env, PulseCFlags} | UpdConfig]
end;
false ->
CONFIG
end.


@@ -0,0 +1,376 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl).
-export([new_simple_projection/5,
new_range/3,
read_projection/2,
save_projection/2,
latest_projection_epoch_number/1]).
-export([write_page/3, read_page/2, scan_forward/3,
fill_page/2, trim_page/2]).
-export([simple_test_setup/5]).
-include("corfurl.hrl").
-ifdef(TEST).
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
-endif.
%%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
-define(EVENT_LOG(X), ok).
%%% -define(EVENT_LOG(X), event_logger:event(X)).
write_page(#proj{epoch=Epoch} = P, LPN, Page) ->
Chain = project_to_chain(LPN, P),
write_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1).
write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth) ->
write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth, ok).
write_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) ->
Reply;
write_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) ->
case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of
ok ->
write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
error_badepoch ->
%% TODO: Interesting case: there may be cases where retrying with
%% a new epoch & that epoch's projection is just fine (and
%% we'll succeed) and cases where retrying will fail.
%% Figure out what those cases are, then for the
%% destined-to-fail case, try to clean up (via trim?)?
error_badepoch;
error_trimmed when Nth == 1 ->
%% Whoa, partner, you're movin' kinda fast for a trim.
%% This might've been due to us being too slow and someone
%% else junked us.
error_trimmed;
error_trimmed when Nth > 1 ->
%% We're racing with a trimmer. We won the race at head,
%% but here in the middle or tail (Nth > 1), we lost.
%% Our strategy is keep racing down to the tail.
%% If we continue to lose the exact same race for the rest
%% of the chain, the 1st clause of this func will return 'ok'.
%% That is *exactly* our intent and purpose!
write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
error_overwritten when Nth == 1 ->
%% The sequencer lied, or we didn't use the sequencer and
%% guessed and guessed poorly, or someone is accidentally
%% trying to take our page. Shouganai, these things happen.
error_overwritten;
error_overwritten when Nth > 1 ->
%% The likely cause is that another reader has noticed that
%% we haven't finished writing this page in this chain and
%% has repaired the remainder of the chain while we were
%% drinking coffee. Let's double-check.
case corfurl_flu:read(flu_pid(FLU), Epoch, LPN) of
{ok, AlreadyThere} when AlreadyThere =:= Page ->
%% Alright, well, let's go continue the repair/writing,
%% since we agree on the page's value.
write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
error_badepoch ->
%% TODO: same TODO as the above error_badepoch case.
error_badepoch;
error_trimmed ->
%% This is the same as 'error_trimmed when Nth > 1' above.
%% Do the same thing.
write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
Else ->
%% Can PULSE drive us to this case?
giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth})
end
end.
read_page(#proj{epoch=Epoch} = P, LPN) ->
Chain = project_to_chain(LPN, P),
Tail = lists:last(Chain),
case corfurl_flu:read(flu_pid(Tail), Epoch, LPN) of
{ok, _} = OK ->
OK;
error_badepoch ->
error_badepoch;
error_trimmed ->
%% TODO: A sanity/should-never-happen check would be to
%% see if everyone else in the chain is also trimmed.
error_trimmed;
error_unwritten ->
%% TODO: During scan_forward(), this pestering of the upstream
%% nodes in the chain is possibly-excessive-work.
%% For now, we'll assume that we always want to repair.
read_repair_chain(Epoch, LPN, Chain)
%% Let it crash: error_overwritten
end.
ok_or_trim(ok) ->
ok;
ok_or_trim(error_trimmed) ->
ok;
ok_or_trim(Else) ->
Else.
read_repair_chain(Epoch, LPN, Chain) ->
try
read_repair_chain1(Epoch, LPN, Chain)
catch
throw:{i_give_up,Res} ->
Res
end.
read_repair_chain1(Epoch, LPN, [Head|Rest] = Chain) ->
?EVENT_LOG({read_repair, LPN, Chain, i_am, self()}),
case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of
{ok, Page} ->
?EVENT_LOG({read_repair, LPN, Head, ok}),
read_repair_chain2(Rest, Epoch, LPN, Page, Chain);
error_badepoch ->
?EVENT_LOG({read_repair, LPN, Head, badepoch}),
error_badepoch;
error_trimmed ->
?EVENT_LOG({read_repair, LPN, Head, trimmed}),
%% TODO: robustify
[begin
?EVENT_LOG({read_repair, LPN, fill, flu_pid(X)}),
ok = case ok_or_trim(corfurl_flu:fill(flu_pid(X), Epoch,
LPN)) of
ok ->
?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), ok}),
ok;
error_overwritten ->
?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), overwritten, try_to_trim}),
Res2 = ok_or_trim(corfurl_flu:trim(
flu_pid(X), Epoch, LPN)),
?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), trim, Res2}),
case Res2 of ok -> ok;
_ -> throw({i_give_up,Res2})
end;
Else ->
%% We're too deeply nested for the current code
%% to deal with, and we're racing. Fine, let
%% our opponent continue. We'll give up, and if
%% the client wants to try again, we can try
%% again from the top.
?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), Else}),
throw({i_give_up,Else})
end
end || X <- Rest],
error_trimmed;
error_unwritten ->
?EVENT_LOG({read_repair, LPN, read, Head, unwritten}),
error_unwritten
%% Let it crash: error_overwritten
end.
read_repair_chain2([] = _Repairees, _Epoch, _LPN, Page, _OriginalChain) ->
?EVENT_LOG({read_repair2, _LPN, finished, {ok, Page}}),
{ok, Page};
read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) ->
case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of
ok ->
?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), ok}),
read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain);
error_badepoch ->
?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), badepoch}),
error_badepoch;
error_trimmed ->
?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), trimmed}),
error_trimmed;
error_overwritten ->
?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), overwritten}),
%% We're going to do an optional sanity check here.
%% TODO: make the sanity check configurable?
case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of
{ok, Page2} when Page2 =:= Page ->
?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), exact_page}),
%% We're probably going to be racing against someone else
%% that's also doing repair, but so be it.
read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain);
{ok, _Page2} ->
?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), bad_page, _Page2}),
giant_error({bummerbummer, ?MODULE, ?LINE,
sanity_check_failure, lpn, LPN, epoch, Epoch});
error_badepoch ->
?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), badepoch}),
error_badepoch;
error_trimmed ->
?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), trimmed}),
%% Start repair at the beginning to handle this case
read_repair_chain(Epoch, LPN, OriginalChain)
%% Let it crash: error_overwritten, error_unwritten
end
%% Let it crash: error_unwritten
end.
scan_forward(P, LPN, MaxPages) ->
scan_forward(P, LPN, MaxPages, ok, true, []).
scan_forward(_P, LPN, 0, Status, MoreP, Acc) ->
{Status, LPN, MoreP, lists:reverse(Acc)};
scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) ->
case read_page(P, LPN) of
{ok, Page} ->
Res = {LPN, Page},
scan_forward(P, LPN + 1, MaxPages - 1, ok, true, [Res|Acc]);
error_badepoch ->
%% Halt, allow recursion to create our return value.
scan_forward(P, LPN, 0, error_badepoch, false, Acc);
error_trimmed ->
%% TODO: API question, do we add a 'trimmed' indicator
%% in the Acc? Or should the client assume that if
%% scan_forward() doesn't mention a page, then that page was trimmed?
scan_forward(P, LPN + 1, MaxPages - 1, ok, true, Acc);
error_unwritten ->
%% Halt, allow recursion to create our return value.
%% TODO: It's possible that we're stuck here because a client
%% crashed and that we see an unwritten page at LPN.
%% We ought to ask the sequencer always/sometime?? what
%% tail LPN is, and if there's a hole, start a timer to
%% allow us to fill the hole.
scan_forward(P, LPN, 0, ok, false, Acc)
%% Let it crash: error_overwritten
end.
fill_page(#proj{epoch=Epoch} = P, LPN) ->
Chain = project_to_chain(LPN, P),
fill_or_trim_page(Chain, Epoch, LPN, fill).
trim_page(#proj{epoch=Epoch} = P, LPN) ->
Chain = project_to_chain(LPN, P),
fill_or_trim_page(Chain, Epoch, LPN, trim).
fill_or_trim_page([], _Epoch, _LPN, _Func) ->
ok;
fill_or_trim_page([H|T], Epoch, LPN, Func) ->
case corfurl_flu:Func(flu_pid(H), Epoch, LPN) of
Res when Res == ok; Res == error_trimmed ->
%% Detecting a race here between fills and trims is too crazy,
%% and I don't believe that it *matters*. The ickiest one
%% is a race between Proc A = trim and Proc B = read,
%% chain length of 2 or more:
%% Proc A: trim head -> ok
%% Proc B: read tail -> error_unwritten
%% Proc B: read head -> error_trimmed
%% Proc B: trim tail -> ok
%% Proc A: trim tail -> ??
%%
%% The result that we want is that both A & B & any later
%% readers agree that the LPN is trimmed. If the chain is
%% >2, then the procs can win some/all/none of the races
%% to fix up the chain, that's no problem. But don't tell
%% the caller that there was an error during those races.
fill_or_trim_page(T, Epoch, LPN, Func);
Else ->
%% TODO: worth doing anything here, if we're in the middle of chain?
%% TODO: is that ^^ anything different for fill vs. trim?
Else
end.
flu_pid(X) when is_pid(X) ->
X;
flu_pid(X) when is_atom(X) ->
ets:lookup_element(flu_pid_tab, X, 1).
giant_error(Err) ->
io:format(user, "GIANT ERROR: ~p\n", [Err]),
exit(Err).
%%%% %%%% %%%% projection utilities %%%% %%%% %%%%
new_range(Start, End, ChainList) ->
%% TODO: sanity checking of ChainList, Start < End, yadda
#range{pn_start=Start, pn_end=End, chains=list_to_tuple(ChainList)}.
new_simple_projection(Dir, Epoch, Start, End, ChainList) ->
ok = filelib:ensure_dir(Dir ++ "/unused"),
#proj{dir=Dir, epoch=Epoch, r=[new_range(Start, End, ChainList)]}.
make_projection_path(Dir, Epoch) ->
lists:flatten(io_lib:format("~s/~12..0w.proj", [Dir, Epoch])).
read_projection(Dir, Epoch) ->
case file:read_file(make_projection_path(Dir, Epoch)) of
{ok, Bin} ->
{ok, binary_to_term(Bin)}; % TODO if corrupted?
{error, enoent} ->
error_unwritten;
Else ->
Else % TODO API corner case
end.
save_projection(Dir, #proj{epoch=Epoch} = P) ->
Path = make_projection_path(Dir, Epoch),
ok = filelib:ensure_dir(Dir ++ "/ignored"),
{_, B, C} = now(),
TmpPath = Path ++ lists:flatten(io_lib:format(".~w.~w.~w", [B, C, node()])),
%% TODO: don't be lazy, do a flush before link when training wheels come off
ok = file:write_file(TmpPath, term_to_binary(P)),
case file:make_link(TmpPath, Path) of
ok ->
file:delete(TmpPath),
ok;
{error, eexist} ->
error_overwritten;
Else ->
Else % TODO API corner case
end.
latest_projection_epoch_number(Dir) ->
case filelib:wildcard("*.proj", Dir) of
[] ->
-1;
Files ->
{Epoch, _} = string:to_integer(lists:last(Files)),
Epoch
end.
project_to_chain(LPN, P) ->
%% TODO fixme
%% TODO something other than round-robin?
[#range{pn_start=Start, pn_end=End, chains=Chains}] = P#proj.r,
if Start =< LPN, LPN =< End ->
I = ((LPN - Start) rem tuple_size(Chains)) + 1,
element(I, Chains);
true ->
exit({?MODULE, project_to_chain, [{start, Start},
{lpn, LPN},
{'end', End}]})
end.
simple_test_setup(RootDir, BaseDirName, PageSize, NumPages, NumFLUs) ->
PDir = RootDir ++ "/" ++ BaseDirName ++ ".projection",
filelib:ensure_dir(PDir),
BaseDir = RootDir ++ "/flu." ++ BaseDirName ++ ".",
MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end,
DeleteFLUData = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) ||
X <- lists:seq(1, NumFLUs)] end,
DeleteFLUData(),
FLUs = [begin
element(2, corfurl_flu:start_link(MyDir(X),
PageSize, NumPages*PageSize))
end || X <- lists:seq(1, NumFLUs)],
{ok, Seq} = corfurl_sequencer:start_link(FLUs),
P0 = corfurl:new_simple_projection(PDir, 1, 1, 1*100, [FLUs]),
P1 = P0#proj{seq={Seq, unused, unused}, page_size=PageSize},
{FLUs, Seq, P1, DeleteFLUData}.


@@ -0,0 +1,268 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_client).
-export([append_page/2, append_page/3,
read_page/2, fill_page/2, trim_page/2, scan_forward/3]).
-export([restart_sequencer/1]).
%% For debugging/verification only
-export([pulse_tracing_start/1, pulse_tracing_add/2, pulse_tracing_get/1]).
-include("corfurl.hrl").
-define(LONG_TIME, 5*1000).
%% -define(LONG_TIME, 30*1000).
append_page(Proj, Page) ->
append_page(Proj, Page, []).
append_page(Proj, Page, StreamList) ->
append_page(Proj, Page, StreamList, 5).
append_page(Proj, _Page, _StreamList, 0) ->
{{error_failed, ?MODULE, ?LINE}, Proj};
%% TODO: remove _StreamList arg entirely?
append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, StreamList, Retries) ->
try
{ok, LPN} = corfurl_sequencer:get(Sequencer, 1),
pulse_tracing_add(write, LPN),
append_page1(Proj, LPN, Page, StreamList, 5)
catch
exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}}
when Reason == noproc; Reason == normal ->
append_page(restart_sequencer(Proj), Page, StreamList, Retries);
exit:Exit ->
{{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit}
end.
append_page1(Proj, _LPN, _Page, _StreamList, 0) ->
{{error_failed, ?MODULE, ?LINE}, Proj};
append_page1(Proj, LPN, Page, StreamList, Retries) ->
case append_page2(Proj, LPN, Page) of
lost_race ->
append_page(Proj, Page, StreamList, Retries - 1);
error_badepoch ->
case poll_for_new_epoch_projection(Proj) of
{ok, NewProj} ->
append_page1(NewProj, LPN, Page, StreamList, Retries - 1);
Else ->
{Else, Proj}
end;
Else ->
{Else, Proj}
end.
append_page2(Proj, LPN, Page) ->
case corfurl:write_page(Proj, LPN, Page) of
ok ->
{ok, LPN};
X when X == error_overwritten; X == error_trimmed ->
report_lost_race(LPN, X),
lost_race;
{special_trimmed, LPN}=XX ->
XX;
error_badepoch=XX ->
XX
%% Let it crash: error_unwritten
end.
read_page(Proj, LPN) ->
retry_loop(Proj, fun(P) -> corfurl:read_page(P, LPN) end, 10).
fill_page(Proj, LPN) ->
retry_loop(Proj, fun(P) -> corfurl:fill_page(P, LPN) end, 10).
trim_page(Proj, LPN) ->
retry_loop(Proj, fun(P) -> corfurl:trim_page(P, LPN) end, 10).
scan_forward(Proj, LPN, MaxPages) ->
%% This is fiddly stuff that I'll get 0.7% wrong if I try to be clever.
%% So, do something simple and (I hope) obviously correct.
%% TODO: do something "smarter".
case corfurl:scan_forward(Proj, LPN, MaxPages) of
{error_badepoch, _LPN2, _MoreP, _Pages} = Res ->
case poll_for_new_epoch_projection(Proj) of
{ok, NewProj} ->
{Res, NewProj};
_Else ->
%% TODO: What is the risk of getting caught in a situation
%% where we can never make any forward progress when pages
%% really are being written?
{Res, Proj}
end;
Res ->
{Res, Proj}
end.
%%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%%
retry_loop(Proj, _Fun, 0) ->
{{error_failed, ?MODULE, ?LINE}, Proj};
retry_loop(Proj, Fun, Retries) ->
case Fun(Proj) of
error_badepoch ->
case poll_for_new_epoch_projection(Proj) of
{ok, NewProj} ->
retry_loop(NewProj, Fun, Retries - 1);
_Else ->
{{error_failed, ?MODULE, ?LINE}, Proj}
end;
Else ->
{Else, Proj}
end.
restart_sequencer(#proj{epoch=Epoch, dir=Dir} = P) ->
case corfurl:latest_projection_epoch_number(Dir) of
N when N > Epoch ->
%% Yay, someone else has intervened. Perhaps they've solved
%% our sequencer problem for us?
read_latest_projection(P);
_ ->
restart_sequencer2(P)
end.
restart_sequencer2(#proj{seq={OldSequencer, _SeqHost, SeqName},
epoch=Epoch, r=Ranges} = P) ->
spawn(fun() ->
(catch corfurl_sequencer:stop(OldSequencer))
end),
TODO_type = standard, % TODO: fix this hard-coding
FLUs = lists:usort(
[FLU || R <- Ranges,
C <- tuple_to_list(R#range.chains), FLU <- C]),
%% TODO: We can proceed if we can seal at least one FLU in
%% each chain. Robustify and sanity check.
[begin
_Res = corfurl_flu:seal(FLU, Epoch)
end || FLU <- lists:reverse(FLUs)],
case corfurl_sequencer:start_link(FLUs, TODO_type, SeqName) of
{ok, Pid} ->
NewP = P#proj{seq={Pid, node(), SeqName}, epoch=Epoch+1},
save_projection_or_get_latest(NewP)
end.
poll_for_new_epoch_projection(P) ->
put(silly_poll_counter, 0),
poll_for_new_epoch_projection(P, get_poll_retries()).
poll_for_new_epoch_projection(P, 0) ->
%% TODO: The client that caused the seal may have crashed before
%% writing a new projection. We should try to pick up here,
%% write a new projection, and bully forward.
%% NOTE: When that new logic is added, the huge polling interval
%% that PULSE uses should be reduced to something tiny.
case corfurl:latest_projection_epoch_number(P#proj.dir) of
Neg when Neg < 0 ->
error_badepoch;
Other ->
exit({bummer, ?MODULE, ?LINE, latest_epoch, Other})
end;
poll_for_new_epoch_projection(#proj{dir=Dir, epoch=Epoch} = P, Tries) ->
case corfurl:latest_projection_epoch_number(Dir) of
NewEpoch when NewEpoch > Epoch ->
corfurl:read_projection(Dir, NewEpoch);
_ ->
timer:sleep(get_poll_sleep_time()),
            %% Print a progress marker roughly every 10K polls.
            case put(silly_poll_counter, get(silly_poll_counter) + 1) rem (10*1000) of
0 -> io:format(user, "P", []);
_ -> ok
end,
poll_for_new_epoch_projection(P, Tries - 1)
end.
save_projection_or_get_latest(#proj{dir=Dir} = P) ->
case corfurl:save_projection(Dir, P) of
ok ->
P;
error_overwritten ->
read_latest_projection(P)
end.
read_latest_projection(#proj{dir=Dir}) ->
NewEpoch = corfurl:latest_projection_epoch_number(Dir),
{ok, NewP} = corfurl:read_projection(Dir, NewEpoch),
NewP.
-ifdef(TEST).
-ifdef(PULSE).
report_lost_race(_LPN, _Reason) ->
    %% It's interesting (sometimes?) to know if a page was overwritten
%% because the sequencer was configured by QuickCheck to hand out
%% duplicate LPNs. If this gets too annoying, this can be a no-op
%% function.
io:format(user, "o", []).
-else. % PULSE
report_lost_race(LPN, Reason) ->
io:format(user, "LPN ~p race lost: ~p\n", [LPN, Reason]).
-endif. % PULSE
-else. % TEST
report_lost_race(LPN, Reason) ->
%% Perhaps it's an interesting event, but the rest of the system
%% should react correctly whenever this happens, so it shouldn't
%% ever cause an external consistency problem.
error_logger:info_msg("LPN ~p race lost: ~p\n", [LPN, Reason]).
-endif. % TEST
-ifdef(PULSE).
get_poll_retries() ->
999*1000.
get_poll_sleep_time() ->
1.
-else.
get_poll_retries() ->
25.
get_poll_sleep_time() ->
50.
-endif.
-ifdef(PULSE).
pulse_tracing_start(Type) ->
put({?MODULE, Type}, []).
pulse_tracing_add(Type, Stuff) ->
List = case pulse_tracing_get(Type) of
undefined -> [];
L -> L
end,
put({?MODULE, Type}, [Stuff|List]).
pulse_tracing_get(Type) ->
get({?MODULE, Type}).
-else.
pulse_tracing_start(_Type) ->
ok.
pulse_tracing_add(_Type, _Stuff) ->
ok.
pulse_tracing_get(_Type) ->
ok.
-endif.

View file

@ -0,0 +1,466 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_flu).
-behaviour(gen_server).
-type flu_error() :: 'error_badepoch' | 'error_trimmed' |
'error_overwritten' | 'error_unwritten'.
-export_type([flu_error/0]).
%% API
-export([start_link/1, start_link/3, status/1, stop/1]).
-export([write/4, read/3, seal/2, trim/3, fill/3]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
-include("corfurl.hrl").
-ifdef(TEST).
-export([get__mlp/1, get__min_epoch/1, get__trim_watermark/1]).
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-compile({pulse_skip,[{msc, 3}]}).
-endif.
-endif.
-include_lib("kernel/include/file.hrl").
%%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
-define(EVENT_LOG(X), ok).
%% -define(EVENT_LOG(X), event_logger:event(X)).
-record(state, {
dir :: string(),
mem_fh :: term(),
min_epoch :: non_neg_integer(),
page_size :: non_neg_integer(),
max_mem :: non_neg_integer(),
max_logical_page :: 'unknown' | non_neg_integer(),
%% TODO: Trim watermark handling is *INCOMPLETE*. The
%% current code is broken but is occasionally correct,
%% like a broken analog watch is correct 2x per day.
trim_watermark :: non_neg_integer(),
trim_count :: non_neg_integer()
}).
start_link(Dir) ->
start_link(Dir, 8, 64*1024*1024).
start_link(Dir, PageSize, MaxMem) ->
gen_server:start_link(?MODULE, {Dir, PageSize, MaxMem}, []).
status(Pid) ->
gen_server:call(Pid, status, infinity).
stop(Pid) ->
gen_server:call(Pid, stop, infinity).
write(Pid, Epoch, LogicalPN, PageBin)
when is_integer(LogicalPN), LogicalPN > 0, is_binary(PageBin) ->
g_call(Pid, {write, Epoch, LogicalPN, PageBin}, infinity).
read(Pid, Epoch, LogicalPN)
when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 ->
g_call(Pid, {read, Epoch, LogicalPN}, infinity).
seal(Pid, Epoch) when is_integer(Epoch), Epoch > 0 ->
g_call(Pid, {seal, Epoch}, infinity).
trim(Pid, Epoch, LogicalPN)
when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 ->
g_call(Pid, {trim, Epoch, LogicalPN}, infinity).
fill(Pid, Epoch, LogicalPN)
when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 ->
Res = g_call(Pid, {fill, Epoch, LogicalPN}, infinity),
undo_special_pulse_test_result(Res).
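%% Illustrative usage sketch, not part of the original module; the epoch,
%% LPN, and page binary are hypothetical (start_link/1 defaults to a page
%% size of 8 bytes).  A page may be written at most once, and sealing an
%% epoch makes later requests tagged with that epoch return error_badepoch:
%%
%%     {ok, Flu} = corfurl_flu:start_link("/tmp/flu-demo"),
%%     ok                = corfurl_flu:write(Flu, 1, 1, <<42:64>>),
%%     {ok, <<42:64>>}   = corfurl_flu:read(Flu, 1, 1),
%%     error_overwritten = corfurl_flu:write(Flu, 1, 1, <<42:64>>),
%%     {ok, _MaxLPN}     = corfurl_flu:seal(Flu, 1),
%%     error_badepoch    = corfurl_flu:read(Flu, 1, 1),
%%     ok = corfurl_flu:stop(Flu).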
g_call(Pid, Arg, Timeout) ->
LC1 = lclock_get(),
msc(self(), Pid, Arg),
{Res, LC2} = gen_server:call(Pid, {Arg, LC1}, Timeout),
msc(Pid, self(), Res),
lclock_update(LC2),
Res.
-ifdef(TEST).
get__mlp(Pid) ->
gen_server:call(Pid, get__mlp, infinity).
get__min_epoch(Pid) ->
gen_server:call(Pid, get__min_epoch, infinity).
get__trim_watermark(Pid) ->
gen_server:call(Pid, get__trim_watermark, infinity).
-endif. % TEST
%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%%
init({Dir, ExpPageSize, ExpMaxMem}) ->
lclock_init(),
MemFile = memfile_path(Dir),
ok = filelib:ensure_dir(MemFile),
{ok, FH} = file:open(MemFile, [read, write, raw, binary]),
{_Version, MinEpoch, PageSize, MaxMem, TrimWatermark} =
try
Res = read_hard_state(Dir),
case Res of
{_V, _LE, PS, MM, TW}
when PS =:= ExpPageSize, MM =:= ExpMaxMem ->
Res
end
catch
X:Y ->
if X == error,
Y == {case_clause,{error,enoent}} ->
ok;
true ->
%% TODO: log-ify this
io:format("init: caught ~p ~p @ ~p\n",
[X, Y, erlang:get_stacktrace()])
end,
{no_version_number, 0, ExpPageSize, ExpMaxMem, 0}
end,
State = #state{dir=Dir, mem_fh=FH, min_epoch=MinEpoch, page_size=PageSize,
max_mem=MaxMem, max_logical_page=unknown,
trim_watermark=TrimWatermark, trim_count=0},
self() ! finish_init, % TODO
{ok, State}.
handle_call(Call, From, #state{max_logical_page=unknown} = State) ->
{noreply, NewState} = handle_info(finish_init, State),
handle_call(Call, From, NewState);
handle_call({{write, ClientEpoch, _LogicalPN, _PageBin}, LC1}, _From,
#state{min_epoch=MinEpoch} = State)
when ClientEpoch < MinEpoch ->
LC2 = lclock_update(LC1),
{reply, {error_badepoch, LC2}, State};
handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From,
#state{max_logical_page=MLPN} = State) ->
LC2 = lclock_update(LC1),
case check_write(LogicalPN, PageBin, State) of
{ok, Offset} ->
ok = write_page(Offset, LogicalPN, PageBin, State),
NewMLPN = erlang:max(LogicalPN, MLPN),
?EVENT_LOG({flu, write, self(), LogicalPN, ok}),
{reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}};
Else ->
?EVENT_LOG({flu, write, self(), LogicalPN, Else}),
{reply, {Else, LC2}, State}
end;
handle_call({{read, ClientEpoch, _LogicalPN}, LC1}, _From,
#state{min_epoch=MinEpoch} = State)
when ClientEpoch < MinEpoch ->
LC2 = lclock_update(LC1),
{reply, {error_badepoch, LC2}, State};
handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) ->
LC2 = lclock_update(LC1),
Reply = read_page(LogicalPN, State),
?EVENT_LOG({flu, read, self(), LogicalPN, Reply}),
{reply, {Reply, LC2}, State};
handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State)
when ClientEpoch < MinEpoch ->
LC2 = lclock_update(LC1),
{reply, {error_badepoch, LC2}, State};
handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=State) ->
LC2 = lclock_update(LC1),
NewState = State#state{min_epoch=ClientEpoch+1},
ok = write_hard_state(NewState),
{reply, {{ok, MLPN}, LC2}, NewState};
handle_call({{trim, ClientEpoch, _LogicalPN}, LC1}, _From,
#state{min_epoch=MinEpoch} = State)
when ClientEpoch < MinEpoch ->
LC2 = lclock_update(LC1),
{reply, {error_badepoch, LC2}, State};
handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) ->
LC2 = lclock_update(LC1),
{Reply, NewState} = do_trim_or_fill(trim, LogicalPN, State),
?EVENT_LOG({flu, trim, self(), LogicalPN, Reply}),
{reply, {Reply, LC2}, NewState};
handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From,
#state{min_epoch=MinEpoch} = State)
when ClientEpoch < MinEpoch ->
LC2 = lclock_update(LC1),
{reply, {error_badepoch, LC2}, State};
handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) ->
LC2 = lclock_update(LC1),
{Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State),
?EVENT_LOG({flu, fill, self(), LogicalPN, Reply}),
{reply, {Reply, LC2}, NewState};
handle_call(get__mlp, _From, State) ->
{reply, State#state.max_logical_page, State};
handle_call(get__min_epoch, _From, State) ->
{reply, State#state.min_epoch, State};
handle_call(get__trim_watermark, _From, State) ->
{reply, State#state.trim_watermark, State};
handle_call(status, _From, State) ->
L = [{min_epoch, State#state.min_epoch},
{page_size, State#state.page_size},
{max_mem, State#state.max_mem},
{max_logical_page, State#state.max_logical_page},
{trim_watermark, State#state.trim_watermark}],
{reply, {ok, L}, State};
handle_call(stop, _From, State) ->
{stop, normal, ok, State};
handle_call(Request, _From, State) ->
Reply = {whaaaaaaaaaaaaaaaaaa, Request},
{reply, Reply, State}.
handle_cast(_Msg, State) ->
{noreply, State}.
handle_info(finish_init, State) ->
MLP = find_max_logical_page(State),
State2 = State#state{max_logical_page=MLP},
ok = write_hard_state(State2),
{noreply, State2};
handle_info(_Info, State) ->
{noreply, State}.
terminate(_Reason, State) ->
ok = write_hard_state(State),
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%%
read_hard_state(Dir) ->
File = hard_state_path(Dir),
case file:read_file(File) of
{ok, Bin} ->
case binary_to_term(Bin) of
T when element(1, T) == v1 ->
T
end;
Else ->
Else
end.
write_hard_state(#state{min_epoch=MinEpoch, page_size=PageSize, max_mem=MaxMem,
trim_watermark=TrimWatermark} = S) ->
NewPath = hard_state_path(S#state.dir),
TmpPath = NewPath ++ ".tmp",
{ok, FH} = file:open(TmpPath, [write, binary, raw]),
HS = {v1, MinEpoch, PageSize, MaxMem, TrimWatermark},
ok = file:write(FH, term_to_binary(HS)),
%% ok = file:sync(FH), % TODO uncomment when the training wheels come off
ok = file:close(FH),
ok = file:rename(TmpPath, NewPath).
memfile_path(Dir) ->
Dir ++ "/memfile".
hard_state_path(Dir) ->
Dir ++ "/hard-state".
calc_page_offset(PhysicalPN, #state{page_size=PageSize}) ->
TotalSize = ?PAGE_OVERHEAD + PageSize,
PhysicalPN * TotalSize.
%% find_max_logical_page(): This is a kludge, based on our naive
%% implementation of not keeping the maximum logical page in hard
%% state.
find_max_logical_page(S) ->
{ok, FI} = file:read_file_info(memfile_path(S#state.dir)),
find_max_logical_page(0, 0, FI#file_info.size, S).
find_max_logical_page(MLP, PhysicalPN, FSize,
#state{mem_fh=FH, max_mem=MaxMem}=S) ->
Offset = calc_page_offset(PhysicalPN, S),
if Offset < MaxMem, Offset < FSize ->
case file:pread(FH, Offset, 9) of
{ok, <<1:8/big, LP:64/big>>} ->
find_max_logical_page(erlang:max(MLP, LP), PhysicalPN + 1,
FSize, S);
_ ->
find_max_logical_page(MLP, PhysicalPN + 1, FSize, S)
end;
true ->
MLP
end.
check_write(LogicalPN, PageBin,
#state{max_mem=MaxMem, page_size=PageSize} = S) ->
Offset = calc_page_offset(LogicalPN, S),
if Offset < MaxMem, byte_size(PageBin) =:= PageSize ->
case check_is_written(Offset, LogicalPN, S) of
false ->
{ok, Offset};
true ->
error_overwritten
end;
true ->
            {bummer, ?MODULE, ?LINE, lpn, LogicalPN, offset, Offset,
             max_mem, MaxMem, page_size, PageSize,
             bin_size, byte_size(PageBin)}
end.
check_is_written(Offset, _PhysicalPN, #state{mem_fh=FH}) ->
case file:pread(FH, Offset, 1) of
{ok, <<0:8>>} ->
false;
{ok, <<1:8>>} -> % written
true;
{ok, <<2:8>>} -> % trimmed
true;
eof ->
%% We assume that Offset has been bounds-checked
false
end.
write_page(Offset, LogicalPN, PageBin, #state{mem_fh=FH}) ->
IOList = [<<1:8>>, <<LogicalPN:64/big>>, PageBin, <<1:8>>],
ok = file:pwrite(FH, Offset, IOList).
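%% Apparent layout of each page slot in the memfile, as implied by
%% write_page/4, check_is_written/3, and read_page/2 (so ?PAGE_OVERHEAD is
%% evidently 1 + 8 + 1 = 10 bytes in this prototype):
%%
%%     <<Status:8, LogicalPN:64/big, PageBin:PageSize/binary, Written:8>>
%%
%% Status is 0 = unwritten, 1 = written, 2 = trimmed/filled.  The trailing
%% Written byte is 1 for a complete write; read_page/2 treats a leading 1
%% with a trailing 0 as an incomplete (torn) write.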
read_page(LogicalPN, #state{max_mem=MaxMem, mem_fh=FH,
page_size=PageSize} = S) ->
Offset = calc_page_offset(LogicalPN, S),
if Offset < MaxMem ->
case file:pread(FH, Offset, PageSize + ?PAGE_OVERHEAD) of
{ok, <<1:8, LogicalPN:64/big, Page:PageSize/binary, 1:8>>} ->
{ok, Page};
{ok, <<1:8, _LogicalPN:64/big, _:PageSize/binary, 0:8>>} ->
io:format("BUMMER: ~s line ~w: incomplete write at ~p\n",
[?MODULE, ?LINE, LogicalPN]),
error_unwritten;
{ok, <<2:8, _/binary>>} ->
error_trimmed;
{ok, _} ->
error_unwritten;
eof ->
error_unwritten;
Else ->
io:format("BUMMER: ~s line ~w: ~p\n",
[?MODULE, ?LINE, Else]),
badarg % TODO: better idea
end;
true ->
badarg
end.
do_trim_or_fill(Op, LogicalPN,
#state{trim_watermark=TrimWatermark, trim_count=TrimCount} = S) ->
case trim_page(Op, LogicalPN, S) of
ok ->
NewS = S#state{trim_watermark=erlang:max(
TrimWatermark, LogicalPN),
trim_count=TrimCount + 1},
if TrimCount rem 1000 == 0 ->
ok = write_hard_state(NewS);
true ->
ok
end,
{ok, NewS};
Else ->
{Else, S}
end.
trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) ->
Offset = calc_page_offset(LogicalPN, S),
if Offset < MaxMem ->
Status = case file:pread(FH, Offset, 1) of
{ok, <<0:8>>} ->
error_unwritten;
{ok, <<1:8>>} ->
error_overwritten;
{ok, <<2:8>>} ->
error_trimmed;
eof ->
error_unwritten;
Else ->
io:format("BUMMER: ~s line ~w: ~p\n",
[?MODULE, ?LINE, Else]),
error_trimmed % TODO
end,
if Status == error_overwritten andalso Op == trim ->
ok = file:pwrite(FH, Offset, <<2:8>>),
ok;
Status == error_unwritten andalso Op == fill ->
ok = file:pwrite(FH, Offset, <<2:8>>),
ok;
true ->
Status
end;
true ->
badarg
end.
-ifdef(PULSE).
%% We do *not* want to remove any special PULSE return code.
undo_special_pulse_test_result(Res) ->
Res.
-else. % PULSE
undo_special_pulse_test_result({special_trimmed, LPN}) ->
{ok, LPN};
undo_special_pulse_test_result(Res) ->
Res.
-endif. % PULSE
-ifdef(PULSE_HACKING).
%% Create a trace file that can be formatted by "mscgen" utility.
%% Lots of hand-editing is required after creating the file, sorry!
msc(_From, _To, _Tag) ->
{ok, FH} = file:open("/tmp/goo", [write, append]),
io:format(FH, " \"~w\" -> \"~w\" [ label = \"~w\" ] ;\n", [_From, _To, _Tag]),
file:close(FH).
-else. % PULSE_HACKING
msc(_From, _To, _Tag) ->
ok.
-endif. % PULSE_HACKING
-ifdef(PULSE).
lclock_init() ->
lamport_clock:init().
lclock_get() ->
lamport_clock:get().
lclock_update(LC) ->
lamport_clock:update(LC).
-else. % PULSE
lclock_init() ->
ok.
lclock_get() ->
ok.
lclock_update(_LC) ->
ok.
-endif. % PULSE

View file

@ -0,0 +1,208 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_sequencer).
-behaviour(gen_server).
-export([start_link/1, start_link/2, start_link/3,
stop/1, stop/2,
get/2, get_tails/3]).
-export([set_tails/2]).
-ifdef(TEST).
-compile(export_all).
-endif.
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
-endif.
-define(SERVER, ?MODULE).
%% -define(LONG_TIME, 30*1000).
-define(LONG_TIME, 5*1000).
-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])).
start_link(FLUs) ->
start_link(FLUs, standard).
start_link(FLUs, SeqType) ->
start_link(FLUs, SeqType, ?SERVER).
start_link(FLUs, SeqType, RegName) ->
case gen_server:start_link({local, RegName}, ?MODULE, {FLUs, SeqType},[]) of
{ok, Pid} ->
{ok, Pid};
{error, {already_started, Pid}} ->
{ok, Pid};
Else ->
Else
end.
stop(Pid) ->
stop(Pid, stop).
stop(Pid, Method) ->
Res = gen_server:call(Pid, stop, infinity),
if Method == kill ->
%% Emulate gen.erl's client-side behavior when the server process
%% is killed.
exit(killed);
true ->
Res
end.
get(Pid, NumPages) ->
{LPN, LC} = gen_server:call(Pid, {get, NumPages, lclock_get()},
?LONG_TIME),
lclock_update(LC),
LPN.
get_tails(Pid, NumPages, StreamList) ->
{Tails, LC} = gen_server:call(Pid,
{get_tails, NumPages, StreamList, lclock_get()},
?LONG_TIME),
lclock_update(LC),
Tails.
set_tails(Pid, StreamTails) ->
ok = gen_server:call(Pid, {set_tails, StreamTails}, ?LONG_TIME).
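%% Illustrative sketch, not part of the original module: get/2 reserves
%% NumPages LPNs and returns the first of them, while get_tails/3 also
%% returns, for each requested stream, the back-pointer list recorded for
%% that stream before this reservation is applied (newest LPN first).
%% Names and numbers below are hypothetical.
%%
%%     {ok, LPN}            = corfurl_sequencer:get(Seq, 1),
%%     {ok, LPN2, [BackPs]} = corfurl_sequencer:get_tails(Seq, 1, [StreamNum]),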
%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%%
init({FLUs, TypeOrSeed}) ->
lclock_init(),
MLP = get_max_logical_page(FLUs),
Tab = ets:new(?MODULE, [set, private, {keypos, 1}]),
if TypeOrSeed == standard ->
{ok, {Tab, MLP + 1}};
true ->
{Seed, BadPercent, MaxDifference} = TypeOrSeed,
random:seed(Seed),
{ok, {Tab, MLP+1, BadPercent, MaxDifference}}
end.
handle_call({get, NumPages, LC}, _From, {Tab, MLP}) ->
NewLC = lclock_update(LC),
{reply, {{ok, MLP}, NewLC}, {Tab, MLP + NumPages}};
handle_call({get, NumPages, LC}, _From,
{Tab, MLP, BadPercent, MaxDifference}) ->
NewLC = lclock_update(LC),
Fudge = case random:uniform(100) of
N when N < BadPercent ->
random:uniform(MaxDifference * 2) - MaxDifference;
_ ->
0
end,
{reply, {{ok, erlang:max(1, MLP + Fudge)}, NewLC},
{Tab, MLP + NumPages, BadPercent, MaxDifference}};
handle_call({get_tails, NumPages, StreamList, LC}, _From, MLP_tuple) ->
Tab = element(1, MLP_tuple),
MLP = element(2, MLP_tuple),
Tails = [case (catch ets:lookup_element(Tab, Stream, 2)) of
{'EXIT', _} ->
[];
Res ->
Res
end || Stream <- StreamList],
if NumPages > 0 ->
update_stream_tails(Tab, StreamList, MLP);
true ->
ok
end,
NewLC = lclock_update(LC),
{reply, {{ok, MLP, Tails}, NewLC},
setelement(2, MLP_tuple, MLP + NumPages)};
handle_call({set_tails, StreamTails}, _From, MLP_tuple) ->
Tab = element(1, MLP_tuple),
true = ets:delete_all_objects(Tab),
[ets:insert(Tab, {Stream, Tail}) || {Stream, Tail} <- StreamTails],
{reply, ok, MLP_tuple};
handle_call(stop, _From, MLP) ->
{stop, normal, ok, MLP};
handle_call(_Request, _From, MLP) ->
Reply = idunnoooooooooooooooooooooooooo,
{reply, Reply, MLP}.
handle_cast(_Msg, MLP) ->
{noreply, MLP}.
handle_info(_Info, MLP) ->
{noreply, MLP}.
terminate(_Reason, _MLP) ->
ok.
code_change(_OldVsn, MLP, _Extra) ->
{ok, MLP}.
%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%%
get_max_logical_page(FLUs) ->
lists:max([proplists:get_value(max_logical_page, Ps, 0) ||
FLU <- FLUs,
{ok, Ps} <- [corfurl_flu:status(FLU)]]).
update_stream_tails(Tab, StreamList, LPN) ->
[begin
OldBackPs = try ets:lookup_element(Tab, Stream, 2)
catch error:badarg -> []
end,
NewBackPs = add_back_pointer(OldBackPs, LPN),
ets:insert(Tab, {Stream, NewBackPs})
end || Stream <- StreamList].
add_back_pointer([D,C,B,_A|_], New) ->
[New,D,C,B];
add_back_pointer([], New) ->
[New];
add_back_pointer(BackPs, New) ->
[New|BackPs].
-ifdef(PULSE).
lclock_init() ->
lamport_clock:init().
lclock_get() ->
lamport_clock:get().
lclock_update(LC) ->
lamport_clock:update(LC).
-else. % PULSE
lclock_init() ->
ok.
lclock_get() ->
ok.
lclock_update(_LC) ->
ok.
-endif. % PULSE

View file

@ -0,0 +1,40 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_util).
-export([delete_dir/1]).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
delete_dir(Dir) ->
    %% We don't delete recursively: if Dir contains a subdirectory,
    %% the 'ok' pattern match below will fail.
[ok = file:delete(X) || X <- filelib:wildcard(Dir ++ "/*")],
case file:del_dir(Dir) of
ok ->
ok;
{error, enoent} ->
ok;
Else ->
Else
end.

View file

@ -0,0 +1,219 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% A prototype implementation of Tango over Corfurl.
-module(tango).
-include("corfurl.hrl").
-export([pack_v1/4, unpack_v1/2,
add_back_pointer/2,
add_back_pointer/3,
scan_backward/4,
scan_backward/5,
pad_bin/2,
append_page/3,
back_ps2last_lpn/1,
append_lpns/2]).
-define(MAGIC_NUMBER_V1, 16#88990011).
-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])).
%% TODO: for version 2: add strong checksum
pack_v1(Stream_BackPs, Options, Page, PageSize)
when is_list(Stream_BackPs), is_list(Options), is_binary(Page),
is_integer(PageSize), PageSize > 0 ->
Stream_BackPsBin = term_to_binary(Stream_BackPs),
Stream_BackPsSize = byte_size(Stream_BackPsBin),
OptionsInt = convert_options_list2int(Options),
PageActualSize = byte_size(Page),
pad_bin(PageSize,
list_to_binary([<<?MAGIC_NUMBER_V1:32/big>>,
<<OptionsInt:8/big>>,
<<Stream_BackPsSize:16/big>>,
Stream_BackPsBin,
<<PageActualSize:16/big>>,
Page])).
unpack_v1(<<?MAGIC_NUMBER_V1:32/big,
_Options:8/big,
Stream_BackPsSize:16/big, Stream_BackPsBin:Stream_BackPsSize/binary,
PageActualSize:16/big, Page:PageActualSize/binary,
_/binary>>, Part) ->
if Part == stream_list ->
binary_to_term(Stream_BackPsBin);
Part == page ->
Page
end.
pad_bin(Size, Bin) when byte_size(Bin) >= Size ->
Bin;
pad_bin(Size, Bin) ->
PadSize = Size - byte_size(Bin),
<<Bin/binary, 0:(PadSize*8)>>.
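%% Apparent layout of a packed page, as implied by pack_v1/4 and
%% unpack_v1/2 above, zero-padded out to the Corfurl page size:
%%
%%     <<?MAGIC_NUMBER_V1:32/big, OptionsInt:8/big,
%%       BackPsSize:16/big, BackPsBin:BackPsSize/binary,
%%       PayloadSize:16/big, Payload:PayloadSize/binary, ZeroPad/binary>>
%%
%% BackPsBin is term_to_binary/1 of a [{StreamNum, BackPs}] proplist, and
%% OptionsInt currently only uses bit 0 (t_final_page).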
add_back_pointer(StreamNum, BackPs, NewBackP) ->
case proplists:get_value(StreamNum, BackPs) of
undefined ->
[{StreamNum, [NewBackP]}];
IndividualBackPs ->
[{StreamNum, add_back_pointer(IndividualBackPs, NewBackP)}
|lists:keydelete(StreamNum, 1, BackPs)]
end.
add_back_pointer([A,B,C,_D|_], New) ->
[New,A,B,C];
add_back_pointer([], New) ->
[New];
add_back_pointer(BackPs, New) ->
[New|BackPs].
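%% Illustrative examples with hypothetical LPNs: back-pointer lists are
%% kept newest-first and capped at four entries, so repeated appends to a
%% single stream evolve like this:
%%
%%     add_back_pointer([], 1)        -> [1]
%%     add_back_pointer([1], 2)       -> [2,1]
%%     add_back_pointer([4,3,2,1], 5) -> [5,4,3,2]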
convert_options_list2int(Options) ->
lists:foldl(fun(t_final_page, Int) -> Int + 1;
(_, Int) -> Int
end, 0, Options).
scan_backward(Proj, Stream, LastLPN, WithPagesP) ->
scan_backward(Proj, Stream, LastLPN, 0, WithPagesP).
scan_backward(Proj, Stream, LastLPN, StopAtLPN, WithPagesP) ->
lists:reverse(scan_backward2(Proj, Stream, LastLPN, StopAtLPN,
0, WithPagesP)).
scan_backward2(_Proj, _Stream, LastLPN, StopAtLPN, _NumPages, _WithPagesP)
when LastLPN =< StopAtLPN; LastLPN =< 0 ->
[];
scan_backward2(Proj, Stream, LastLPN, StopAtLPN, NumPages, WithPagesP) ->
case corfurl:read_page(Proj, LastLPN) of
{ok, FullPage} ->
case proplists:get_value(Stream, unpack_v1(FullPage, stream_list)) of
undefined ->
if NumPages == 0 ->
%% We were told to start scanning backward at some
%% LPN, but that LPN doesn't have a stream for us.
%% So we'll go backward a page and see if we get
%% lucky there.
scan_backward2(Proj, Stream, LastLPN-1, StopAtLPN,
NumPages, WithPagesP);
true ->
%% Oops, we pointed into a hole. That's bad.
%% TODO: fixme
{gah_fixme, lpn, LastLPN, unpack_v1(FullPage, stream_list)}
end;
[] ->
if WithPagesP ->
[{LastLPN, unpack_v1(FullPage, page)}];
true ->
[LastLPN]
end;
BackPs ->
if WithPagesP ->
%% ?D({bummer, BackPs}),
[{LastLPN, unpack_v1(FullPage, page)}|
scan_backward2(Proj, Stream,
hd(BackPs), StopAtLPN, NumPages + 1,
WithPagesP)];
true ->
SkipLPN = lists:last(BackPs),
AddLPNs = [LPN || LPN <- BackPs,
LPN /= SkipLPN,
LPN > StopAtLPN],
[LastLPN] ++ AddLPNs ++
scan_backward2(Proj, Stream,
SkipLPN, StopAtLPN, NumPages + 1,
WithPagesP)
end
end;
Err ->
%% ?D({scan, LastLPN, Err}),
Err
end.
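%% Illustrative sketch with hypothetical LPNs: if stream 7's pages live at
%% LPNs 3, 5, 9, and 12, then the page at 12 carries back-pointers [9,5,3].
%% Scanning backward from 12 with WithPagesP == false emits 12 plus the
%% intermediate back-pointers [9,5] and then jumps straight to the oldest
%% back-pointer (3), so only the pages at each hop (12 and 3 here) are
%% actually read; with WithPagesP == true every page is read by following
%% hd(BackPs) one hop at a time.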
%% Hrm, this looks pretty similar to corfurl_client:append_page.
append_page(Proj, Page, StreamList) ->
append_page(Proj, Page, StreamList, 5).
append_page(Proj, _Page, _StreamList, 0) ->
{{error_failed, ?MODULE, ?LINE}, Proj};
append_page(#proj{seq={Sequencer,_,_}, page_size=PageSize} = Proj,
OrigPage, StreamList, Retries) ->
try
{ok, LPN, BackPsList} = corfurl_sequencer:get_tails(Sequencer, 1,
StreamList),
%% pulse_tracing_add(write, LPN),
StreamBackPs = lists:zip(StreamList, BackPsList),
Page = tango:pack_v1(StreamBackPs, [t_final_page],
OrigPage, PageSize),
append_page1(Proj, LPN, Page, StreamList, 5, OrigPage)
catch
exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}}
when Reason == noproc; Reason == normal ->
            NewProj = corfurl_client:restart_sequencer(Proj),
            append_page(NewProj, OrigPage, StreamList, Retries);
exit:Exit ->
{{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit}
end.
append_page1(Proj, _LPN, _Page, _StreamList, 0, _OrigPage) ->
{{error_failed, ?MODULE, ?LINE}, Proj};
append_page1(Proj, LPN, Page, StreamList, Retries, OrigPage) ->
case append_page2(Proj, LPN, Page) of
lost_race ->
append_page(Proj, OrigPage, StreamList, Retries - 1);
error_badepoch ->
            case corfurl_client:poll_for_new_epoch_projection(Proj) of
{ok, NewProj} ->
append_page1(NewProj, LPN, Page, StreamList, Retries - 1,
OrigPage);
Else ->
{Else, Proj}
end;
Else ->
{Else, Proj}
end.
append_page2(Proj, LPN, Page) ->
case corfurl:write_page(Proj, LPN, Page) of
ok ->
{ok, LPN};
X when X == error_overwritten; X == error_trimmed ->
%% report_lost_race(LPN, X),
lost_race;
{special_trimmed, LPN}=XX ->
XX;
        error_badepoch=XX ->
XX
%% Let it crash: error_unwritten
end.
back_ps2last_lpn([]) ->
0;
back_ps2last_lpn([H|_]) ->
H.
append_lpns([], BPs) ->
BPs;
append_lpns(LPNs, BPs) ->
lists:reverse(LPNs) ++ BPs.

View file

@ -0,0 +1,171 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_dt).
-behaviour(gen_server).
%% API
-export([start_link/5, stop/1, checkpoint/1]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
-define(LONG_TIME, 30*1000).
-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])).
-type lpn() :: non_neg_integer().
-record(state, {
page_size :: non_neg_integer(), % Corfurl page size
seq :: pid(), % sequencer pid
proj :: term(), % projection
stream_num :: non_neg_integer(), % this instance's OID number
cb_mod :: atom(), % callback module
last_fetch_lpn :: lpn(), %
all_back_ps :: [lpn()], % All back-pointers LIFO order!
i_state :: term() % internal state thingie
}).
-type callback_i_state() :: term().
-type gen_server_from() :: {pid(), Tag::term()}.
-callback fresh() -> callback_i_state().
-callback do_pure_op(term(), callback_i_state()) -> term().
-callback do_dirty_op(term(), gen_server_from(), callback_i_state(),
StreamNum::non_neg_integer(),
Proj0::term(), PageSize::non_neg_integer()) ->
    {AsyncType :: 'op_t_async' | 'op_t_sync', New_I_State::callback_i_state(),
     Proj::term(), LPN::non_neg_integer()}.
-callback play_log_mutate_i_state([binary()], boolean(), callback_i_state()) ->
callback_i_state().
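%% The {sync_checkpoint} clause below also calls CallbackMod:do_checkpoint/1,
%% so callback modules are expected to export it too; it returns the list of
%% ops that, replayed from a fresh state, reproduce the current state.
-callback do_checkpoint(callback_i_state()) -> [term()].
%% Minimal sketch of a callback module (a hypothetical "counter" type, shown
%% only to illustrate the contract; see tango_dt_register for a real one):
%%
%%     fresh() -> 0.
%%     do_pure_op({o_get}, N) -> {ok, N}.
%%     do_dirty_op(Op, _From, N, StreamNum, Proj0, _PageSize) ->
%%         {{ok, LPN}, Proj1} =
%%             tango:append_page(Proj0, term_to_binary([Op]), [StreamNum]),
%%         {op_t_async, N, Proj1, LPN}.
%%     do_checkpoint(N) -> [{o_start_checkpoint}, {o_add, N}].
%%     play_log_mutate_i_state(Pages, _SideEffectsP, N0) ->
%%         lists:foldl(fun({o_add, X}, N)           -> N + X;
%%                        ({o_start_checkpoint}, _) -> 0
%%                     end, N0,
%%                     lists:append([binary_to_term(P) || P <- Pages])).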
start_link(PageSize, SequencerPid, Proj, CallbackMod, StreamNum) ->
gen_server:start_link(?MODULE,
[PageSize, SequencerPid, Proj, CallbackMod, StreamNum],
[]).
stop(Pid) ->
gen_server:call(Pid, {stop}, ?LONG_TIME).
checkpoint(Pid) ->
gen_server:call(Pid, {sync_checkpoint}, ?LONG_TIME).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
init([PageSize, SequencerPid, Proj, CallbackMod, StreamNum]) ->
LastLPN = find_last_lpn(SequencerPid, StreamNum),
{LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, 0, StreamNum),
?D({self(), LPNs}),
BackPs = tango:append_lpns(LPNs, []),
LastFetchLPN = tango:back_ps2last_lpn(BackPs),
I_State = play_log_pages(Pages, CallbackMod:fresh(), CallbackMod, false),
{ok, #state{page_size=PageSize,
seq=SequencerPid,
proj=Proj,
cb_mod=CallbackMod,
stream_num=StreamNum,
last_fetch_lpn=LastFetchLPN,
all_back_ps=BackPs,
i_state=I_State}}.
handle_call({cb_dirty_op, Op}, From,
#state{proj=Proj0, cb_mod=CallbackMod, stream_num=StreamNum,
page_size=PageSize, i_state=I_State}=State)->
{AsyncType, I_State2, Proj1, _LPN} =
CallbackMod:do_dirty_op(Op, From, I_State, StreamNum,
Proj0, PageSize),
State2 = State#state{i_state=I_State2,
proj=Proj1},
if AsyncType == op_t_async ->
{reply, ok, State2};
AsyncType == op_t_sync ->
State3 = roll_log_forward(State2),
{noreply, State3}
end;
handle_call({cb_pure_op, Op}, _From, #state{cb_mod=CallbackMod} = State) ->
State2 = #state{i_state=I_State} = roll_log_forward(State),
Reply = CallbackMod:do_pure_op(Op, I_State),
{reply, Reply, State2};
handle_call({sync_checkpoint}, From,
#state{proj=Proj0, cb_mod=CallbackMod, stream_num=StreamNum,
page_size=PageSize, i_state=I_State}=State)->
CheckpointOps = CallbackMod:do_checkpoint(I_State),
%% CheckpointBackPs = [],
{_OpT, I_State2, Proj1, _LPN} =
CallbackMod:do_dirty_op(CheckpointOps, From, I_State, StreamNum,
Proj0, PageSize),
?D({sync_checkpoint, _LPN}),
%% TODO: Use this LPN so that we can tell the corfurl log GC
%% that we have created some dead bytes in the log.
{reply, ok, State#state{i_state=I_State2,
proj=Proj1}};
handle_call({stop}, _From, State) ->
{stop, normal, ok, State};
handle_call(_Request, _From, State) ->
Reply = whaaaaaaaaaaaa,
{reply, Reply, State}.
handle_cast(_Msg, State) ->
{noreply, State}.
handle_info(_Info, State) ->
{noreply, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
find_last_lpn(SequencerPid, StreamNum) ->
{ok, _, [BackPs]} = corfurl_sequencer:get_tails(SequencerPid,
0, [StreamNum]),
tango:back_ps2last_lpn(BackPs).
fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum)
when LastLPN >= StopAtLPN ->
LPNandPages = tango:scan_backward(Proj, StreamNum, LastLPN,
StopAtLPN, true),
{_LPNs, _Pages} = lists:unzip(LPNandPages).
play_log_pages(Pages, SideEffectsP,
#state{cb_mod=CallbackMod, i_state=I_State} = State) ->
I_State2 = play_log_pages(Pages, I_State, CallbackMod, SideEffectsP),
State#state{i_state=I_State2}.
play_log_pages(Pages, I_State, CallbackMod, SideEffectsP) ->
CallbackMod:play_log_mutate_i_state(Pages, SideEffectsP, I_State).
roll_log_forward(#state{seq=SequencerPid, proj=Proj, all_back_ps=BackPs,
stream_num=StreamNum,
last_fetch_lpn=StopAtLPN} = State) ->
LastLPN = find_last_lpn(SequencerPid, StreamNum),
{LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum),
?D({self(), LPNs}),
NewBackPs = tango:append_lpns(LPNs, BackPs),
LastFetchLPN = tango:back_ps2last_lpn(NewBackPs),
play_log_pages(Pages, true,
State#state{all_back_ps=NewBackPs,
last_fetch_lpn=LastFetchLPN}).

View file

@ -0,0 +1,83 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_dt_map).
-behaviour(tango_dt).
-export([start_link/4, stop/1,
set/3, get/2,
checkpoint/1]).
%% Tango datatype callbacks
-export([fresh/0,
do_pure_op/2, do_dirty_op/6, do_checkpoint/1,
play_log_mutate_i_state/3]).
-define(DICTMOD, dict).
-define(LONG_TIME, 30*1000).
start_link(PageSize, SequencerPid, Proj, StreamNum) ->
gen_server:start_link(tango_dt,
[PageSize, SequencerPid, Proj, ?MODULE, StreamNum],
[]).
stop(Pid) ->
tango_dt:stop(Pid).
set(Pid, Key, Val) ->
gen_server:call(Pid, {cb_dirty_op, {o_set, Key, Val}}, ?LONG_TIME).
get(Pid, Key) ->
gen_server:call(Pid, {cb_pure_op, {o_get, Key}}, ?LONG_TIME).
checkpoint(Pid) ->
tango_dt:checkpoint(Pid).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fresh() ->
?DICTMOD:new().
do_pure_op({o_get, Key}, Dict) ->
?DICTMOD:find(Key, Dict).
do_dirty_op(Op0, _From,
I_State, StreamNum, Proj0, ___TODO_delme_PageSize) ->
Op = if is_list(Op0) -> Op0;
true -> [Op0] % always make a list
end,
Page = term_to_binary(Op),
{{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]),
{op_t_async, I_State, Proj1, LPN}.
do_checkpoint(Dict=_I_State) ->
[{o_start_checkpoint}|[{o_set, X, Y} || {X, Y} <- ?DICTMOD:to_list(Dict)]].
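%% Illustrative example with hypothetical keys/values: a checkpoint of a map
%% holding k1 => 1 and k2 => 2 appends a single page whose op list is
%%
%%     [{o_start_checkpoint}, {o_set, k1, 1}, {o_set, k2, 2}]
%%
%% so a reader that replays the log from that page onward starts from
%% fresh() and rebuilds the same map without needing any older pages.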
play_log_mutate_i_state(Pages, _SideEffectsP, I_State) ->
lists:foldl(fun({o_set, Key, Val}=_Op, Dict) ->
?DICTMOD:store(Key, Val, Dict);
({o_start_checkpoint}, _Dict) ->
fresh()
end,
I_State,
lists:append([binary_to_term(Page) || Page <- Pages])).

View file

@ -0,0 +1,131 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_dt_queue).
-behaviour(tango_dt).
-export([start_link/4, stop/1,
is_empty/1, length/1, peek/1, to_list/1, member/2,
in/2, out/1, reverse/1, filter/2,
checkpoint/1]).
%% Tango datatype callbacks
-export([fresh/0,
do_pure_op/2, do_dirty_op/6, do_checkpoint/1,
play_log_mutate_i_state/3]).
-define(LONG_TIME, 30*1000).
-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])).
start_link(PageSize, SequencerPid, Proj, StreamNum) ->
gen_server:start_link(tango_dt,
[PageSize, SequencerPid, Proj, ?MODULE, StreamNum],
[]).
stop(Pid) ->
tango_dt:stop(Pid).
is_empty(Pid) ->
gen_server:call(Pid, {cb_pure_op, {o_is_empty}}, ?LONG_TIME).
length(Pid) ->
gen_server:call(Pid, {cb_pure_op, {o_length}}, ?LONG_TIME).
peek(Pid) ->
gen_server:call(Pid, {cb_pure_op, {o_peek}}, ?LONG_TIME).
to_list(Pid) ->
gen_server:call(Pid, {cb_pure_op, {o_to_list}}, ?LONG_TIME).
member(Pid, X) ->
gen_server:call(Pid, {cb_pure_op, {o_member, X}}, ?LONG_TIME).
in(Pid, Val) ->
gen_server:call(Pid, {cb_dirty_op, {o_in, Val}}, ?LONG_TIME).
out(Pid) ->
gen_server:call(Pid, {cb_dirty_op, {o_out}}, ?LONG_TIME).
reverse(Pid) ->
gen_server:call(Pid, {cb_dirty_op, {o_reverse}}, ?LONG_TIME).
filter(Pid, Fun) ->
gen_server:call(Pid, {cb_dirty_op, {o_filter, Fun}}, ?LONG_TIME).
checkpoint(Pid) ->
tango_dt:checkpoint(Pid).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fresh() ->
queue:new().
do_pure_op({o_is_empty}, Q) ->
{ok, queue:is_empty(Q)};
do_pure_op({o_length}, Q) ->
{ok, queue:len(Q)};
do_pure_op({o_peek}, Q) ->
{ok, queue:peek(Q)};
do_pure_op({o_to_list}, Q) ->
{ok, queue:to_list(Q)};
do_pure_op({o_member, X}, Q) ->
{ok, queue:member(X, Q)}.
do_dirty_op(Op0, From,
I_State, StreamNum, Proj0, ___TODO_delme_PageSize) ->
{AsyncType, Op} = transform_dirty_op(Op0, From),
Page = term_to_binary(Op),
{{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]),
{AsyncType, I_State, Proj1, LPN}.
do_checkpoint(Q=_I_State) ->
[{o_start_checkpoint}|[{o_in, X} || X <- queue:to_list(Q)]].
play_log_mutate_i_state(Pages, _SideEffectsP, I_State) ->
lists:foldl(fun({o_in, Val}=_Op, Q) ->
queue:in(Val, Q);
({o_out, From, Node, WritingPid}, Q) ->
{Reply, NewQ} = queue:out(Q),
if Node == node(), WritingPid == self() ->
gen_server:reply(From, {ok, Reply});
true ->
ok
end,
NewQ;
({o_reverse}, Q) ->
queue:reverse(Q);
({o_filter, Fun}, Q) ->
queue:filter(Fun, Q);
({o_start_checkpoint}, _Q) ->
fresh()
end,
I_State,
lists:append([binary_to_term(Page) || Page <- Pages])).
transform_dirty_op({o_out}, From) ->
%% This func will be executed on the server side prior to writing
%% to the log.
{op_t_sync, [{o_out, From, node(), self()}]};
transform_dirty_op(OpList, _From) when is_list(OpList) ->
{op_t_async, OpList};
transform_dirty_op(Op, _From) ->
{op_t_async, [Op]}.
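%% A note on why transform_dirty_op/2 smuggles From, node(), and self()
%% into the logged {o_out, ...} record (a reading of the code above, not
%% original commentary): every replica replays the record and pops its
%% local copy of the queue, but only the replica whose node() and pid()
%% match the captured values calls gen_server:reply/2, so the waiting
%% caller gets exactly one answer even though many readers may replay the
%% same log entry.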

View file

@ -0,0 +1,80 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_dt_register).
-behaviour(tango_dt).
-export([start_link/4, stop/1,
set/2, get/1,
checkpoint/1]).
%% Tango datatype callbacks
-export([fresh/0,
do_pure_op/2, do_dirty_op/6, do_checkpoint/1,
play_log_mutate_i_state/3]).
-define(LONG_TIME, 30*1000).
start_link(PageSize, SequencerPid, Proj, StreamNum) ->
gen_server:start_link(tango_dt,
[PageSize, SequencerPid, Proj, ?MODULE, StreamNum],
[]).
stop(Pid) ->
tango_dt:stop(Pid).
set(Pid, Val) ->
gen_server:call(Pid, {cb_dirty_op, {o_set, Val}}, ?LONG_TIME).
get(Pid) ->
gen_server:call(Pid, {cb_pure_op, {o_get}}, ?LONG_TIME).
checkpoint(Pid) ->
tango_dt:checkpoint(Pid).
fresh() ->
undefined.
do_pure_op({o_get}, Register) ->
{ok, Register}.
do_dirty_op(Op0, _From,
I_State, StreamNum, Proj0, ___TODO_delme_PageSize) ->
Op = if is_list(Op0) -> Op0;
true -> [Op0] % always make a list
end,
Page = term_to_binary(Op),
{{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]),
{op_t_async, I_State, Proj1, LPN}.
do_checkpoint(Register=_I_State) ->
[{o_start_checkpoint},{o_set, Register}].
play_log_mutate_i_state(Pages, _SideEffectsP, OldRegister=_I_State) ->
lists:foldl(fun({o_set, Val}=_Op, _OldVal) ->
Val;
({o_start_checkpoint}, _OldVal) ->
fresh()
end,
OldRegister,
lists:append([binary_to_term(Page) || Page <- Pages])).

View file

@ -0,0 +1,192 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_oid).
-behaviour(gen_server).
%% API
-export([start_link/3, stop/1,
new/2, get/2]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
%% Tango datatype callbacks (prototype)
-export([fresh/0,
do_pure_op/2, do_dirty_op/4, play_log_mutate_i_state/3]).
-define(SERVER, ?MODULE).
-define(OID_STREAM_NUMBER, 0).
-define(LONG_TIME, 30*1000).
-define(D(X), io:format(user, "Dbg: ~s = ~p\n", [??X, X])).
-type lpn() :: non_neg_integer().
-record(state, {
page_size :: non_neg_integer(), % Corfurl page size
seq :: pid(), % sequencer pid
proj :: term(), % projection
last_fetch_lpn :: lpn(), %
all_back_ps :: [lpn()], % All back-pointers LIFO order!
i_state :: term() % internal state thingie
}).
start_link(PageSize, SequencerPid, Proj) ->
gen_server:start_link(?MODULE,
[PageSize, SequencerPid, Proj], []).
stop(Pid) ->
gen_server:call(Pid, {stop}, ?LONG_TIME).
new(Pid, Key) ->
gen_server:call(Pid, {new, Key}, ?LONG_TIME).
get(Pid, Key) ->
gen_server:call(Pid, {get, Key}, ?LONG_TIME).
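%% Illustrative usage sketch, not part of the original module, with
%% hypothetical names: the OID server maps a client-chosen key onto a small
%% integer, presumably the stream number later handed to tango_dt:start_link/5.
%%
%%     {ok, OidSrv} = tango_oid:start_link(PageSize, SeqPid, Proj),
%%     {ok, Num}    = tango_oid:new(OidSrv, <<"my queue">>),
%%     {ok, Num}    = tango_oid:get(OidSrv, <<"my queue">>),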
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
init([PageSize, SequencerPid, Proj]) ->
LastLPN = find_last_lpn(SequencerPid, ?OID_STREAM_NUMBER),
{LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, 0, ?OID_STREAM_NUMBER),
BackPs = tango:append_lpns(LPNs, []),
LastFetchLPN = tango:back_ps2last_lpn(BackPs),
I_State = play_log_pages(Pages, fresh(), ?MODULE, false),
{ok, #state{page_size=PageSize,
seq=SequencerPid,
proj=Proj,
last_fetch_lpn=LastFetchLPN,
all_back_ps=BackPs,
i_state=I_State}}.
handle_call({new, Key}, From,
#state{proj=Proj0, i_state=I_State}=State) ->
Op = {new_oid, Key, From, 0},
{_Res, I_State2, Proj1, _LPN} =
do_dirty_op(Op, I_State, ?OID_STREAM_NUMBER, Proj0),
    %% Let's see how much trouble we can get ourselves into here.
%% If we're here, then we've written to the log without error.
%% So then the cast to roll forward must see that log entry
%% (if it also operates without error). So, the side-effect of
%% the op ought to always send a reply to the client.
gen_server:cast(self(), {roll_forward}),
{noreply, State#state{i_state=I_State2,
proj=Proj1}};
handle_call({get, _Key}=Op, _From, State) ->
State2 = #state{i_state=I_State} = roll_log_forward(State),
Reply = do_pure_op(Op, I_State),
{reply, Reply, State2};
handle_call({stop}, _From, State) ->
{stop, normal, ok, State};
handle_call(_Request, _From, State) ->
Reply = whaaaaaaaaaaaa,
{reply, Reply, State}.
handle_cast({roll_forward}, State) ->
State2 = roll_log_forward(State),
{noreply, State2};
handle_cast(_Msg, State) ->
{noreply, State}.
handle_info(_Info, State) ->
{noreply, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
find_last_lpn(SequencerPid, StreamNum) ->
{ok, _, [BackPs]} = corfurl_sequencer:get_tails(SequencerPid,
0, [StreamNum]),
tango:back_ps2last_lpn(BackPs).
fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum)
when LastLPN >= StopAtLPN ->
LPNandPages = tango:scan_backward(Proj, StreamNum, LastLPN,
StopAtLPN, true),
{_LPNs, _Pages} = lists:unzip(LPNandPages).
play_log_pages(Pages, SideEffectsP,
#state{i_state=I_State} = State) ->
I_State2 = play_log_pages(Pages, I_State, ?MODULE, SideEffectsP),
State#state{i_state=I_State2}.
play_log_pages(Pages, I_State, CallbackMod, SideEffectsP) ->
CallbackMod:play_log_mutate_i_state(Pages, SideEffectsP, I_State).
roll_log_forward(#state{seq=SequencerPid, proj=Proj, all_back_ps=BackPs,
last_fetch_lpn=StopAtLPN} = State) ->
LastLPN = find_last_lpn(SequencerPid, ?OID_STREAM_NUMBER),
{LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, StopAtLPN, ?OID_STREAM_NUMBER),
NewBPs = tango:append_lpns(LPNs, BackPs),
play_log_pages(Pages, true, State#state{all_back_ps=NewBPs}).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-record(oid_map, {
next :: non_neg_integer(),
map :: dict()
}).
-define(DICTMOD, dict).
fresh() ->
#oid_map{next=1,
map=?DICTMOD:new()}.
do_pure_op({get, Key}, #oid_map{map=Dict}) ->
?DICTMOD:find(Key, Dict).
do_dirty_op({new_oid, _Key, _From, _NumOfAttempts}=Op,
I_State, StreamNum, Proj0) ->
Page = term_to_binary(Op),
{{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]),
{ok, I_State, Proj1, LPN}.
play_log_mutate_i_state(Pages, SideEffectsP, I_State) ->
lists:foldl(fun({new_oid, Key, From, _NumOfAttempts}=_Op,
#oid_map{map=Dict, next=Next}=O) ->
{Res, O2} =
case ?DICTMOD:find(Key, Dict) of
error ->
Dict2 = ?DICTMOD:store(Key, Next, Dict),
{{ok, Next},O#oid_map{map=Dict2,
next=Next + 1}};
{ok, _} ->
{already_exists, O}
end,
if SideEffectsP ->
gen_server:reply(From, Res);
true ->
ok
end,
O2
end,
I_State,
[binary_to_term(Page) || Page <- Pages]).

View file

@ -0,0 +1,135 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_flu_test).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-endif.
-include("corfurl.hrl").
-define(M, corfurl_flu).
-ifdef(TEST).
-ifndef(PULSE).
startstop_test() ->
Dir = "/tmp/flu." ++ os:getpid(),
{ok, P1} = ?M:start_link(Dir),
try
{ok, _} = ?M:status(P1),
ok = ?M:stop(P1),
{'EXIT', _} = (catch ?M:stop(P1)),
{ok, P2} = ?M:start_link(Dir),
0 = ?M:get__mlp(P2),
0 = ?M:get__min_epoch(P2),
ok = ?M:stop(P2),
ok
after
ok = corfurl_util:delete_dir(Dir)
end.
basic_test() ->
Dir = "/tmp/flu." ++ os:getpid(),
{ok, P1} = ?M:start_link(Dir),
try
Epoch1 = 1,
Epoch2 = 2,
Epoch3 = 3,
LPN = 1,
Bin1 = <<42:64>>,
Bin2 = <<42042:64>>,
error_unwritten = ?M:read(P1, Epoch1, LPN),
error_unwritten = ?M:trim(P1, Epoch1, LPN),
error_unwritten = ?M:trim(P1, Epoch1, LPN+77),
ok = ?M:write(P1, Epoch1, LPN, Bin1),
error_overwritten = ?M:write(P1, Epoch1, LPN, Bin1),
error_overwritten = ?M:fill(P1, Epoch1, LPN),
LPN = ?M:get__mlp(P1),
0 = ?M:get__min_epoch(P1),
0 = ?M:get__trim_watermark(P1),
{ok, LPN} = ?M:seal(P1, Epoch1),
2 = ?M:get__min_epoch(P1),
error_overwritten = ?M:write(P1, Epoch2, LPN, Bin1),
ok = ?M:write(P1, Epoch2, LPN+1, Bin2),
Epoch2 = ?M:get__min_epoch(P1),
error_badepoch = ?M:read(P1, Epoch1, LPN),
{ok, Bin2} = ?M:read(P1, Epoch2, LPN+1),
error_unwritten = ?M:read(P1, Epoch2, LPN+2),
badarg = ?M:read(P1, Epoch2, 1 bsl 2982),
error_badepoch = ?M:seal(P1, Epoch1),
{ok, _} = ?M:seal(P1, Epoch2),
error_badepoch = ?M:seal(P1, Epoch2),
error_badepoch = ?M:read(P1, Epoch1, LPN),
error_badepoch = ?M:read(P1, Epoch1, LPN+1),
{ok, Bin1} = ?M:read(P1, Epoch3, LPN),
{ok, Bin2} = ?M:read(P1, Epoch3, LPN+1),
error_badepoch = ?M:trim(P1, Epoch1, LPN+1),
ok = ?M:trim(P1, Epoch3, LPN+1),
error_trimmed = ?M:trim(P1, Epoch3, LPN+1),
%% Current watermark processing is broken. But we'll test what's
%% there now.
ExpectedWaterFixMe = LPN+1,
ExpectedWaterFixMe = ?M:get__trim_watermark(P1),
ok = ?M:fill(P1, Epoch3, LPN+3),
error_trimmed = ?M:read(P1, Epoch3, LPN+3),
error_trimmed = ?M:fill(P1, Epoch3, LPN+3),
error_trimmed = ?M:trim(P1, Epoch3, LPN+3),
Epoch3 = ?M:get__min_epoch(P1),
ok = ?M:stop(P1),
ok
after
ok = corfurl_util:delete_dir(Dir)
end.
seal_persistence_test() ->
Dir = "/tmp/flu." ++ os:getpid(),
{ok, P1} = ?M:start_link(Dir),
try
0 = ?M:get__min_epoch(P1),
Epoch = 665,
{ok, LPN} = ?M:seal(P1, Epoch-1),
Epoch = ?M:get__min_epoch(P1),
ok = ?M:stop(P1),
{ok, P2} = ?M:start_link(Dir),
Epoch = ?M:get__min_epoch(P2),
ok = ?M:stop(P2),
ok
after
ok = corfurl_util:delete_dir(Dir)
end.
-endif. % not PULSE
-endif. % TEST

View file

@ -0,0 +1,950 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_pulse).
-ifdef(TEST).
-ifdef(PULSE).
-compile(export_all).
-include_lib("eqc/include/eqc.hrl").
-include_lib("eqc/include/eqc_statem.hrl").
-include("corfurl.hrl").
-include_lib("eunit/include/eunit.hrl").
-compile({parse_transform, pulse_instrument}).
-compile({pulse_skip,[{prop_pulse_test_,0},{clean_up_runtime,1},{delete_dir,1}]}).
%% -compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}).
%% Used for output within EUnit...
-define(QC_FMT(Fmt, Args),
io:format(user, Fmt, Args)).
%% And to force EUnit to output QuickCheck output...
-define(QC_OUT(P),
eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)).
-define(MAX_PAGES, 50000).
-define(MY_TAB, i_have_a_name).
-define(MY_KEY, ?MY_TAB).
-define(PROJECTION_DIR, "./tmp.projection." ++ os:getpid()).
-define(SEQUENCER_NAME, 'corfurl pulse seq thingie').
-record(run, {
proj, % Projection
flus % List of FLUs
}).
-record(state, {
is_setup = false :: boolean(),
num_chains = 0 :: integer(),
chain_len = 0 :: integer(),
page_size = 0 :: integer(),
run :: #run{}
}).
%% Model testing things:
%% Define as true to fake bad behavior that the model **must** notice & fail!
-ifndef(TRIP_no_append_duplicates).
-define(TRIP_no_append_duplicates, false).
-endif.
-ifndef(TRIP_bad_read).
-define(TRIP_bad_read, false).
-endif.
-ifndef(TRIP_bad_scan_forward).
-define(TRIP_bad_scan_forward, false).
-endif.
-ifndef(TRIP_bad_fill).
-define(TRIP_bad_fill, false).
-endif.
-ifndef(TRIP_bad_trim).
-define(TRIP_bad_trim, false).
-endif.
initial_state() ->
#state{}.
gen_page(PageSize) ->
binary(PageSize).
gen_seed() ->
noshrink({choose(1, 20000), choose(1, 20000), choose(1, 20000)}).
gen_sequencer_percent() ->
frequency([{10, choose(1,100)},
{5, choose(90,100)}]).
gen_sequencer() ->
frequency([{100, standard},
{50, {gen_seed(), gen_sequencer_percent(), choose(1, 2)}}]).
gen_approx_page() ->
%% EQC can't know what pages are perhaps-written, so pick something big.
noshrink(?LET(I, largeint(), abs(I))).
gen_scan_forward_start() ->
oneof([1, gen_approx_page()]).
gen_stop_method() ->
oneof([stop, kill]).
command(#state{run=Run} = S) ->
?LET({NumChains, ChainLen, PageSize},
{parameter(num_chains), parameter(chain_len), parameter(page_size)},
frequency(
[{50, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize, gen_sequencer()]}}
|| not S#state.is_setup] ++
[{50, {call, ?MODULE, append, [Run, gen_page(PageSize)]}}
|| S#state.is_setup] ++
[{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}}
|| S#state.is_setup] ++
[{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}}
|| S#state.is_setup] ++
[{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}}
|| S#state.is_setup] ++
[{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}}
|| S#state.is_setup] ++
[{10, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}}
|| S#state.is_setup] ++
[])).
%% Precondition, checked before a command is added to the command sequence.
precondition(S, {call, _, setup, _}) ->
not S#state.is_setup;
precondition(S, {call, _, _, _}) ->
S#state.is_setup.
%% Next state transformation, S is the current state and V is the result of the
%% command.
next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize, _SeqType]}) ->
S#state{is_setup=true,
num_chains=NumChains,
chain_len=ChainLen,
page_size=PageSize,
run=Res};
next_state(S, _, {call, _, append, _}) ->
S;
next_state(S, _, {call, _, read_approx, _}) ->
S;
next_state(S, _, {call, _, scan_forward, _}) ->
S;
next_state(S, _, {call, _, fill, _}) ->
S;
next_state(S, _, {call, _, trim, _}) ->
S;
next_state(S, _, {call, _, stop_sequencer, _}) ->
S.
eqeq(X, X) -> true;
eqeq(X, Y) -> {X, '/=', Y}.
postcondition(_S, {call, _, setup, _}, #run{} = _V) ->
true;
postcondition(_S, {call, _, append, _}, V) ->
case V of
{ok, LPN} when is_integer(LPN) -> true;
{special_trimmed, LPN} when is_integer(LPN) -> true;
error_badepoch -> true;
_ -> eqeq(V, todoTODO_fixit)
end;
postcondition(_S, {call, _, read_approx, _}, V) ->
valid_read_result(V);
postcondition(_S, {call, _, scan_forward, _}, V) ->
case V of
{ok, LastLSN, MoreP, Pages} ->
true = is_integer(LastLSN),
true = LastLSN > 0,
true = (MoreP == true orelse MoreP == false),
[] = lists:usort([X || {_LPN, Pg} <- Pages,
X <- [valid_read_result(Pg)], X /= true]),
true;
_ ->
eqeq(V, {todoTODO_fixit,?LINE})
end;
postcondition(_S, {call, _, FillTrim, _}, V)
when FillTrim == fill; FillTrim == trim ->
case V of
ok -> true;
error_trimmed -> true;
error_unwritten -> true;
error_overwritten -> true;
_ -> eqeq(V, {error, FillTrim, V})
end;
postcondition(_S, {call, _, stop_sequencer, _}, _V) ->
true.
valid_read_result(Pg) when is_binary(Pg) -> true;
valid_read_result(error_unwritten) -> true;
valid_read_result(error_trimmed) -> true;
valid_read_result(V) -> eqeq(V, {todoTODO_fixit,?LINE}).
run_commands_on_node(LocalOrSlave, Cmds, Seed) ->
AfterTime = if LocalOrSlave == local -> 50000;
LocalOrSlave == slave -> 1000000
end,
event_logger:start_link(),
pulse:start(),
delete_dir(?PROJECTION_DIR),
error_logger:tty(false),
error_logger:add_report_handler(handle_errors),
event_logger:start_logging(),
X =
try
{H, S, Res, Trace} = pulse:run(fun() ->
catch ets:new(?MY_TAB, [public, set, named_table]),
ets:insert(?MY_TAB, {?MY_KEY, undefined}),
%% application:start(my_test_app),
%% receive after AfterTime -> ok end,
{H, S, R} = run_parallel_commands(?MODULE, Cmds),
%% io:format(user, "Yooo: H = ~p\n", [H]),
%% io:format(user, "Yooo: S = ~p\n", [S]),
%% io:format(user, "Yooo: R = ~p\n", [R]),
receive after AfterTime -> ok end,
Trace = event_logger:get_events(),
%% receive after AfterTime -> ok end,
[{_, ThisRun}] = ets:lookup(?MY_TAB, ?MY_KEY),
[clean_up_runtime(ThisRun) || ThisRun /= undefined],
%% stop pulse controller *after* clean_up_runtime().
catch exit(pulse_application_controller, shutdown),
{H, S, R, Trace}
end, [{seed, Seed},
{strategy, unfair}]),
Schedule = pulse:get_schedule(),
Errors = gen_event:call(error_logger, handle_errors, get_errors, 60*1000),
{H, S, Res, Trace, Schedule, Errors}
catch
_:Err ->
{'EXIT', Err}
end,
X.
prop_pulse() ->
prop_pulse(local).
prop_pulse(LocalOrSlave) ->
?FORALL({NumChains, ChainLen, PageSize},
{choose(1, 3), choose(1, 3), choose(1, 16)},
begin
P = ?FORALL({Cmds, Seed},
{with_parameters([{num_chains, NumChains},
{chain_len, ChainLen},
{page_size, PageSize}], parallel_commands(?MODULE)),
pulse:seed()},
begin
case run_commands_on_node(LocalOrSlave, Cmds, Seed) of
{'EXIT', Err} ->
equals({'EXIT', Err}, ok);
{_H, S, Res, Trace, Schedule, Errors} ->
CheckTrace = check_trace(Trace, Cmds, Seed),
?WHENFAIL(
S = S, % ?QC_FMT("\nState: ~p\n", [S]),
measure(schedule, length(Schedule),
conjunction(
[{simple_result, equals(Res, ok)},
{errors, equals(Errors, [])},
{events, CheckTrace} ])))
end
end),
P
end).
prop_pulse_test_() ->
Timeout = case os:getenv("PULSE_TIME") of
false -> 60;
Val -> list_to_integer(Val)
end,
ExtraTO = case os:getenv("PULSE_SHRINK_TIME") of
false -> 0;
Val2 -> list_to_integer(Val2)
end,
io:format(user, "prop_pulse_test time: ~p + ~p seconds\n",
[Timeout, ExtraTO]),
{timeout, (Timeout+ExtraTO) + 60,
fun() ->
?assert(eqc:quickcheck(eqc:testing_time(Timeout,?QC_OUT(prop_pulse()))))
end}.
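%% For example, running with PULSE_TIME=300 and PULSE_SHRINK_TIME=600 set in
%% the environment gives a 300 second eqc:testing_time/2 run wrapped in an
%% EUnit timeout of 300 + 600 + 60 = 960 seconds.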
%% Example Trace0 (raw event info, from the ?LOG macro)
%%
%% [{32014,{call,<0.467.0>,{append,<<"O">>}}},
%% {32421,{call,<0.466.0>,{append,<<134>>}}},
%% {44522,{result,<0.467.0>,{ok,1}}},
%% {47651,{result,<0.466.0>,{ok,2}}}]
check_trace(Trace0, _Cmds, _Seed) ->
%% Let's treat this thing like a KV store. It is, mostly.
%% Key = LPN, Value = error_unwritten | {ok, Blob} | error_trimmed
%%
%% Problem: At {call, Pid, ...} time, we don't know what Key is!
%% We find out at {result, Pid, {ok, LPN}} time.
%% Also, the append might fail, so the model can ignore those
%% failures because they're not mutating any state that an
%% external viewer can see.
%% WARNING: Trace0 + lamport_clocks means Trace0 is not strictly sorted!
Trace = add_LPN_to_append_calls(lists:sort(Trace0)),
Events = eqc_temporal:from_timed_list(Trace),
%% Example Events, temporal style, 1 usec resolution, same as original trace
%%
%% [{0,32014,[]},
%% {32014,32015,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
%% {32015,32421,[]},
%% {32421,32422,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]},
%% {32422,44522,[]},
%% {44522,44523,[{result,<0.467.0>,{ok,...}}]},
%% {44523,47651,[]},
%% {47651,47652,[{result,<0.466.0>,{ok,...}}]},
%% {47652,infinity,[]}]
Calls = eqc_temporal:stateful(
fun({call, _Pid, _Call} = I) -> [I] end,
fun({call, Pid, _Call}, {result, Pid, _}) -> [] end,
Events),
%% Example Calls (temporal map of when a call is in progress)
%%
%% [{0,32014,[]},
%% {32014,32421,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
%% {32421,44522,
%% [{call,<0.466.0>,{append,<<134>>,will_be,2}},{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
%% {44522,47651,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]},
%% {47651,infinity,[]}]
AllLPNsR = eqc_temporal:stateful(
fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN;
({call, _Pid, {read, LPN, _, _}}) -> LPN;
({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN;
({call, _Pid, {goo_write, LPN, _Pg}}) -> LPN
end,
fun(x) -> [] end,
Calls),
%%io:format("Calls ~p\n", [Calls]),
%%io:format("AllLPNsR ~p\n", [AllLPNsR]),
%% The last item in the relation tells us what the final facts are in the
%% relation. In this case, it's all LPNs ever mentioned in the test run.
{_, infinity, AllLPNs} = lists:last(eqc_temporal:all_future(AllLPNsR)),
%% Use the following atoms to denote transitions ("Ttn") by an LPN:
%% w_0 = not written yet, error_unwritten
%% w_1 = written successfully, {ok, binary()}
%% w_ft = fill trimmed, error_trimmed
%% w_tt = trim trimmed, error_trimmed
Mods = eqc_temporal:stateful(
fun({call, Pid, {append, Pg, will_be, LPN}}) ->
{mod_working, w_1, LPN, Pg, Pid};
({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) ->
%% This is a special case for the model. We know that
%% a write raced with a trim and lost (at least some of
%% the time inside the chain). But the transition that
%% we model in this case is a special w_ type that is
%% treated specially when the dictionary for the
%% ValuesR relation is built.
{mod_working, w_special_trimmed, LPN, Pg, Pid};
({call, Pid, {fill, LPN, will_be, ok}}) ->
{mod_working, w_ft, LPN, fill, Pid};
({call, Pid, {trim, LPN, will_be, ok}}) ->
{mod_working, w_tt, LPN, trim, Pid};
({call, Pid, {read, LPN, will_fail, error_trimmed}}) ->
{mod_working, w_tt, LPN, read_repair_maybe, Pid}
end,
fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res})->
[]
end,
Events),
%% StartMod contains {mod_start, Ttn, LPN, V} when a modification started.
%% DoneMod contains {mod_end, Ttn, LPN, V} when a modification finished.
%% This is a clever trick: Mods contains the start & end timestamp
%% for each modification. Use shift() by 1 usec to move all timestamps
%% forward/backward 1 usec, then subtract away the original time range to
%% leave a 1 usec relation in time.
StartMod = eqc_temporal:map(
fun({mod_working, Ttn, LPN, Pg, _Pid}) ->
{mod_start, Ttn, LPN, Pg}
end,
eqc_temporal:subtract(Mods, eqc_temporal:shift(1, Mods))),
DoneMod = eqc_temporal:map(
fun({mod_working, Ttn, LPN, Pg, _Pid}) ->
{mod_end, Ttn, LPN, Pg}
end,
eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)),
StartsDones = eqc_temporal:union(StartMod, DoneMod),
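%% For intuition, with hypothetical timestamps and assuming shift(1, R) moves
%% timestamps forward by 1 usec: if Mods holds a fact over {10, 20, [M]},
%% then shift(1, Mods) holds it over {11, 21, [M]}, so
%% subtract(Mods, shift(1, Mods)) keeps only {10, 11, ...} (the first usec,
%% mapped to mod_start) and subtract(shift(1, Mods), Mods) keeps only
%% {20, 21, ...} (the last usec, mapped to mod_end).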
%% TODO: A brighter mind than mine might figure out how to do this
%% next step using only eqc_temporal.
%%
%% We create a new relation, ValuesR. This relation contains
%% {values, OD::orddict()} for each time interval in the relation.
%% The OD contains all possible values for a particular LPN at
%% that time in the relation.
%% The key for OD is LPN, the value is an unordered list of possible values.
InitialValDict = orddict:from_list([{LPN, [error_unwritten]} ||
LPN <- AllLPNs]),
ValuesRFun =
fun({TS1, TS2, StEnds}, Dict1) ->
Dict2 = lists:foldl(
fun({mod_start, w_1, LPN, Pg}, D) ->
orddict:append(LPN, Pg, D);
({mod_start, WType, LPN, _Pg}, D)
when WType == w_ft; WType == w_tt ->
case lists:member(error_trimmed,
orddict:fetch(LPN, D)) of
true ->
D;
false ->
orddict:append(LPN, error_trimmed,D)
end;
({mod_start, w_special_trimmed, LPN, Pg}, D)->
orddict:append(LPN, Pg, D)
end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]),
Dict3 = lists:foldl(
fun({mod_end, w_1, LPN, Pg}, D) ->
Vs1 = orddict:fetch(LPN, D),
%% We've written a page. error_unwritten is
%% now impossible; any other binary() is
%% also impossible. However, there may be
%% a trim operation that's still in flight!
Vs2 = [V || V <- Vs1, V /= error_unwritten,
not is_binary(V)],
orddict:store(LPN, [Pg|Vs2], D);
({mod_end, WType, LPN, _Pg}, D)
when WType == w_ft; WType == w_tt ->
orddict:store(LPN, [error_trimmed], D);
({mod_end, w_special_trimmed, LPN, Pg}, D) ->
orddict:store(LPN, [Pg,error_trimmed], D)
end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]),
{{TS1, TS2, [{values, Dict3}]}, Dict3}
end,
{ValuesR, _} = lists:mapfoldl(ValuesRFun, InitialValDict, StartsDones),
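%% A hypothetical ValuesR entry, following the example trace at the top of
%% check_trace/3, might be
%% {44522, 44523, [{values, [{1, [<<"O">>]}, {2, [error_unwritten, <<134>>]}]}]}:
%% LPN 1's append has finished, so only its written binary remains possible,
%% while LPN 2's append is still in flight, so both the old and the new value
%% are possible.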
InitialTtnDict = orddict:from_list([{LPN, [w_0]} || LPN <- AllLPNs]),
{TransitionsR, _} =
lists:mapfoldl(
fun({TS1, TS2, StEnds}, Dict1) ->
Dict2 = lists:foldl(
fun({mod_end, Ttn, LPN, _Pg}, D) ->
%% orddict does not discard duplicates
orddict:append(LPN, Ttn, D);
(_, D) ->
D
end, Dict1, [X || X={mod_end,_,_,_} <- StEnds]),
{{TS1, TS2, [{transitions, Dict2}]}, Dict2}
end, InitialTtnDict, StartsDones),
%% Checking reads is a tricky thing. My first attempt created a temporal
%% relation for the 1usec window when the read call was complete, then
%% union with the ValuesR relation to see what values were valid at that
%% particular instant. That approach fails sometimes!
%%
%% The reason is honest race conditions with a mutation: the model doesn't
%% know exactly when the data was written, so a valid value may have been
%% added to or removed from the ValuesR relation and so may not be
%% present in the 1usec window that intersects with ValuesR.
%%
%% Instead, we need to merge together all possible values from ValuesR
%% that appear at any time during the read op's lifetime.
PerhapsR = eqc_temporal:stateful(
fun({call, _Pid, {goo_write, LPN, Pg}}) ->
{perhaps, LPN, Pg}
end,
fun(x)-> [] end,
Events),
{_, _, Perhaps} = lists:last(eqc_temporal:all_future(PerhapsR)),
%%?QC_FMT("*Perhaps: ~p\n", [Perhaps]),
Reads = eqc_temporal:stateful(
fun({call, Pid, {read, LPN, _, _}}) ->
{read, Pid, LPN, []}
end,
fun({read, Pid, LPN, V1s}, {values, Values}) ->
{ok, V2s} = orddict:find(LPN, Values),
NewVs = lists:umerge(lists:sort(V1s),
lists:sort(V2s)),
%% Throw an exception (which is equivalent to a no-op)
%% if there are no differences: if we make multiples
%% of the exact same thing, stateful() will get confused.
false = NewVs == V1s,
{read, Pid, LPN, NewVs};
({read, Pid, LPN, Vs}, {result, Pid, Pg}) ->
%% case lists:member(Pg, Vs) orelse
%% lists:member({perhaps, LPN, Pg}, Perhaps) of
case lists:member(Pg, Vs) of
true ->
[];
false ->
case lists:member({perhaps, LPN, Pg}, Perhaps) of
true ->
%% The checking of the Perhaps list in
%% this manner is not strictly
%% temporally valid. It is possible
%% for the {perhaps,...} event to be
%% after the event we're checking here.
%% TODO work is to make this check 100%
%% temporally valid.
io:format(user, "Yo, found ~p ~p in Perhaps\n", [LPN, Pg]),
[];
false ->
[{bad, read, LPN, Pid, got, Pg,
possible, Vs}]
end
end
end, eqc_temporal:union(Events, ValuesR)),
BadFilter = fun(bad) -> true;
(Bad) when is_tuple(Bad), element(1, Bad) == bad -> true;
(_) -> false end,
BadReads = filter_relation_facts(BadFilter, Reads),
%% Property: For all LPNs, the transition list for K must be one of the
%% following four (4) acceptable transition orderings.
{_, _, [{transitions, FinalTtns}]} = lists:last(
eqc_temporal:all_future(TransitionsR)),
FinalTtns_filtered = filter_transition_trimfill_suffixes(FinalTtns),
InvalidTransitions = orddict:fold(
fun(_LPN, [w_0], Acc) ->
Acc;
(_LPN, [w_0,w_1], Acc) ->
Acc;
(_LPN, [w_0,'w_t+'], Acc) ->
Acc;
(_LPN, [w_0,w_1,'w_t+'], Acc) ->
Acc;
(LPN, BadTtns, Acc) ->
[{LPN, BadTtns}|Acc]
end, [], FinalTtns_filtered),
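%% For example, an LPN whose filtered transition list is [w_0, w_1, 'w_t+']
%% (unwritten, then written, then trimmed/filled) is acceptable, whereas a
%% list such as [w_0, 'w_t+', w_1] (a successful write after a trim) is
%% collected into InvalidTransitions and fails the property below.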
?WHENFAIL(begin
?QC_FMT("*Trace: ~p\n", [Trace]),
?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]),
?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]),
?QC_FMT("*ValuesR: ~p\n", [eqc_temporal:unions([ValuesR, StartsDones])]),
?QC_FMT("*Calls: ~p\n", [Calls]),
?QC_FMT("*BadReads: ~p\n", [BadReads]),
?QC_FMT("*Perhaps: ~p\n", [Perhaps])
end,
conjunction(
[
{all_calls_finish,
eqc_temporal:is_false(eqc_temporal:all_future(Calls))},
{no_invalidTransitions,
InvalidTransitions == []},
{no_bad_reads,
eqc_temporal:is_false(eqc_temporal:all_future(BadReads))},
%% If you want to see PULSE causing crazy scheduling, then
%% change one of the "true orelse" -> "false orelse" below.
%% {bogus_no_gaps,
%% true orelse
%% (AppendLPNs == [] orelse length(range_ify(AppendLPNs)) == 1)},
%% {bogus_exactly_1_to_N,
%% true orelse (AppendLPNs == lists:seq(1, length(AppendLPNs)))},
{true, true}
])).
add_LPN_to_append_calls([{TS, {call, Pid, {append, Page}}}|Rest]) ->
Res = trace_lookahead_pid(Pid, Rest),
New = case Res of
{ok, LPN} ->
{TS, {call, Pid, {append, Page, will_be, LPN}}};
Else ->
{TS, {call, Pid, {append, Page, will_fail, Else}}}
end,
[New|add_LPN_to_append_calls(Rest)];
add_LPN_to_append_calls([{TS, {call, Pid, {OpName, LPN}}}|Rest])
when OpName == fill; OpName == trim ->
Res = trace_lookahead_pid(Pid, Rest),
New = case Res of
ok ->
{TS, {call, Pid, {OpName, LPN, will_be, ok}}};
Else ->
{TS, {call, Pid, {OpName, LPN, will_fail, Else}}}
end,
[New|add_LPN_to_append_calls(Rest)];
add_LPN_to_append_calls([{TS, {call, Pid, {read, LPN}}}|Rest]) ->
Res = trace_lookahead_pid(Pid, Rest),
New = case Res of
Page when is_binary(Page) ->
{TS, {call, Pid, {read, LPN, will_be, Page}}};
Else ->
{TS, {call, Pid, {read, LPN, will_fail, Else}}}
end,
[New|add_LPN_to_append_calls(Rest)];
add_LPN_to_append_calls([X|Rest]) ->
[X|add_LPN_to_append_calls(Rest)];
add_LPN_to_append_calls([]) ->
[].
trace_lookahead_pid(Pid, [{_TS, {result, Pid, Res}}|_]) ->
Res;
trace_lookahead_pid(Pid, [_H|T]) ->
trace_lookahead_pid(Pid, T).
%% Presenting command data statistics in a nicer way
command_data({set, _, {call, _, Fun, _}}, {_S, _V}) ->
Fun.
%% Convenience functions for running tests
test() ->
test({20, sec}).
test(N) when is_integer(N) ->
quickcheck(numtests(N, prop_pulse()));
test({Time, sec}) ->
quickcheck(eqc:testing_time(Time, prop_pulse()));
test({Time, min}) ->
test({Time * 60, sec});
test({Time, h}) ->
test({Time * 60, min}).
check() ->
check(current_counterexample()).
verbose() ->
verbose(current_counterexample()).
verbose(CE) ->
erlang:put(verbose, true),
Ok = check(CE),
erlang:put(verbose, false),
Ok.
check(CE) ->
check(on_output(fun("OK" ++ _, []) -> ok; (Fmt, Args) -> io:format(Fmt, Args) end,
prop_pulse(true == erlang:get(verbose))),
CE).
recheck() ->
recheck(prop_pulse()).
zipwith(F, [X|Xs], [Y|Ys]) ->
[F(X, Y)|zipwith(F, Xs, Ys)];
zipwith(_, _, _) -> [].
delete_dir(Dir) ->
corfurl_util:delete_dir(Dir).
clean_up_runtime(#run{flus=Flus, proj=P}) ->
%% io:format(user, "clean_up_runtime: run = ~p\n", [R]),
#proj{seq={Seq,_,_}} = P,
catch corfurl_sequencer:stop(Seq),
[catch corfurl_flu:stop(F) || F <- Flus],
corfurl_test:setup_del_all(length(Flus)),
delete_dir(?PROJECTION_DIR),
(catch exit(whereis(?SEQUENCER_NAME), kill)).
make_chains(ChainLen, FLUs) ->
make_chains(ChainLen, FLUs, [], []).
make_chains(_ChainLen, [], SmallAcc, BigAcc) ->
[lists:reverse(SmallAcc)|BigAcc];
make_chains(ChainLen, [H|T], SmallAcc, BigAcc) ->
if length(SmallAcc) == ChainLen ->
make_chains(ChainLen, T, [H], [lists:reverse(SmallAcc)|BigAcc]);
true ->
make_chains(ChainLen, T, [H|SmallAcc], BigAcc)
end.
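%% For example, make_chains(2, [a,b,c,d]) -> [[c,d],[a,b]]: the chains come
%% out back-to-front, which setup/4 below does not appear to depend on.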
setup(NumChains, ChainLen, PageSize, SeqType) ->
(catch exit(whereis(?SEQUENCER_NAME), kill)),
lamport_clock:init(),
N = NumChains * ChainLen,
FLUs = corfurl_test:setup_basic_flus(N, PageSize, ?MAX_PAGES),
{ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType),
Chains = make_chains(ChainLen, FLUs),
%% io:format(user, "Cs = ~p\n", [Chains]),
Proj = corfurl:new_simple_projection(?PROJECTION_DIR,
1, 1, ?MAX_PAGES, Chains),
ok = corfurl:save_projection(?PROJECTION_DIR, Proj),
error_overwritten = corfurl:save_projection(?PROJECTION_DIR, Proj),
1 = corfurl:latest_projection_epoch_number(?PROJECTION_DIR),
{ok, Proj} = corfurl:read_projection(?PROJECTION_DIR, 1),
Run = #run{proj=Proj#proj{seq={Seq, node(), ?SEQUENCER_NAME}},
flus=FLUs},
ets:insert(?MY_TAB, {?MY_KEY, Run}),
Run.
range_ify([]) ->
[];
range_ify(L) ->
[H|T] = lists:sort(L),
range_ify(H, H+1, T).
range_ify(Beginning, Next, [Next|T]) ->
range_ify(Beginning, Next+1, T);
range_ify(Beginning, Next, [Else|T]) ->
[{Beginning, to, Next-1}|range_ify(Else, Else+1, T)];
range_ify(Beginning, Next, []) ->
[{Beginning, to, Next-1}].
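%% For example, range_ify([1,2,3,5,6,10]) -> [{1,to,3},{5,to,6},{10,to,10}].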
filter_relation_facts(FilterFun, R) ->
[{TS1, TS2, lists:filter(FilterFun, Facts)} || {TS1, TS2, Facts} <- R].
%% {TS1, TS2, Facts} <- Reads, Fact <- Facts, BadFilter(Fact)],
filter_transition_trimfill_suffixes(Ttns) ->
[{X, filter_1_transition_list(L)} || {X, L} <- Ttns].
filter_1_transition_list([]) ->
[];
filter_1_transition_list(Old) ->
%% Strategy: Chop off all of the w_* at the end, then look at **Old** to
%% see if we chopped off any. If we did chop off any, then add back a
%% constant 'w_t+' as a suffix.
New = lists:reverse(lists:dropwhile(fun(w_tt) -> true;
(w_ft) -> true;
(w_special_trimmed) -> true;
(_) -> false
end, lists:reverse(Old))),
Suffix = case lists:last(Old) of
w_ft -> ['w_t+'];
w_tt -> ['w_t+'];
w_special_trimmed -> ['w_t+'];
_ -> []
end,
New ++ Suffix.
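%% For example, filter_1_transition_list([w_0, w_1, w_ft, w_tt]) returns
%% [w_0, w_1, 'w_t+']: any trailing trim/fill transitions are collapsed into
%% the single 'w_t+' marker, while a list such as [w_0, w_1] is unchanged.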
log_make_call(Tag) ->
log_make_call(self(), Tag).
log_make_call(Pid, Tag) ->
{call, Pid, Tag}.
log_make_result(Result) ->
log_make_result(self(), Result).
log_make_result(Pid, Result) ->
{result, Pid, Result}.
pick_an_LPN(#proj{seq={Seq,_,_}} = P, SeedInt) ->
case (catch corfurl_sequencer:get(Seq, 0)) of
{ok, Max} ->
%% The sequencer may be lying to us; that can't be helped.
if SeedInt > Max -> (SeedInt rem Max) + 1;
true -> SeedInt
end;
_Else ->
pick_an_LPN(corfurl_client:restart_sequencer(P), SeedInt)
end.
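%% For example, if the sequencer reports Max = 10 and SeedInt = 12345, the
%% pick is (12345 rem 10) + 1 = 6; a SeedInt at or below Max is used as-is.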
-define(LOG3(Tag, MkCall, PostCall),
begin
LOG__Start = lamport_clock:get(),
event_logger:event(log_make_call(Tag), LOG__Start),
LOG__Result = MkCall,
LOG__End = lamport_clock:get(),
PostCall,
event_logger:event(log_make_result(LOG__Result), LOG__End),
LOG__Result
end).
-define(LOG(Tag, MkCall), ?LOG3(Tag, MkCall, okqq)).
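%% Usage sketch (see read_approx/2 below): ?LOG({read, LPN}, Expr) logs a
%% {call, Pid, {read, LPN}} event stamped with the Lamport clock, evaluates
%% Expr, logs a matching {result, Pid, Result} event, and returns Expr's
%% value. ?LOG3 additionally evaluates PostCall after the call but before
%% the result event is logged.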
append(#run{proj=OriginalProj}, Page) ->
lamport_clock:init(),
lamport_clock:incr(),
Proj = get_projection(OriginalProj),
?LOG3({append, Page},
try
corfurl_client:pulse_tracing_start(write),
{Res, Proj2} = corfurl_client:append_page(Proj, Page),
put_projection(Proj2),
OtherPages0 = lists:usort(corfurl_client:pulse_tracing_get(write)),
OtherPages = case Res of
{ok, LPN} ->
OtherPages0 -- [LPN];
_ ->
OtherPages0
end,
put(zzzOtherPages, OtherPages),
perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page)
catch X:Y ->
{caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()}
end,
try
OPages = get(zzzOtherPages),
%%if OPages /= [] -> io:format("OPages = ~w\n", [OPages]); true -> ok end,
GooPid = {self(), goo, now()},
[begin
event_logger:event(log_make_call(GooPid, {goo_write, OP, Page}),
LOG__Start),
event_logger:event(log_make_result(GooPid, who_knows),
LOG__End)
end || OP <- OPages]
catch XX:YY ->
exit({oops, ?MODULE, ?LINE, XX, YY, erlang:get_stacktrace()})
end).
read_result_mangle({ok, Page}) ->
Page;
read_result_mangle(Else) ->
Else.
read_approx(#run{proj=OriginalProj}, SeedInt) ->
lamport_clock:init(),
lamport_clock:incr(),
Proj = get_projection(OriginalProj),
LPN = pick_an_LPN(Proj, SeedInt),
?LOG({read, LPN},
try
{Res, Proj2} = corfurl_client:read_page(Proj, LPN),
put_projection(Proj2),
Res2 = read_result_mangle(Res),
perhaps_trip_read_approx(?TRIP_bad_read, Res2, LPN)
catch X:Y ->
{caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()}
end).
scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) ->
lamport_clock:init(),
lamport_clock:incr(),
Proj = get_projection(OriginalProj),
StartLPN = if SeedInt == 1 -> 1;
true -> pick_an_LPN(Proj, SeedInt)
end,
%% Our job is complicated by the ?LOG() macro, which isn't good enough
%% for our purpose: we must lie about the starting timestamp, to make
%% it appear as if each LPN result that scan_forward() gives us came
%% instead from a single-page read_page() call.
?LOG({scan_forward, StartLPN, NumPages},
try
TS1 = lamport_clock:get(),
case corfurl_client:scan_forward(Proj, StartLPN, NumPages) of
{{Res, EndLPN, MoreP, Pages}, Proj2}
when Res == ok; Res == error_badepoch ->
put_projection(Proj2),
PageIs = lists:zip(Pages, lists:seq(1, length(Pages))),
TS2 = lamport_clock:get(),
[begin
PidI = {self(), s_f, I},
event_logger:event(log_make_call(PidI, {read, LPN}),
TS1),
Pm = perhaps_trip_scan_forward(
?TRIP_bad_scan_forward, read_result_mangle(P),
EndLPN),
event_logger:event(log_make_result(PidI, Pm), TS2)
end || {{LPN, P}, I} <- PageIs],
Ps = [{LPN, read_result_mangle(P)} ||
{LPN, P} <- Pages],
{ok, EndLPN, MoreP, Ps}
end
catch X:Y ->
{caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()}
end).
fill(#run{proj=OriginalProj}, SeedInt) ->
lamport_clock:init(),
lamport_clock:incr(),
Proj = get_projection(OriginalProj),
LPN = pick_an_LPN(Proj, SeedInt),
?LOG({fill, LPN},
try
{Res, Proj2} = corfurl_client:fill_page(Proj, LPN),
put_projection(Proj2),
perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN)
catch X:Y ->
{caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()}
end).
trim(#run{proj=OriginalProj}, SeedInt) ->
lamport_clock:init(),
lamport_clock:incr(),
Proj = get_projection(OriginalProj),
LPN = pick_an_LPN(Proj, SeedInt),
?LOG({trim, LPN},
try
{Res, Proj2} = corfurl_client:trim_page(Proj, LPN),
put_projection(Proj2),
perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN)
catch X:Y ->
{caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()}
end).
stop_sequencer(#run{proj=OriginalProj}, Method) ->
Proj = get_projection(OriginalProj),
Seq = element(1,Proj#proj.seq),
try
corfurl_sequencer:stop(Seq, Method),
ok
catch _:_ ->
ok
end.
get_projection(OriginalProj) ->
case get(projection) of
undefined ->
OriginalProj;
Proj ->
Proj
end.
put_projection(Proj) ->
put(projection, Proj).
perhaps_trip_append_page(false, Res, _Page) ->
Res;
perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 ->
io:format(user, "TRIP: append_page\n", []),
{ok, 3};
perhaps_trip_append_page(true, Else, _Page) ->
Else.
perhaps_trip_read_approx(false, Res, _LPN) ->
Res;
perhaps_trip_read_approx(true, _Res, 3 = LPN) ->
io:format(user, "TRIP: read_approx LPN ~p\n", [LPN]),
<<"FAKE!">>;
perhaps_trip_read_approx(true, Res, _LPN) ->
Res.
perhaps_trip_scan_forward(false, Res, _EndLPN) ->
Res;
perhaps_trip_scan_forward(true, _Res, 10) ->
io:format(user, "TRIP: scan_forward\n", []),
<<"magic number bingo, you are a winner">>;
perhaps_trip_scan_forward(true, Res, _EndLPN) ->
Res.
perhaps_trip_fill_page(false, Res, _EndLPN) ->
Res;
perhaps_trip_fill_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 ->
io:format(user, "TRIP: fill_page\n", []),
ok; % can trigger both invalid ttn and bad read
perhaps_trip_fill_page(true, Res, _EndLPN) ->
Res.
perhaps_trip_trim_page(false, Res, _EndLPN) ->
Res;
perhaps_trip_trim_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 ->
io:format(user, "TRIP: trim_page\n", []),
ok;
perhaps_trip_trim_page(true, Res, _EndLPN) ->
Res.
-endif. % PULSE
-endif. % TEST

View file

@ -0,0 +1,98 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_sequencer_test).
-compile(export_all).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
-endif.
-define(M, corfurl_sequencer).
-ifdef(TEST).
-ifndef(PULSE).
smoke_test() ->
BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".",
PageSize = 8,
NumPages = 500,
NumFLUs = 4,
MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end,
Del = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) ||
X <- lists:seq(1, NumFLUs)] end,
Del(),
FLUs = [begin
element(2, corfurl_flu:start_link(MyDir(X),
PageSize, NumPages*PageSize))
end || X <- lists:seq(1, NumFLUs)],
FLUsNums = lists:zip(FLUs, lists:seq(1, NumFLUs)),
try
[ok = corfurl_flu:write(FLU, 1, PageNum, <<42:(8*8)>>) ||
{FLU, PageNum} <- FLUsNums],
MLP0 = NumFLUs,
NumFLUs = ?M:get_max_logical_page(FLUs),
{ok, Sequencer} = ?M:start_link(FLUs),
try
{ok, _} = ?M:get(Sequencer, 5000),
[{Stream9, Tails9}] = StreamTails = [{9, [1125, 1124, 1123]}],
ok = ?M:set_tails(Sequencer, StreamTails),
{ok, _, [Tails9]} = ?M:get_tails(Sequencer, 0, [Stream9]),
{ok, LPN0a} = ?M:get(Sequencer, 2),
{ok, LPN0b} = ?M:get(Sequencer, 0),
LPN0a = LPN0b - 2,
{ok, LPN2a, _} = ?M:get_tails(Sequencer, 1, [2]),
{ok, LPN1a, _} = ?M:get_tails(Sequencer, 1, [1]),
{ok, _, [[LPN1a], [LPN2a]]} = ?M:get_tails(Sequencer,
0, [1,2]),
{ok, LPN2b, _} = ?M:get_tails(Sequencer, 1, [2]),
{ok, LPN2c, _} = ?M:get_tails(Sequencer, 1, [2]),
{ok, _, [[LPN1a], [LPN2c, LPN2b, LPN2a]]} =
?M:get_tails(Sequencer, 0, [1,2]),
{ok, LPN2d, _} = ?M:get_tails(Sequencer, 1, [2]),
{ok, LPN2e, _} = ?M:get_tails(Sequencer, 1, [2]),
{ok, LPNX, [[LP1a], [LPN2e, LPN2d, LPN2c, LPN2b]]} =
?M:get_tails(Sequencer, 0, [1,2]),
{ok, LPNX, [[LP1a], [LPN2e, LPN2d, LPN2c, LPN2b]]} =
?M:get_tails(Sequencer, 0, [1,2]), % same results
LPNX = LPN2e + 1, % no change with 0 request
ok
after
?M:stop(Sequencer)
end
after
[ok = corfurl_flu:stop(FLU) || FLU <- FLUs],
Del()
end.
-endif. % not PULSE
-endif. % TEST

View file

@ -0,0 +1,262 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(corfurl_test).
-include("corfurl.hrl").
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-define(M, corfurl).
%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%%
setup_flu_basedir() ->
"./tmp." ++
atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".".
setup_flu_dir(N) ->
setup_flu_basedir() ++ integer_to_list(N).
setup_del_all(NumFLUs) ->
[ok = corfurl_util:delete_dir(setup_flu_dir(N)) ||
N <- lists:seq(1, NumFLUs)].
setup_basic_flus(NumFLUs, PageSize, NumPages) ->
setup_del_all(NumFLUs),
[begin
element(2, corfurl_flu:start_link(setup_flu_dir(X),
PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD)))
end || X <- lists:seq(1, NumFLUs)].
-ifndef(PULSE).
save_read_test() ->
Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read",
PDir = Dir ++ ".projection",
Chain = [a,b],
P1 = ?M:new_simple_projection(PDir, 1, 1, 1*100, [Chain]),
try
filelib:ensure_dir(Dir ++ "/ignored"),
ok = ?M:save_projection(Dir, P1),
error_overwritten = ?M:save_projection(Dir, P1),
{ok, P1} = ?M:read_projection(Dir, 1),
error_unwritten = ?M:read_projection(Dir, 2),
ok
after
ok = corfurl_util:delete_dir(Dir),
ok = corfurl_util:delete_dir(PDir)
end.
smoke1_test() ->
PDir = "./tmp.smoke1.projection",
NumFLUs = 6,
PageSize = 8,
NumPages = 10,
FLUs = [F1, F2, F3, F4, F5, F6] =
setup_basic_flus(NumFLUs, PageSize, NumPages),
{ok, Seq} = corfurl_sequencer:start_link(FLUs),
%% We know that the first LPN will be 1.
LPN_Pgs = [{X, list_to_binary(
lists:flatten(io_lib:format("~8..0w", [X])))} ||
X <- lists:seq(1, 5)],
try
P0 = ?M:new_simple_projection(PDir, 1, 1, 1*100,
[[F1, F2, F3], [F4, F5, F6]]),
P1 = P0#proj{seq={Seq, unused, unused}},
[begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs],
[begin {ok, Pg} = ?M:read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs],
[begin
LPNplus = LPN + 1,
{ok, LPNplus, true, [{LPN, Pg}]} = ?M:scan_forward(P1, LPN, 1)
end || {LPN, Pg} <- LPN_Pgs],
{ok, 6, false, []} = ?M:scan_forward(P1, 6, 1),
{ok, 6, false, []} = ?M:scan_forward(P1, 6, 10),
[{LPN1,Pg1}, {LPN2,Pg2}, {LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}] = LPN_Pgs,
{ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = ?M:scan_forward(P1, 2, 2),
{ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} =
?M:scan_forward(P1, 3, 10),
%% Let's smoke read-repair: regular write failure
Epoch = P1#proj.epoch,
Pg6 = <<424242:(PageSize*8)>>,
%% Simulate a failed write to the chain.
[F6a, F6b, F6c] = Chain6 = ?M:project_to_chain(6, P1),
NotHead6 = [F6b, F6c],
ok = ?M:write_page_to_chain([F6a], [F6a], Epoch, 6, Pg6, 1),
%% Does the chain look as expected?
{ok, Pg6} = corfurl_flu:read(?M:flu_pid(F6a), Epoch, 6),
[error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) ||
X <- NotHead6],
%% Read repair should fix it.
{ok, Pg6} = ?M:read_page(P1, 6),
[{ok, Pg6} = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) || X <- Chain6],
%% Let's smoke read-repair: failed fill
[F7a, F7b, F7c] = Chain7 = ?M:project_to_chain(7, P1),
NotHead7 = [F7b, F7c],
ok = corfurl_flu:fill(?M:flu_pid(F7a), Epoch, 7),
%% Does the chain look as expected?
error_trimmed = corfurl_flu:read(?M:flu_pid(F7a), Epoch, 7),
[error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) ||
X <- NotHead7],
%% Read repair should fix it.
error_trimmed = ?M:read_page(P1, 7),
[error_trimmed = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) || X <- Chain7],
%% scan_forward shouldn't see it either
{ok, 8, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10),
[F8a|_] = Chain8 = ?M:project_to_chain(8, P1),
ok = corfurl_flu:fill(?M:flu_pid(F8a), Epoch, 8),
%% No read before scan, scan_forward shouldn't see 8 either,
%% but the next seq should be 9
{ok, 9, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10),
ok
after
corfurl_util:delete_dir(PDir),
corfurl_sequencer:stop(Seq),
[corfurl_flu:stop(F) || F <- FLUs],
setup_del_all(NumFLUs)
end.
smoke_append_badepoch_test() ->
PDir = "./tmp.smoke2.projection",
NumFLUs = 6,
PageSize = 8,
NumPages = 10,
FLUs = [F1, F2, F3, F4, F5, F6] =
setup_basic_flus(NumFLUs, PageSize, NumPages),
{ok, Seq} = corfurl_sequencer:start_link(FLUs),
%% We know that the first LPN will be 1.
LPN_Pgs = [{X, list_to_binary(
lists:flatten(io_lib:format("~8..0w", [X])))} ||
X <- lists:seq(1, 5)],
try
LittleEpoch = 4,
BigEpoch = 42,
P0 = ?M:new_simple_projection(PDir, BigEpoch, 1, 1*100,
[[F1, F2, F3], [F4, F5, F6]]),
P1 = P0#proj{seq={Seq, unused, unused}},
[begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs],
[{ok, _} = corfurl_flu:seal(FLU, BigEpoch) || FLU <- FLUs],
{_LPN, Pg} = hd(LPN_Pgs),
{error_badepoch, _} = corfurl_client:append_page(P1, Pg),
P2 = P1#proj{epoch=LittleEpoch},
{error_badepoch, _} = corfurl_client:append_page(P2, Pg),
ok
after
corfurl_util:delete_dir(PDir),
corfurl_sequencer:stop(Seq),
[corfurl_flu:stop(F) || F <- FLUs],
setup_del_all(NumFLUs)
end.
-ifdef(TIMING_TEST).
forfun_test_() ->
{timeout, 99999, fun() ->
[forfun(Procs) || Procs <- [10,100,1000,5000]]
end}.
forfun_append(0, _P, _Page) ->
ok;
forfun_append(N, #proj{seq={Seq, _, _}} = P, Page) ->
{ok, _} = ?M:append_page(Seq, P, Page),
forfun_append(N - 1, P, Page).
%%% My MBP, SSD
%%% The 1K and 5K proc runs show full-mailbox-scan ickiness
%%% when getting replies from prim_file. :-(
%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.016815 sec
%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.547976 sec
%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 13.706686 sec
%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 33.516312 sec
%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.350147 sec
%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.429485 sec
%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.643233 sec
%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 15.686058 sec
%%%% forfun: 10 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 13.479458 sec
%%%% forfun: 100 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 14.752565 sec
%%%% forfun: 1000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 25.012306 sec
%%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec
forfun(NumProcs) ->
PDir = "./tmp.forfun.projection",
io:format(user, "\n", []),
NumFLUs = 4,
PageSize = 8,
%%PageSize = 4096,
NumPages = 200*1000,
PagesPerProc = NumPages div NumProcs,
FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages),
{ok, Seq} = corfurl_sequencer:start_link(FLUs),
try
Chains = [[F1, F2], [F3, F4]],
%%Chains = [[F1], [F2], [F3], [F4]],
P0 = ?M:new_simple_projection(PDir, 1, 1, NumPages*2, Chains),
P = P0#proj{seq={Seq, unused, unused}},
Me = self(),
Start = now(),
Ws = [begin
Page = <<X:(PageSize*8)>>,
spawn_link(fun() ->
forfun_append(PagesPerProc, P, Page),
Me ! {done, self()}
end)
end || X <- lists:seq(1, NumProcs)],
[receive {done, W} -> ok end || W <- Ws],
End = now(),
io:format(user, "forfun: ~p procs writing ~p pages of ~p bytes/page to ~p chains of ~p total FLUs in ~p sec\n",
[NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]),
ok
after
corfurl_util:delete_dir(PDir),
corfurl_sequencer:stop(Seq),
[corfurl_flu:stop(F) || F <- FLUs],
setup_del_all(NumFLUs)
end.
-endif. % TIMING_TEST
-endif. % not PULSE
-endif. % TEST

View file

@ -0,0 +1,133 @@
%%% File : event_logger.erl
%%% Author : Ulf Norell
%%% Description :
%%% Created : 26 Mar 2012 by Ulf Norell
-module(event_logger).
-compile(export_all).
-behaviour(gen_server).
%% API
-export([start_link/0, event/1, event/2, get_events/0, start_logging/0]).
-export([timestamp/0]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).
-define(SERVER, ?MODULE).
-record(state, { start_time, events = [] }).
-record(event, { timestamp, data }).
%%====================================================================
%% API
%%====================================================================
%%--------------------------------------------------------------------
%% Function: start_link() -> {ok,Pid} | ignore | {error,Error}
%% Description: Starts the server
%%--------------------------------------------------------------------
start_link() ->
gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
start_logging() ->
gen_server:call(?MODULE, {start, timestamp()}).
event(EventData) ->
event(EventData, timestamp()).
event(EventData, Timestamp) ->
gen_server:call(?MODULE,
#event{ timestamp = Timestamp, data = EventData }).
async_event(EventData) ->
gen_server:cast(?MODULE,
#event{ timestamp = timestamp(), data = EventData }).
get_events() ->
gen_server:call(?MODULE, get_events).
%%====================================================================
%% gen_server callbacks
%%====================================================================
%%--------------------------------------------------------------------
%% Function: init(Args) -> {ok, State} |
%% {ok, State, Timeout} |
%% ignore |
%% {stop, Reason}
%% Description: Initiates the server
%%--------------------------------------------------------------------
init([]) ->
{ok, #state{}}.
%%--------------------------------------------------------------------
%% Function: %% handle_call(Request, From, State) ->
%% {reply, Reply, State} |
%% {reply, Reply, State, Timeout} |
%% {noreply, State} |
%% {noreply, State, Timeout} |
%% {stop, Reason, Reply, State} |
%% {stop, Reason, State}
%% Description: Handling call messages
%%--------------------------------------------------------------------
handle_call(Event = #event{}, _From, State) ->
{reply, ok, add_event(Event, State)};
handle_call({start, Now}, _From, S) ->
{reply, ok, S#state{ events = [], start_time = Now }};
handle_call(get_events, _From, S) ->
{reply, lists:reverse([ {E#event.timestamp, E#event.data} || E <- S#state.events]),
S#state{ events = [] }};
handle_call(Request, _From, State) ->
{reply, {error, {bad_call, Request}}, State}.
%%--------------------------------------------------------------------
%% Function: handle_cast(Msg, State) -> {noreply, State} |
%% {noreply, State, Timeout} |
%% {stop, Reason, State}
%% Description: Handling cast messages
%%--------------------------------------------------------------------
handle_cast(Event = #event{}, State) ->
{noreply, add_event(Event, State)};
handle_cast(_Msg, State) ->
{noreply, State}.
%%--------------------------------------------------------------------
%% Function: handle_info(Info, State) -> {noreply, State} |
%% {noreply, State, Timeout} |
%% {stop, Reason, State}
%% Description: Handling all non call/cast messages
%%--------------------------------------------------------------------
handle_info(_Info, State) ->
{noreply, State}.
%%--------------------------------------------------------------------
%% Function: terminate(Reason, State) -> void()
%% Description: This function is called by a gen_server when it is about to
%% terminate. It should be the opposite of Module:init/1 and do any necessary
%% cleaning up. When it returns, the gen_server terminates with Reason.
%% The return value is ignored.
%%--------------------------------------------------------------------
terminate(_Reason, _State) ->
ok.
%%--------------------------------------------------------------------
%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState}
%% Description: Convert process state when code is changed
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%%--------------------------------------------------------------------
%%% Internal functions
%%--------------------------------------------------------------------
add_event(#event{timestamp = Now, data = Data}, State) ->
Event = #event{ timestamp = Now, data = Data },
State#state{ events = [Event|State#state.events] }.
timestamp() ->
lamport_clock:get().

View file

@ -0,0 +1,153 @@
%%%-------------------------------------------------------------------
%%% @author Hans Svensson <>
%%% @copyright (C) 2012, Hans Svensson
%%% @doc
%%%
%%% @end
%%% Created : 19 Mar 2012 by Hans Svensson <>
%%%-------------------------------------------------------------------
-module(handle_errors).
-behaviour(gen_event).
%% API
-export([start_link/0, add_handler/0]).
%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
handle_info/2, terminate/2, code_change/3]).
-define(SERVER, ?MODULE).
-record(state, { errors = [] }).
%%%===================================================================
%%% gen_event callbacks
%%%===================================================================
%%--------------------------------------------------------------------
%% @doc
%% Creates an event manager
%%
%% @spec start_link() -> {ok, Pid} | {error, Error}
%% @end
%%--------------------------------------------------------------------
start_link() ->
gen_event:start_link({local, ?SERVER}).
%%--------------------------------------------------------------------
%% @doc
%% Adds an event handler
%%
%% @spec add_handler() -> ok | {'EXIT', Reason} | term()
%% @end
%%--------------------------------------------------------------------
add_handler() ->
gen_event:add_handler(?SERVER, ?MODULE, []).
%%%===================================================================
%%% gen_event callbacks
%%%===================================================================
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Whenever a new event handler is added to an event manager,
%% this function is called to initialize the event handler.
%%
%% @spec init(Args) -> {ok, State}
%% @end
%%--------------------------------------------------------------------
init([]) ->
{ok, #state{}}.
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Whenever an event manager receives an event sent using
%% gen_event:notify/2 or gen_event:sync_notify/2, this function is
%% called for each installed event handler to handle the event.
%%
%% @spec handle_event(Event, State) ->
%% {ok, State} |
%% {swap_handler, Args1, State1, Mod2, Args2} |
%% remove_handler
%% @end
%%--------------------------------------------------------------------
handle_event({error, _, {_, "Hintfile '~s' has bad CRC" ++ _, _}}, State) ->
{ok, State};
handle_event({error, _, {_, "** Generic server" ++ _, _}}, State) ->
{ok, State};
handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, not_ready]}}, State) ->
{ok, State};
handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, {merge_locked, _, _}]}}, State) ->
{ok, State};
handle_event({error, _, {_, "Failed to read lock data from ~s: ~p\n", [_, {invalid_data, <<>>}]}}, State) ->
{ok, State};
handle_event({error, _, Event}, State) ->
{ok, State#state{ errors = [Event|State#state.errors] }};
handle_event(_Event, State) ->
{ok, State}.
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Whenever an event manager receives a request sent using
%% gen_event:call/3,4, this function is called for the specified
%% event handler to handle the request.
%%
%% @spec handle_call(Request, State) ->
%% {ok, Reply, State} |
%% {swap_handler, Reply, Args1, State1, Mod2, Args2} |
%% {remove_handler, Reply}
%% @end
%%--------------------------------------------------------------------
handle_call(get_errors, S) ->
{ok, S#state.errors, S#state{ errors = [] }};
handle_call(_Request, State) ->
Reply = ok,
{ok, Reply, State}.
%%--------------------------------------------------------------------
%% @private
%% @doc
%% This function is called for each installed event handler when
%% an event manager receives any other message than an event or a
%% synchronous request (or a system message).
%%
%% @spec handle_info(Info, State) ->
%% {ok, State} |
%% {swap_handler, Args1, State1, Mod2, Args2} |
%% remove_handler
%% @end
%%--------------------------------------------------------------------
handle_info(_Info, State) ->
{ok, State}.
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Whenever an event handler is deleted from an event manager, this
%% function is called. It should be the opposite of Module:init/1 and
%% do any necessary cleaning up.
%%
%% @spec terminate(Reason, State) -> void()
%% @end
%%--------------------------------------------------------------------
terminate(_Reason, _State) ->
ok.
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Convert process state when code is changed
%%
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%%%===================================================================
%%% Internal functions
%%%===================================================================

View file

@ -0,0 +1,48 @@
-module(lamport_clock).
-export([init/0, get/0, update/1, incr/0]).
-define(KEY, ?MODULE).
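%% A minimal per-process Lamport clock kept in the process dictionary (TEST
%% builds only): init/0 seeds the counter, incr/0 bumps it for a local event,
%% and update/1 merges a remote timestamp via max(local, remote) + 1. In
%% non-TEST builds every operation is a no-op.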
-ifdef(TEST).
init() ->
case get(?KEY) of
undefined ->
%% {Ca, Cb, _} = now(),
%% FakeTOD = ((Ca * 1000000) + Cb) * 1000000,
FakeTOD = 0,
put(?KEY, FakeTOD + 1);
N when is_integer(N) ->
ok
end.
get() ->
get(?KEY).
update(Remote) ->
New = erlang:max(get(?KEY), Remote) + 1,
put(?KEY, New),
New.
incr() ->
New = get(?KEY) + 1,
put(?KEY, New),
New.
-else. % TEST
init() ->
ok.
get() ->
ok.
update(_) ->
ok.
incr() ->
ok.
-endif. % TEST

View file

@ -0,0 +1,75 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_oid_test).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
-endif.
-define(D(X), io:format(user, "Dbg: ~s = ~p\n", [??X, X])).
-ifdef(TEST).
-ifndef(PULSE).
tango_oid_smoke_test() ->
ok = tango_test:run_test("/tmp", "tango_oid_smoke", 4096, 5*1024, 1,
fun tango_oid_smoke_test_int/3).
tango_oid_smoke_test_int(PageSize, Seq, Proj) ->
{ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
ok = tango_oid:stop(OID_Map),
ok.
tango_oid_one_test() ->
ok = tango_test:run_test("/tmp", "tango_oid_one", 4096, 5*1024, 1,
fun tango_oid_one_test_int/3).
tango_oid_one_test_int(PageSize, Seq, Proj) ->
{ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
try
K1 = foo,
K2 = bar,
OID_Num1 = 1,
error = tango_oid:get(OID_Map, "does not exist"),
{ok, OID_Num1} = tango_oid:new(OID_Map, K1),
{ok, OID_Num1} = tango_oid:get(OID_Map, K1),
already_exists = tango_oid:new(OID_Map, K1),
%% The duplicate new should *not* have clobbered the previous value
{ok, OID_Num1} = tango_oid:get(OID_Map, K1),
error = tango_oid:get(OID_Map, "does not exist"),
{ok, OID_Num2} = tango_oid:new(OID_Map, K2),
{ok, OID_Num2} = tango_oid:get(OID_Map, K2),
ok
after
tango_oid:stop(OID_Map)
end.
-endif. % not PULSE
-endif. % TEST

View file

@ -0,0 +1,315 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(tango_test).
-compile(export_all).
-include("corfurl.hrl").
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).
-ifdef(PULSE).
-compile({parse_transform, pulse_instrument}).
-endif.
-endif.
-define(SEQ, corfurl_sequencer).
-define(T, tango).
-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])).
-ifdef(TEST).
-ifndef(PULSE).
pack_v1_test() ->
[begin
Packed = ?T:pack_v1(StreamList, Options, term_to_binary(Term), Size),
StreamList = ?T:unpack_v1(Packed, stream_list),
TermBin = ?T:unpack_v1(Packed, page),
Term = binary_to_term(TermBin)
end || StreamList <- [[], [1], [1,2,4]],
Options <- [[]],
Term <- [foo, {bar, baz, <<"yo">>}],
Size <- lists:seq(100, 5000, 500)].
run_test(RootDir, BaseDirName, PageSize, NumPages, NumFLUs, FUN) ->
{FLUs, Seq, P1, Del} = corfurl:simple_test_setup(
RootDir, BaseDirName, PageSize, NumPages, NumFLUs),
try
FUN(PageSize, Seq, P1)
after
?SEQ:stop(Seq),
[ok = corfurl_flu:stop(FLU) || FLU <- FLUs],
Del()
end.
smoke_test() ->
ok = run_test("/tmp", "projection",
4096, 5*1024, 1, fun smoke_test_int/3).
smoke_test_int(PageSize, Seq, P1) ->
ok = ?SEQ:set_tails(Seq, [{42,4242}, {43,4343}]),
{ok, _, [4242, 4343]} = ?SEQ:get_tails(Seq, 0, [42, 43]),
LPN_Pgs = [{X, ?T:pad_bin(PageSize, term_to_binary({smoke, X}))} ||
X <- lists:seq(1, 5)],
[begin
{{ok, LPN}, _} = corfurl_client:append_page(P1, Pg)
end || {LPN, Pg} <- LPN_Pgs],
[begin
{ok, Pg} = corfurl:read_page(P1, LPN)
end || {LPN, Pg} <- LPN_Pgs],
ok.
write_forward_test() ->
ok = run_test("/tmp", "write_forward",
4096, 5*1024, 1, fun write_forward_test_int/3).
write_forward_test_int(PageSize, _Seq, P1) ->
StreamNum = 0,
NumPages = 10,
Pages = [term_to_binary({smoke, X}) || X <- lists:seq(1, NumPages)],
BackPs0 = [{StreamNum, []}],
{P2, BackPs1} = write_stream_pages(P1, Pages, PageSize, BackPs0, StreamNum),
{_P3, _BackPs2} = write_stream_pages(P2, Pages, PageSize, BackPs1, StreamNum, 3),
ok.
write_stream_pages(Proj0, Pages, PageSize, InitialBackPs, StreamNum) ->
write_stream_pages(Proj0, Pages, PageSize, InitialBackPs, StreamNum, 0).
write_stream_pages(Proj0, Pages, _PageSize, InitialBackPs, StreamNum, Junk) ->
WriteJunk = fun() -> JP0 = <<"blah">>,
{{ok, _}, _} = tango:append_page(Proj0, JP0,
[StreamNum])
end,
F = fun(Page, {Proj1, BackPs}) ->
if Junk band 1 /= 0 -> WriteJunk();
true -> ok end,
{{ok, LPN}, Proj2} =
tango:append_page(Proj1, Page, [StreamNum]),
if Junk band 1 /= 0 -> WriteJunk();
true -> ok end,
{Proj2, tango:add_back_pointer(StreamNum, BackPs, LPN)}
end,
{_Px, _BackPs} = Res = lists:foldl(F, {Proj0, InitialBackPs}, Pages),
%% io:format(user, "BackPs ~w\n", [_BackPs]),
Res.
scan_backward_test() ->
ok = run_test("/tmp", "scan_backward",
4096, 5*1024, 1, fun scan_backward_test_int/3).
scan_backward_test_int(PageSize, _Seq, P1) ->
StreamNum = 0,
NumPages = 10,
PageSeq = lists:seq(1, NumPages),
Pages = [term_to_binary({smoke, X}) || X <- PageSeq],
BackPs0 = [{StreamNum, []}],
{P2, BackPs1} = write_stream_pages(P1, Pages, PageSize, BackPs0, StreamNum),
LastLPN = hd(proplists:get_value(StreamNum, BackPs1)),
LastLPN=LastLPN,
[begin
ShouldBe = lists:seq(1, BackwardStartLPN),
ShouldBePages = lists:zip(ShouldBe, lists:sublist(Pages, BackwardStartLPN)),
%% If we scan backward, we should get a list of LPNs in
%% oldest -> newest (i.e. smallest LPN to largest LPN) order.
ShouldBe = tango:scan_backward(P2, StreamNum, BackwardStartLPN,
false),
StopAtLimit = NumPages div 2,
StopAtKicksInAt = StopAtLimit + 2,
{StopAtLPN, ShouldBeLPNS} =
if BackwardStartLPN < StopAtKicksInAt ->
{0, ShouldBe};
true ->
{StopAtLimit, [LPN || LPN <- ShouldBe, LPN > StopAtLimit]}
end,
ShouldBeLPNS =
tango:scan_backward(P2, StreamNum, BackwardStartLPN, StopAtLPN,
false),
%% If we scan backward, we should get a list of LPNs in
%% oldest -> newest (i.e. smallest LPN to largest LPN) order
%% together with the actual page data.
ShouldBePages = tango:scan_backward(P2, StreamNum, BackwardStartLPN,
true),
ok
end || BackwardStartLPN <- lists:seq(1, NumPages)],
ok.
tango_dt_register_test() ->
ok = run_test("/tmp", "tango_dt_register",
4096, 5*1024, 1, fun tango_dt_register_int/3).
tango_dt_register_int(PageSize, Seq, Proj) ->
{ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
{ok, Reg1Num} = tango_oid:new(OID_Map, "register1"),
{ok, Reg1} = tango_dt_register:start_link(PageSize, Seq, Proj,
Reg1Num),
{ok, Reg2Num} = tango_oid:new(OID_Map, "register2"),
{ok, Reg2} = tango_dt_register:start_link(PageSize, Seq, Proj,
Reg2Num),
NumVals = 8,
Vals = [lists:flatten(io_lib:format("version ~w", [X])) ||
X <- lists:seq(1, NumVals)],
[tango_dt_register:set(Reg, Val) || Reg <- [Reg1, Reg2], Val <- Vals],
LastVal = lists:last(Vals),
{ok, LastVal} = tango_dt_register:get(Reg1),
{ok, LastVal} = tango_dt_register:get(Reg2),
%% If we instantiate a new instance of an existing register, then
%% a single get should show the most recent modification.
{ok, Reg2b} = tango_dt_register:start_link(PageSize, Seq, Proj,
Reg2Num),
{ok, LastVal} = tango_dt_register:get(Reg2b),
%% If we update the "old" instance of a register, then the "new"
%% instance should also see the update.
NewVal = {"Heh", "a new value"},
ok = tango_dt_register:set(Reg2, NewVal),
C1 = fun() -> {ok, NewVal} = tango_dt_register:get(Reg2), % sanity check
{ok, NewVal} = tango_dt_register:get(Reg2b), ok end,
ok = C1(),
ok = tango_dt_register:checkpoint(Reg2),
ok = C1(),
{ok, Reg2c} = tango_dt_register:start_link(PageSize, Seq, Proj,
Reg2Num),
{ok, NewVal} = tango_dt_register:get(Reg2c),
[ok = tango_dt_register:stop(X) || X <- [Reg1, Reg2, Reg2b, Reg2c]],
ok.
tango_dt_map_test() ->
ok = run_test("/tmp", "tango_dt_map",
4096, 5*1024, 1, fun tango_dt_map_int/3).
tango_dt_map_int(PageSize, Seq, Proj) ->
{ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
{ok, Reg1Num} = tango_oid:new(OID_Map, "map1"),
{ok, Reg1} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg1Num),
{ok, Reg2Num} = tango_oid:new(OID_Map, "map2"),
{ok, Reg2} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg2Num),
NumVals = 8,
Vals = [lists:flatten(io_lib:format("version ~w", [X])) ||
X <- lists:seq(1, NumVals)],
Keys = ["key1", "key2"],
[tango_dt_map:set(Reg, Key, Val) || Reg <- [Reg1, Reg2],
Key <- Keys, Val <- Vals],
LastVal = lists:last(Vals),
C1 = fun(R, LV) -> [{ok, LV} = tango_dt_map:get(R, Key) || Key <- Keys],
ok end,
ok = C1(Reg1, LastVal),
ok = C1(Reg2, LastVal),
%% If we instantiate a new instance of an existing map, then
%% a single get should show the most recent modification.
{ok, Reg2b} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg2Num),
[{ok, LastVal} = tango_dt_map:get(Reg2b, Key) || Key <- Keys],
%% If we update the "old" instance of a map, then the "new"
%% instance should also see the update.
NewVal = {"Heh", "a new value"},
[ok = tango_dt_map:set(Reg2, Key, NewVal) || Key <- Keys],
[ok = C1(R, NewVal) || R <- [Reg2, Reg2b]],
[ok = C1(R, LastVal) || R <- [Reg1]],
[ok = tango_dt_map:checkpoint(R) || R <- [Reg1, Reg2, Reg2b]],
NewVal2 = "after the checkpoint....",
[ok = tango_dt_map:set(Reg2, Key, NewVal2) || Key <- Keys],
[ok = C1(R, NewVal2) || R <- [Reg2, Reg2b]],
[ok = C1(R, LastVal) || R <- [Reg1]],
ok.
tango_dt_queue_test() ->
ok = run_test("/tmp", "tango_dt_queue",
4096, 5*1024, 1, fun tango_dt_queue_int/3).
tango_dt_queue_int(PageSize, Seq, Proj) ->
MOD = tango_dt_queue,
{ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
{ok, Q1Num} = tango_oid:new(OID_Map, "queue1"),
{ok, Q1} = MOD:start_link(PageSize, Seq, Proj, Q1Num),
{ok, true} = MOD:is_empty(Q1),
{ok, 0} = MOD:length(Q1),
Num1 = 4,
Seq1 = lists:seq(1, Num1),
RevSeq1 = lists:reverse(Seq1),
[ok = MOD:in(Q1, X) || X <- Seq1],
{ok, Num1} = MOD:length(Q1),
{ok, {value, 1}} = MOD:peek(Q1),
{ok, Seq1} = MOD:to_list(Q1),
ok = MOD:reverse(Q1),
{ok, RevSeq1} = MOD:to_list(Q1),
ok = MOD:reverse(Q1),
[{ok, {value, X}} = MOD:out(Q1) || X <- lists:seq(1, Num1)],
{ok, empty} = MOD:out(Q1),
{ok, []} = MOD:to_list(Q1),
[ok = MOD:in(Q1, X) || X <- Seq1],
{ok, false} = MOD:member(Q1, does_not_exist),
{ok, true} = MOD:member(Q1, Num1),
ok = MOD:filter(Q1, fun(X) when X == Num1 -> false;
(_) -> true
end),
Num1Minus1 = Num1 - 1,
C1 = fun(Q, Expected) -> {ok, false} = MOD:member(Q, Num1),
{ok, true} = MOD:member(Q, Num1 - 1),
{ok, Expected} = MOD:length(Q), ok end,
ok = C1(Q1, Num1Minus1),
{ok, Q2} = MOD:start_link(PageSize, Seq, Proj, Q1Num),
ok = C1(Q2, Num1Minus1),
ok = MOD:in(Q2, 88),
ok = C1(Q2, Num1),
ok = C1(Q1, Num1),
?D(before_CPs_start),
[ok = MOD:checkpoint(Q1) || _ <- lists:seq(1, 4)],
?D(after_CPs_end),
[ok = C1(X, Num1) || X <- [Q1, Q2]],
{ok, Q3} = MOD:start_link(PageSize, Seq, Proj, Q1Num),
[ok = C1(X, Num1) || X <- [Q1, Q2, Q3]],
?D(before_Q4_start),
{ok, Q4} = MOD:start_link(PageSize, Seq, Proj, Q1Num),
?D({after_Q4_start, Q4}),
ok = MOD:in(Q4, 89),
?D(after_Q4_in),
Num1Plus1 = Num1 + 1,
[ok = C1(X, Num1Plus1) || X <- [Q1, Q2, Q3, Q4]],
[ok = MOD:stop(X) || X <- [Q1, Q2, Q3, Q4]],
ok.
-endif. % not PULSE
-endif. % TEST