diff --git a/prototype/tango/.gitignore b/prototype/tango/.gitignore new file mode 100644 index 0000000..91f0c9c --- /dev/null +++ b/prototype/tango/.gitignore @@ -0,0 +1,7 @@ +.eunit +.eqc-info +current_counterexample.eqc +deps +ebin/*.beam +ebin/*.app +erl_crash.dump diff --git a/prototype/tango/LICENSE b/prototype/tango/LICENSE new file mode 100644 index 0000000..e454a52 --- /dev/null +++ b/prototype/tango/LICENSE @@ -0,0 +1,178 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
diff --git a/prototype/tango/Makefile b/prototype/tango/Makefile
new file mode 100644
index 0000000..e9951ac
--- /dev/null
+++ b/prototype/tango/Makefile
@@ -0,0 +1,39 @@
+REBAR_BIN := $(shell which rebar)
+ifeq ($(REBAR_BIN),)
+REBAR_BIN = ./rebar
+endif
+
+.PHONY: rel deps package pkgclean
+
+all: deps compile
+
+compile:
+	$(REBAR_BIN) compile
+
+deps:
+	$(REBAR_BIN) get-deps
+
+clean:
+	$(REBAR_BIN) clean
+
+test: deps compile eunit
+
+eunit:
+	$(REBAR_BIN) -v skip_deps=true eunit
+
+pulse: compile
+	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile
+	env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit
+
+APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \
+	xmerl webtool snmp public_key mnesia eunit syntax_tools compiler
+PLT = $(HOME)/.dbms_dialyzer_plt
+
+build_plt: deps compile
+	dialyzer --build_plt --output_plt $(PLT) --apps $(APPS) deps/*/ebin
+
+dialyzer: deps compile
+	dialyzer -Wno_return --plt $(PLT) ebin
+
+clean_plt:
+	rm $(PLT)
diff --git a/prototype/tango/README.md b/prototype/tango/README.md
new file mode 100644
index 0000000..15b0d21
--- /dev/null
+++ b/prototype/tango/README.md
@@ -0,0 +1,33 @@
+
+Tango prototype TODO list
+=========================
+
+__ The current checkpoint implementation is fundamentally broken and
+   needs a rewrite, or else.
+   This issue is not mentioned at all in the Tango paper.
+
+   option 1: fix checkpoint to be 100% correct
+   option 2: checkpointing is for the weak and the memory-constrained, so
+             don't bother. Instead, rip out the current checkpoint code,
+             period.
+   option 3: other
+
+xx Checkpoint fix option #1: history splicing within the same OID?
+
+xx Checkpoint fix option #2: checkpoint to a new OID, history writes to both
+                             OIDs during the CP, then a marker in the old OID
+                             to switch over to the new OID?
+
+History splicing has a flaw that I believe makes it unworkable. The switch to a
+new OID has problems with updates written to the old OID before the
+new checkpoint has finished.
+
+I believe that a checkpoint where:
+ * all Tango writes, checkpoint and non-checkpoint alike, are noted with
+   a checkpoint number.
+ * that checkpoint number is strictly increasing
+ * a new checkpoint has a new checkpoint number
+ * scans ignore blocks with checkpoint numbers larger than the current
+   active checkpoint #, until the checkpoint is complete.
+
+... ought to work correctly.
diff --git a/prototype/tango/docs/corfurl.md b/prototype/tango/docs/corfurl.md
new file mode 100644
index 0000000..08960dc
--- /dev/null
+++ b/prototype/tango/docs/corfurl.md
@@ -0,0 +1,191 @@
+## CORFU papers
+
+I recommend the "5 pages" paper below first, to give a flavor of
+what CORFU is about. When Scott first read the CORFU paper
+back in 2011 (and the Hyder paper), he thought it was insanity.
+He recommends waiting before judging quite so hastily. :-)
+
+After that, perhaps take a step back and skim over the
+Hyder paper. Hyder started before CORFU, but since CORFU, the
+Hyder folks at Microsoft have rewritten Hyder to use CORFU as
+the shared log underneath it. But the Hyder paper has lots of
+interesting bits about how you'd go about creating a distributed
+DB where the transaction log *is* the DB.
+
+### "CORFU: A Distributed Shared Log"
+
+MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D.
DAVIS, and VIJAYAN
+PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI,
+University of California, San Diego, TED WOBBER, Microsoft Research
+Silicon Valley
+
+Long version of introduction to CORFU (~30 pages)
+http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf
+
+### "CORFU: A Shared Log Design for Flash Clusters"
+
+Same authors as above
+
+Short version of introduction to CORFU paper above (~12 pages)
+
+http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf
+
+### "From Paxos to CORFU: A Flash-Speed Shared Log"
+
+Same authors as above
+
+5 pages, a short summary of CORFU basics and some trial applications
+that have been implemented on top of it.
+
+http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf
+
+### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware"
+
+Wei, Davis, Wobber, Balakrishnan, Malkhi
+
+Summary report of implementing the CORFU server-side in
+FPGA-style hardware. (~11 pages)
+
+http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf
+
+### "Tango: Distributed Data Structures over a Shared Log"
+
+Balakrishnan, Malkhi, Wobber, Wu, Prabhakaran, Wei, Davis, Rao, Zou, Zuck
+
+Describes a framework for developing data structures that reside
+persistently within a CORFU log: the log *is* the database/data
+structure store.
+
+http://www.snookles.com/scottmp/corfu/Tango.pdf
+
+### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service"
+
+Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber
+
+The ZooKeeper inter-server communication is replaced with CORFU.
+Faster, fewer lines of code than ZK, and more features than the
+original ZK code base.
+
+http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf
+
+### "Hyder – A Transactional Record Manager for Shared Flash"
+
+Bernstein, Reid, Das
+
+Describes a distributed log-based DB system where the txn log is
+treated quite oddly: a "txn intent" record is written to a
+shared common log. All participants read the shared log in
+parallel and make commit/abort decisions in parallel, based on
+what conflicts (or not) they see in the log. Scott's first
+reading was "No way, wacky" ... and he has since changed his mind.
+
+http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf
+pages 9-20
+
+
+
+## Fiddling with PULSE
+
+Do the following:
+
+    make clean
+    make
+    make pulse
+
+... then watch the dots go across the screen for 60 seconds. If you
+wish, you can press `Control-c` to interrupt the test. We're really
+interested in the build artifacts.
+
+    erl -pz .eunit deps/*/ebin
+    eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
+
+This will run the PULSE test for 5 seconds. Feel free to adjust for
+as many seconds as you wish.
+
+    Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
+
+    Eshell V5.10.3 (abort with ^G)
+    1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
+    Starting Quviq QuickCheck version 1.30.4
+       (compiled at {{2014,2,7},{9,19,50}})
+    Licence for Basho reserved until {{2014,2,17},{1,41,39}}
+    ......................................................................................
+    OK, passed 86 tests
+    schedule: Count: 86 Min: 2 Max: 1974 Avg: 3.2e+2 Total: 27260
+    true
+    2>
+
+REPL interactive work can be done via:
+
+1. Edit code, e.g. `corfurl_pulse.erl`.
+2. Run `env BITCASK_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP`
+to compile.
+3.
Reload any recompiled modules, e.g. `l(corfurl_pulse).`
+4. Resume QuickCheck activities.
+
+## Seeing a PULSE scheduler interleaving failure in action
+
+1. Edit `corfurl_pulse:check_trace()` to uncomment the
+   use of `conjunction()` that mentions `bogus_order_check_do_not_use_me`
+   and comment out the real `conjunction()` call below it.
+2. Recompile & reload.
+3. Check.
+
+For example:
+
+    9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())).
+    .........Failed! After 9 tests.
+
+Sweet! The first tuple below contains the first `?FORALL()` values,
+and the 2nd is the list of commands,
+`{SequentialCommands, ListofParallelCommandLists}`. The 3rd is the
+seed used to perturb the PULSE scheduler.
+
+In this case, `SequentialCommands` has two calls (to `setup()` then
+`append()`) and there are two parallel procs: one makes 1 call
+to `append()` and the other makes 2 calls to `append()`.
+
+    {2,2,9}
+    {{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}],
+      [[{set,{var,3},
+         {call,corfurl_pulse,append,
+               [{var,1},<<231,149,226,203,10,105,54,223,147>>]}}],
+       [{set,{var,2},
+         {call,corfurl_pulse,append,
+               [{var,1},<<7,206,146,75,249,13,154,238,110>>]}},
+        {set,{var,4},
+         {call,corfurl_pulse,append,
+               [{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]},
+     {27492,46961,4884}}
+
+Here are our results:
+
+    simple_result: passed
+    errors: passed
+    events: failed
+    identity: passed
+    bogus_order_check_do_not_use_me: failed
+    [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
+
+Our (bogus!) order expectation was violated. Shrinking!
+
+    simple_result: passed
+    errors: passed
+    events: failed
+    identity: passed
+    bogus_order_check_do_not_use_me: failed
+    [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}]
+
+Shrinking was able to remove two `append()` calls and to shrink the
+size of the pages from 9 bytes down to 1 byte.
+
+    Shrinking........(8 times)
+    {1,1,1}
+    {{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}],
+      [[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],
+       [{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]},
+     {27492,46961,4884}}
+    events: failed
+    bogus_order_check_do_not_use_me: failed
+    [{ok,2},{ok,1}] /= [{ok,1},{ok,2}]
+    false
diff --git a/prototype/tango/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen b/prototype/tango/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen
new file mode 100644
index 0000000..3e01ac1
--- /dev/null
+++ b/prototype/tango/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen
@@ -0,0 +1,35 @@
+msc {
+  client1, FLU1, FLU2, client2, client3;
+
+  client1 box client3 [label="Epoch #1: chain = FLU1 -> FLU2"];
+  client1 -> FLU1 [label="{write,epoch1,<>}"];
+  client1 <- FLU1 [label="ok"];
+  client1 box client1 [label="Client crash", textcolour="red"];
+
+  FLU1 box FLU1 [label="FLU crash", textcolour="red"];
+
+  client1 box client3 [label="Epoch #2: chain = FLU2"];
+
+  client2 -> FLU2 [label="{write,epoch2,<>}"];
+  client2 <- FLU2 [label="ok"];
+
+  client3 box client3 [label="Read repair starts", textbgcolour="aqua"];
+
+  client3 -> FLU2 [label="{read,epoch2}"];
+  client3 <- FLU2 [label="{ok,<>}"];
+  client3 -> FLU1 [label="{write,epoch2,<>}"];
+  FLU1 box FLU1 [label="What do we do here?
Our current value is <>.", textcolour="red"] ; + FLU1 box FLU1 [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ; + FLU1 box FLU1 [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ; + FLU1 -> client3 [label="I'm sorry, Dave, I cannot do that."]; + + FLU1 box FLU1 [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ; + + client3 -> FLU1 [label="{write,epoch2,<>,repair,witnesses=[FLU2]}", textbgcolour="silver"]; + FLU1 box FLU1 [label="Start an async process to ask the witness list to corroborate this repair."]; + FLU1 -> FLU2 [label="{read,epoch2}", textbgcolour="aqua"]; + FLU1 <- FLU2 [label="{ok,<>}", textbgcolour="aqua"]; + FLU1 box FLU1 [label="Overwrite local storage with repair page.", textbgcolour="silver"]; + client3 <- FLU1 [label="Async proc replies: ok", textbgcolour="silver"]; + +} diff --git a/prototype/tango/docs/corfurl/notes/README.md b/prototype/tango/docs/corfurl/notes/README.md new file mode 100644 index 0000000..b5757aa --- /dev/null +++ b/prototype/tango/docs/corfurl/notes/README.md @@ -0,0 +1,92 @@ + +## read-repair-race.1. + +First attempt at using "mscgen" to make some Message Sequence +Chart (MSC) for a race found at commit 087c2605ab. + + +## read-repair-race.2. + +Second attempt. This is almost exactly the trace that is +generated by this failing test case at commit 087c2605ab: + + C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]]. + eqc:check(corfurl_pulse:prop_pulse(), C2). + +## read-repair-race.2b.* + +Same basic condition as read-repair-race.2, but edited +substantially to make it clearer what is happening. +Also for commit 087c2605ab. + +I believe that I have a fix for the silver-colored +`error-overwritten` ... and it was indeed added to the code soon +afterward, but it turns out that it doesn't solve the entire problem +of "two clients try to write the exact same data at the same time to +the same LPN". + + +## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN" + +This situation is something that CORFU cannot protect against, IMO. + +I have been struggling for a while, to try to find a way for CORFU +clients to know *always* when there is a conflict with another +writer. It usually works: the basic nature of write-once registers is +very powerful. However, in the case where two clients are trying to +write the same page data to the same LPN, it looks impossible to +resolve. + +How do you tell the difference between: + +1. A race between a client A writing page P at address LPN and + read-repair fixing P. P *is* A's data and no other's, so this race + doesn't confuse anyone. + +1. A race between a client A writing page P at address LPN and client + B writing the exact same page data P at the same LPN. + A's page P = B's page P, but clients A & B don't know that. + + If CORFU tells both A & B that they were successful, A & B assume + that the CORFU log has two new pages appended to it, but in truth + only one new page was appended. 
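+
+As a concrete illustration of the second case, here is a minimal sketch of
+the "read back and compare" check a client can do after `error_overwritten`.
+It assumes a single-FLU chain and uses the `corfurl_flu` read/write calls
+from this prototype; `write_and_verify/4` itself is a hypothetical helper,
+not part of the code:
+
+    %% Hypothetical helper: treat "already written with identical bytes"
+    %% as success, like the error_overwritten clause of
+    %% corfurl:write_page_to_chain/7 does.
+    write_and_verify(FLU, Epoch, LPN, Page) ->
+        case corfurl_flu:write(FLU, Epoch, LPN, Page) of
+            ok ->
+                ok;                            % we wrote it first
+            error_overwritten ->
+                case corfurl_flu:read(FLU, Epoch, LPN) of
+                    {ok, Page}   -> ok;        % identical bytes are already there
+                    {ok, _Other} -> error_overwritten;
+                    Else         -> Else
+                end;
+            Else ->
+                Else
+        end.
+
+If clients A and B both call this with byte-identical pages, both get `ok`:
+the read-back cannot distinguish "read repair copied my page" from "another
+client appended the same bytes", so each client believes its own append
+succeeded even though only one page was added to the log.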
+
+If we try to solve this by always avoiding the same LPN address
+conflict, we are deluding ourselves. If we assume that the sequencer
+is 100% correct in that it never assigns the same LPN twice, and if we
+assume that a client must never write a block without an assignment
+from the sequencer, then the problem is solved. But that solution has a
+_heavy_ price: the log is only available when the sequencer is
+available, and only when no more than one sequencer is running at a
+time.
+
+The CORFU base system promises correct operation, even if:
+
+* Zero sequencers are running, and clients might choose the same LPN
+  to write to.
+* Two or more sequencers are running, and different sequencers
+  assign the same LPN to two different clients.
+
+But CORFU's "correct" behavior does not include detecting the same
+page at the same LPN. The papers don't specifically say it, alas.
+But IMO it's impossible to guarantee, so all docs ought to explicitly
+say that it's impossible and that clients must not assume it.
+
+See also
+* two-clients-race.1.png
+
+## A scenario of chain repair & write-once registers
+
+See:
+* 2014-02-27.chain-repair-write-twice.png
+
+... for a scenario where write-once registers that are truly only
+write-once-ever-for-the-rest-of-the-future are "inconvenient" when it
+comes to chain repair. Client 3 is attempting to do chain repair ops,
+bringing FLU1 back into sync with FLU2.
+
+The diagram proposes one possible idea for making overwriting a
+write-once register a bit safer: ask another node in the chain to
+verify that the page you've been asked to repair is exactly the same
+as that other FLU's page.
+
diff --git a/prototype/tango/docs/corfurl/notes/read-repair-race.1.mscgen b/prototype/tango/docs/corfurl/notes/read-repair-race.1.mscgen
new file mode 100644
index 0000000..1cbec57
--- /dev/null
+++ b/prototype/tango/docs/corfurl/notes/read-repair-race.1.mscgen
@@ -0,0 +1,49 @@
+msc {
+  "<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"];
+
+  "<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ;
+  "<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ;
+  "<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ;
+  "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"];
+  "<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ;
+  "<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ;
+  "<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
+  "<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ;
+  "<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ;
+  "<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ;
+  "<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ;
+  "<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ;
+  "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair?
Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ; + "<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ; + "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"]; + "<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ; + "<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ; + "<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ; + "<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ; +} diff --git a/prototype/tango/docs/corfurl/notes/read-repair-race.2.mscgen b/prototype/tango/docs/corfurl/notes/read-repair-race.2.mscgen new file mode 100644 index 0000000..9afffe2 --- /dev/null +++ b/prototype/tango/docs/corfurl/notes/read-repair-race.2.mscgen @@ -0,0 +1,60 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> 
"<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. 
Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; +} diff --git a/prototype/tango/docs/corfurl/notes/read-repair-race.2b.mscgen b/prototype/tango/docs/corfurl/notes/read-repair-race.2b.mscgen new file mode 100644 index 0000000..978dc72 --- /dev/null +++ b/prototype/tango/docs/corfurl/notes/read-repair-race.2b.mscgen @@ -0,0 +1,57 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... 
lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; +} diff --git a/prototype/tango/docs/corfurl/notes/two-clients-race.1.mscgen b/prototype/tango/docs/corfurl/notes/two-clients-race.1.mscgen new file mode 100644 index 0000000..ce8e614 --- /dev/null +++ b/prototype/tango/docs/corfurl/notes/two-clients-race.1.mscgen @@ -0,0 +1,33 @@ +msc { + client1, FLU1, FLU2, client2, client3; + + client1 -> FLU1 [label="{write,epoch1,<>}"]; + client1 <- FLU1 [label="ok"]; + + client3 -> FLU2 [label="{seal,epoch1}"]; + client3 <- FLU2 [label="{ok,...}"]; + client3 -> FLU1 [label="{seal,epoch1}"]; + client3 <- FLU1 [label="{ok,...}"]; + + client2 -> FLU1 [label="{write,epoch1,<>}"]; + client2 <- FLU1 [label="error_epoch"]; + client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"]; + client2 -> FLU1 
[label="{write,epoch2,<>}"]; + client2 <- FLU1 [label="error_overwritten"]; + + client1 -> FLU2 [label="{write,epoch1,<>}"]; + client1 <- FLU2 [label="error_epoch"]; + client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"]; + + client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ; + client3 -> FLU1 [label="{read,epoch2}"]; + client3 <- FLU1 [label="{ok,<>}"]; + client3 -> FLU2 [label="{write,epoch2,<>}"]; + client3 <- FLU2 [label="ok"]; + client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ; + client3 abox client3 [ label = "We saw <>", textbgcolour="silver"] ; + + client1 -> FLU2 [label="{write,epoch2,<>}"]; + client1 <- FLU2 [label="error_overwritten"]; + +} diff --git a/prototype/tango/include/corfurl.hrl b/prototype/tango/include/corfurl.hrl new file mode 100644 index 0000000..819e79b --- /dev/null +++ b/prototype/tango/include/corfurl.hrl @@ -0,0 +1,47 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-type flu_name() :: atom(). +-type flu() :: pid() | flu_name(). +-type flu_chain() :: [flu()]. + +-type seq_name() :: {'undefined' | pid(), atom(), atom()}. + +-record(range, { + pn_start :: non_neg_integer(), % start page number + pn_end :: non_neg_integer(), % end page number + %% chains :: [flu_chain()] + chains :: tuple() + }). + +-record(proj, { % Projection + dir :: string(), + page_size :: non_neg_integer(), + epoch :: non_neg_integer(), + seq :: 'undefined' | seq_name(), + r :: [#range{}] + }). + +%% 1 byte @ offset 0: 0=unwritten, 1=written, 2=trimmed, 255=corrupt? TODO +%% 8 bytes @ offset 1: logical page number +%% P bytes @ offset 9: page data +%% 1 byte @ offset 9+P: 0=unwritten, 1=written +-define(PAGE_OVERHEAD, (1 + 8 + 1)). 
+ diff --git a/prototype/tango/rebar.config.script b/prototype/tango/rebar.config.script new file mode 100644 index 0000000..b625bd2 --- /dev/null +++ b/prototype/tango/rebar.config.script @@ -0,0 +1,55 @@ +PulseBuild = case os:getenv("USE_PULSE") of + false -> + false; + _ -> + true + end, +case PulseBuild of + true -> + PulseOpts = + [{pulse_no_side_effect, + [{erlang,display,1} + ]}, + {pulse_side_effect, + [ {corfurl_sequencer, get, '_'} + , {corfurl_flu, write, '_'} + , {corfurl_flu, read, '_'} + , {corfurl_flu, seal, '_'} + , {corfurl_flu, trim, '_'} + , {corfurl_flu, fill, '_'} + + , {corfurl, read_projection, '_'} + , {corfurl, save_projection, '_'} + + , {prim_file, '_', '_'} + , {file, '_', '_'} + , {filelib, '_', '_'} + , {os, '_', '_'} ]}, + + {pulse_replace_module, + [ {gen_server, pulse_gen_server} + , {application, pulse_application} + , {supervisor, pulse_supervisor} ]} + ], + PulseCFlags = [{"CFLAGS", "$CFLAGS -DPULSE"}], + UpdConfig = case lists:keysearch(eunit_compile_opts, 1, CONFIG) of + {value, {eunit_compile_opts, Opts}} -> + lists:keyreplace(eunit_compile_opts, + 1, + CONFIG, + {eunit_compile_opts, Opts ++ PulseOpts}); + _ -> + [{eunit_compile_opts, PulseOpts} | CONFIG] + end, + case lists:keysearch(port_env, 1, UpdConfig) of + {value, {port_env, PortEnv}} -> + lists:keyreplace(port_env, + 1, + UpdConfig, + {port_env, PortEnv ++ PulseCFlags}); + _ -> + [{port_env, PulseCFlags} | UpdConfig] + end; + false -> + CONFIG +end. diff --git a/prototype/tango/src/corfurl.erl b/prototype/tango/src/corfurl.erl new file mode 100644 index 0000000..8852ef2 --- /dev/null +++ b/prototype/tango/src/corfurl.erl @@ -0,0 +1,376 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl). + +-export([new_simple_projection/5, + new_range/3, + read_projection/2, + save_projection/2, + latest_projection_epoch_number/1]). +-export([write_page/3, read_page/2, scan_forward/3, + fill_page/2, trim_page/2]). +-export([simple_test_setup/5]). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +%%% Debugging: for extra events in the PULSE event log, use the 2nd statement. +-define(EVENT_LOG(X), ok). +%%% -define(EVENT_LOG(X), event_logger:event(X)). + +write_page(#proj{epoch=Epoch} = P, LPN, Page) -> + Chain = project_to_chain(LPN, P), + write_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). + +write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth) -> + write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth, ok). 
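+
+%% write_page_to_chain/7 walks the chain head-to-tail.  Nth is our 1-based
+%% position in the chain: the error_trimmed and error_overwritten clauses
+%% below use it to tell "we lost at the head" (Nth == 1) apart from "we lost
+%% somewhere downstream while racing a trimmer or a read-repairing reader"
+%% (Nth > 1).  Reply is the value to return once the entire chain has been
+%% written.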
+ +write_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) -> + Reply; +write_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> + case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of + ok -> + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); + error_badepoch -> + %% TODO: Interesting case: there may be cases where retrying with + %% a new epoch & that epoch's projection is just fine (and + %% we'll succeed) and cases where retrying will fail. + %% Figure out what those cases are, then for the + %% destined-to-fail case, try to clean up (via trim?)? + error_badepoch; + error_trimmed when Nth == 1 -> + %% Whoa, partner, you're movin' kinda fast for a trim. + %% This might've been due to us being too slow and someone + %% else junked us. + error_trimmed; + error_trimmed when Nth > 1 -> + %% We're racing with a trimmer. We won the race at head, + %% but here in the middle or tail (Nth > 1), we lost. + %% Our strategy is keep racing down to the tail. + %% If we continue to lose the exact same race for the rest + %% of the chain, the 1st clause of this func will return 'ok'. + %% That is *exactly* our intent and purpose! + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); + error_overwritten when Nth == 1 -> + %% The sequencer lied, or we didn't use the sequencer and + %% guessed and guessed poorly, or someone is accidentally + %% trying to take our page. Shouganai, these things happen. + error_overwritten; + error_overwritten when Nth > 1 -> + %% The likely cause is that another reader has noticed that + %% we haven't finished writing this page in this chain and + %% has repaired the remainder of the chain while we were + %% drinking coffee. Let's double-check. + case corfurl_flu:read(flu_pid(FLU), Epoch, LPN) of + {ok, AlreadyThere} when AlreadyThere =:= Page -> + %% Alright, well, let's go continue the repair/writing, + %% since we agree on the page's value. + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); + error_badepoch -> + %% TODO: same TODO as the above error_badepoch case. + error_badepoch; + error_trimmed -> + %% This is the same as 'error_trimmed when Nth > 1' above. + %% Do the same thing. + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); + Else -> + %% Can PULSE can drive us to this case? + giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth}) + end + end. + +read_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + Tail = lists:last(Chain), + case corfurl_flu:read(flu_pid(Tail), Epoch, LPN) of + {ok, _} = OK -> + OK; + error_badepoch -> + error_badepoch; + error_trimmed -> + %% TODO: A sanity/should-never-happen check would be to + %% see if everyone else in the chain are also trimmed. + error_trimmed; + error_unwritten -> + %% TODO: During scan_forward(), this pestering of the upstream + %% nodes in the chain is possibly-excessive-work. + %% For now, we'll assume that we always want to repair. + read_repair_chain(Epoch, LPN, Chain) + %% Let it crash: error_overwritten + end. + +ok_or_trim(ok) -> + ok; +ok_or_trim(error_trimmed) -> + ok; +ok_or_trim(Else) -> + Else. + +read_repair_chain(Epoch, LPN, Chain) -> + try + read_repair_chain1(Epoch, LPN, Chain) + catch + throw:{i_give_up,Res} -> + Res + end. 
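+
+%% Read repair, step 1: read the head's copy of the page.  If the head has
+%% it, push that copy down the rest of the chain (read_repair_chain2/5); if
+%% the head reports error_trimmed, propagate the trim by filling/trimming
+%% the rest of the chain instead.  A throw of {i_give_up, Res} anywhere
+%% below unwinds to the catch in read_repair_chain/3, which hands Res back
+%% to the caller; the caller is free to retry the repair from the top.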
+ +read_repair_chain1(Epoch, LPN, [Head|Rest] = Chain) -> + ?EVENT_LOG({read_repair, LPN, Chain, i_am, self()}), + case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of + {ok, Page} -> + ?EVENT_LOG({read_repair, LPN, Head, ok}), + read_repair_chain2(Rest, Epoch, LPN, Page, Chain); + error_badepoch -> + ?EVENT_LOG({read_repair, LPN, Head, badepoch}), + error_badepoch; + error_trimmed -> + ?EVENT_LOG({read_repair, LPN, Head, trimmed}), + %% TODO: robustify + [begin + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X)}), + ok = case ok_or_trim(corfurl_flu:fill(flu_pid(X), Epoch, + LPN)) of + ok -> + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), ok}), + ok; + error_overwritten -> + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), overwritten, try_to_trim}), + Res2 = ok_or_trim(corfurl_flu:trim( + flu_pid(X), Epoch, LPN)), + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), trim, Res2}), + case Res2 of ok -> ok; + _ -> throw({i_give_up,Res2}) + end; + Else -> + %% We're too deeply nested for the current code + %% to deal with, and we're racing. Fine, let + %% our opponent continue. We'll give up, and if + %% the client wants to try again, we can try + %% again from the top. + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), Else}), + throw({i_give_up,Else}) + end + end || X <- Rest], + error_trimmed; + error_unwritten -> + ?EVENT_LOG({read_repair, LPN, read, Head, unwritten}), + error_unwritten + %% Let it crash: error_overwritten + end. + +read_repair_chain2([] = _Repairees, _Epoch, _LPN, Page, _OriginalChain) -> + ?EVENT_LOG({read_repair2, _LPN, finished, {ok, Page}}), + {ok, Page}; +read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) -> + case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of + ok -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), ok}), + read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain); + error_badepoch -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), badepoch}), + error_badepoch; + error_trimmed -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), trimmed}), + error_trimmed; + error_overwritten -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), overwritten}), + %% We're going to do an optional sanity check here. + %% TODO: make the sanity check configurable? + case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of + {ok, Page2} when Page2 =:= Page -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), exact_page}), + %% We're probably going to be racing against someone else + %% that's also doing repair, but so be it. + read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain); + {ok, _Page2} -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), bad_page, _Page2}), + giant_error({bummerbummer, ?MODULE, ?LINE, + sanity_check_failure, lpn, LPN, epoch, Epoch}); + error_badepoch -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), badepoch}), + error_badepoch; + error_trimmed -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), trimmed}), + %% Start repair at the beginning to handle this case + read_repair_chain(Epoch, LPN, OriginalChain) + %% Let it crash: error_overwritten, error_unwritten + end + %% Let it crash: error_unwritten + end. + +scan_forward(P, LPN, MaxPages) -> + scan_forward(P, LPN, MaxPages, ok, true, []). 
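+
+%% scan_forward/6 accumulates {LPN, Page} pairs and returns
+%% {Status, NextLPN, MorePagesMightExist, Pages}.  Trimmed pages are
+%% skipped, an unwritten page stops the scan with MoreP = false, and
+%% error_badepoch stops it with Status = error_badepoch so that the caller
+%% (e.g. corfurl_client:scan_forward/3) can fetch a newer projection and
+%% resume from NextLPN.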
+ +scan_forward(_P, LPN, 0, Status, MoreP, Acc) -> + {Status, LPN, MoreP, lists:reverse(Acc)}; +scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) -> + case read_page(P, LPN) of + {ok, Page} -> + Res = {LPN, Page}, + scan_forward(P, LPN + 1, MaxPages - 1, ok, true, [Res|Acc]); + error_badepoch -> + %% Halt, allow recursion to create our return value. + scan_forward(P, LPN, 0, error_badepoch, false, Acc); + error_trimmed -> + %% TODO: API question, do we add a 'trimmed' indicator + %% in the Acc? Or should the client assume that if + %% scan_forward() doesn't mention a page that + scan_forward(P, LPN + 1, MaxPages - 1, ok, true, Acc); + error_unwritten -> + %% Halt, allow recursion to create our return value. + %% TODO: It's possible that we're stuck here because a client + %% crashed and that we see an unwritten page at LPN. + %% We ought to ask the sequencer always/sometime?? what + %% tail LPN is, and if there's a hole, start a timer to + %% allow us to fill the hole. + scan_forward(P, LPN, 0, ok, false, Acc) + %% Let it crash: error_overwritten + end. + +fill_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + fill_or_trim_page(Chain, Epoch, LPN, fill). + +trim_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + fill_or_trim_page(Chain, Epoch, LPN, trim). + +fill_or_trim_page([], _Epoch, _LPN, _Func) -> + ok; +fill_or_trim_page([H|T], Epoch, LPN, Func) -> + case corfurl_flu:Func(flu_pid(H), Epoch, LPN) of + Res when Res == ok; Res == error_trimmed -> + %% Detecting a race here between fills and trims is too crazy, + %% and I don't believe that it *matters*. The ickiest one + %% is a race between Proc A = trim and Proc B = read, + %% chain length of 2 or more: + %% Proc A: trim head -> ok + %% Proc B: read tail -> error_unwritten + %% Proc B: read head -> error_trimmed + %% Proc B: trim tail -> ok + %% Proc A: trim tail -> ?? + %% + %% The result that we want that both A & B & any later + %% readers agree that the LPN is trimmed. If the chain is + %% >2, then the procs can win some/all/none of the races + %% to fix up the chain, that's no problem. But don't tell + %% the caller that there was an error during those races. + fill_or_trim_page(T, Epoch, LPN, Func); + Else -> + %% TODO: worth doing anything here, if we're in the middle of chain? + %% TODO: is that ^^ anything different for fill vs. trim? + Else + end. + +flu_pid(X) when is_pid(X) -> + X; +flu_pid(X) when is_atom(X) -> + ets:lookup_element(flu_pid_tab, X, 1). + +giant_error(Err) -> + io:format(user, "GIANT ERROR: ~p\n", [Err]), + exit(Err). + +%%%% %%%% %%%% projection utilities %%%% %%%% %%%% + +new_range(Start, End, ChainList) -> + %% TODO: sanity checking of ChainList, Start < End, yadda + #range{pn_start=Start, pn_end=End, chains=list_to_tuple(ChainList)}. + +new_simple_projection(Dir, Epoch, Start, End, ChainList) -> + ok = filelib:ensure_dir(Dir ++ "/unused"), + #proj{dir=Dir, epoch=Epoch, r=[new_range(Start, End, ChainList)]}. + +make_projection_path(Dir, Epoch) -> + lists:flatten(io_lib:format("~s/~12..0w.proj", [Dir, Epoch])). + +read_projection(Dir, Epoch) -> + case file:read_file(make_projection_path(Dir, Epoch)) of + {ok, Bin} -> + {ok, binary_to_term(Bin)}; % TODO if corrupted? + {error, enoent} -> + error_unwritten; + Else -> + Else % TODO API corner case + end. 
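+
+%% save_projection/2 gets its write-once behavior from the filesystem: the
+%% projection is first written to a unique temporary file and then published
+%% under its final epoch-numbered name via file:make_link/2.  The link call
+%% fails with eexist if some other client already published that epoch,
+%% which is reported to the caller as error_overwritten.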
+ +save_projection(Dir, #proj{epoch=Epoch} = P) -> + Path = make_projection_path(Dir, Epoch), + ok = filelib:ensure_dir(Dir ++ "/ignored"), + {_, B, C} = now(), + TmpPath = Path ++ lists:flatten(io_lib:format(".~w.~w.~w", [B, C, node()])), + %% TODO: don't be lazy, do a flush before link when training wheels come off + ok = file:write_file(TmpPath, term_to_binary(P)), + case file:make_link(TmpPath, Path) of + ok -> + file:delete(TmpPath), + ok; + {error, eexist} -> + error_overwritten; + Else -> + Else % TODO API corner case + end. + +latest_projection_epoch_number(Dir) -> + case filelib:wildcard("*.proj", Dir) of + [] -> + -1; + Files -> + {Epoch, _} = string:to_integer(lists:last(Files)), + Epoch + end. + +project_to_chain(LPN, P) -> + %% TODO fixme + %% TODO something other than round-robin? + [#range{pn_start=Start, pn_end=End, chains=Chains}] = P#proj.r, + if Start =< LPN, LPN =< End -> + I = ((LPN - Start) rem tuple_size(Chains)) + 1, + element(I, Chains); + true -> + exit({?MODULE, project_to_chain, [{start, Start}, + {lpn, LPN}, + {'end', End}]}) + end. + +simple_test_setup(RootDir, BaseDirName, PageSize, NumPages, NumFLUs) -> + PDir = RootDir ++ "/" ++ BaseDirName ++ ".projection", + filelib:ensure_dir(PDir), + BaseDir = RootDir ++ "/flu." ++ BaseDirName ++ ".", + MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end, + DeleteFLUData = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) || + X <- lists:seq(1, NumFLUs)] end, + DeleteFLUData(), + FLUs = [begin + element(2, corfurl_flu:start_link(MyDir(X), + PageSize, NumPages*PageSize)) + end || X <- lists:seq(1, NumFLUs)], + + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + P0 = corfurl:new_simple_projection(PDir, 1, 1, 1*100, [FLUs]), + P1 = P0#proj{seq={Seq, unused, unused}, page_size=PageSize}, + {FLUs, Seq, P1, DeleteFLUData}. diff --git a/prototype/tango/src/corfurl_client.erl b/prototype/tango/src/corfurl_client.erl new file mode 100644 index 0000000..d1ca1e4 --- /dev/null +++ b/prototype/tango/src/corfurl_client.erl @@ -0,0 +1,268 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_client). + +-export([append_page/2, append_page/3, + read_page/2, fill_page/2, trim_page/2, scan_forward/3]). +-export([restart_sequencer/1]). +%% For debugging/verification only +-export([pulse_tracing_start/1, pulse_tracing_add/2, pulse_tracing_get/1]). + +-include("corfurl.hrl"). + +-define(LONG_TIME, 5*1000). +%% -define(LONG_TIME, 30*1000). + +append_page(Proj, Page) -> + append_page(Proj, Page, []). + +append_page(Proj, Page, StreamList) -> + append_page(Proj, Page, StreamList, 5). + +append_page(Proj, _Page, _StreamList, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +%% TODO: remove _StreamList arg entirely? 
+append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, StreamList, Retries) -> + try + {ok, LPN} = corfurl_sequencer:get(Sequencer, 1), + pulse_tracing_add(write, LPN), + append_page1(Proj, LPN, Page, StreamList, 5) + catch + exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} + when Reason == noproc; Reason == normal -> + append_page(restart_sequencer(Proj), Page, StreamList, Retries); + exit:Exit -> + {{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit} + end. + +append_page1(Proj, _LPN, _Page, _StreamList, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +append_page1(Proj, LPN, Page, StreamList, Retries) -> + case append_page2(Proj, LPN, Page) of + lost_race -> + append_page(Proj, Page, StreamList, Retries - 1); + error_badepoch -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + append_page1(NewProj, LPN, Page, StreamList, Retries - 1); + Else -> + {Else, Proj} + end; + Else -> + {Else, Proj} + end. + +append_page2(Proj, LPN, Page) -> + case corfurl:write_page(Proj, LPN, Page) of + ok -> + {ok, LPN}; + X when X == error_overwritten; X == error_trimmed -> + report_lost_race(LPN, X), + lost_race; + {special_trimmed, LPN}=XX -> + XX; + error_badepoch=XX-> + XX + %% Let it crash: error_unwritten + end. + +read_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:read_page(P, LPN) end, 10). + +fill_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:fill_page(P, LPN) end, 10). + +trim_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:trim_page(P, LPN) end, 10). + +scan_forward(Proj, LPN, MaxPages) -> + %% This is fiddly stuff that I'll get 0.7% wrong if I try to be clever. + %% So, do something simple and (I hope) obviously correct. + %% TODO: do something "smarter". + case corfurl:scan_forward(Proj, LPN, MaxPages) of + {error_badepoch, _LPN2, _MoreP, _Pages} = Res -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + {Res, NewProj}; + _Else -> + %% TODO: What is the risk of getting caught in a situation + %% where we can never make any forward progress when pages + %% really are being written? + {Res, Proj} + end; + Res -> + {Res, Proj} + end. + +%%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% + +retry_loop(Proj, _Fun, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +retry_loop(Proj, Fun, Retries) -> + case Fun(Proj) of + error_badepoch -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + retry_loop(NewProj, Fun, Retries - 1); + _Else -> + {{error_failed, ?MODULE, ?LINE}, Proj} + end; + Else -> + {Else, Proj} + end. + +restart_sequencer(#proj{epoch=Epoch, dir=Dir} = P) -> + case corfurl:latest_projection_epoch_number(Dir) of + N when N > Epoch -> + %% Yay, someone else has intervened. Perhaps they've solved + %% our sequencer problem for us? + read_latest_projection(P); + _ -> + restart_sequencer2(P) + end. + +restart_sequencer2(#proj{seq={OldSequencer, _SeqHost, SeqName}, + epoch=Epoch, r=Ranges} = P) -> + spawn(fun() -> + (catch corfurl_sequencer:stop(OldSequencer)) + end), + TODO_type = standard, % TODO: fix this hard-coding + FLUs = lists:usort( + [FLU || R <- Ranges, + C <- tuple_to_list(R#range.chains), FLU <- C]), + %% TODO: We can proceed if we can seal at least one FLU in + %% each chain. Robustify and sanity check. + [begin + _Res = corfurl_flu:seal(FLU, Epoch) + end || FLU <- lists:reverse(FLUs)], + case corfurl_sequencer:start_link(FLUs, TODO_type, SeqName) of + {ok, Pid} -> + NewP = P#proj{seq={Pid, node(), SeqName}, epoch=Epoch+1}, + save_projection_or_get_latest(NewP) + end. 
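%% Illustrative sketch (hypothetical wrapper, not part of this patch): every
%% corfurl_client call returns {Result, PossiblyNewerProjection}, so callers
%% must thread the projection through their own state. For example:
append_and_thread_proj(Proj0, PageBin) ->
    case corfurl_client:append_page(Proj0, PageBin) of
        {{ok, LPN}, Proj1} ->
            {ok, LPN, Proj1};
        {{special_trimmed, LPN}, Proj1} ->
            %% PULSE-only sentinel; treat as a successful append here.
            {ok, LPN, Proj1};
        {{error_failed, _Mod, _Line}, Proj1} ->
            {error, retries_exhausted, Proj1};
        {Else, Proj1} ->
            {error, Else, Proj1}
    end.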
+ +poll_for_new_epoch_projection(P) -> + put(silly_poll_counter, 0), + poll_for_new_epoch_projection(P, get_poll_retries()). + +poll_for_new_epoch_projection(P, 0) -> + %% TODO: The client that caused the seal may have crashed before + %% writing a new projection. We should try to pick up here, + %% write a new projection, and bully forward. + %% NOTE: When that new logic is added, the huge polling interval + %% that PULSE uses should be reduced to something tiny. + case corfurl:latest_projection_epoch_number(P#proj.dir) of + Neg when Neg < 0 -> + error_badepoch; + Other -> + exit({bummer, ?MODULE, ?LINE, latest_epoch, Other}) + end; +poll_for_new_epoch_projection(#proj{dir=Dir, epoch=Epoch} = P, Tries) -> + case corfurl:latest_projection_epoch_number(Dir) of + NewEpoch when NewEpoch > Epoch -> + corfurl:read_projection(Dir, NewEpoch); + _ -> + timer:sleep(get_poll_sleep_time()), + case put(silly_poll_counter, get(silly_poll_counter) + 1) div 10*1000 of + 0 -> io:format(user, "P", []); + _ -> ok + end, + poll_for_new_epoch_projection(P, Tries - 1) + end. + +save_projection_or_get_latest(#proj{dir=Dir} = P) -> + case corfurl:save_projection(Dir, P) of + ok -> + P; + error_overwritten -> + read_latest_projection(P) + end. + +read_latest_projection(#proj{dir=Dir}) -> + NewEpoch = corfurl:latest_projection_epoch_number(Dir), + {ok, NewP} = corfurl:read_projection(Dir, NewEpoch), + NewP. + +-ifdef(TEST). +-ifdef(PULSE). +report_lost_race(_LPN, _Reason) -> + %% It's interesting (sometime?) to know if a page was overwritten + %% because the sequencer was configured by QuickCheck to hand out + %% duplicate LPNs. If this gets too annoying, this can be a no-op + %% function. + io:format(user, "o", []). +-else. % PULSE +report_lost_race(LPN, Reason) -> + io:format(user, "LPN ~p race lost: ~p\n", [LPN, Reason]). +-endif. % PULSE +-else. % TEST + +report_lost_race(LPN, Reason) -> + %% Perhaps it's an interesting event, but the rest of the system + %% should react correctly whenever this happens, so it shouldn't + %% ever cause an external consistency problem. + error_logger:info_msg("LPN ~p race lost: ~p\n", [LPN, Reason]). + +-endif. % TEST + +-ifdef(PULSE). +get_poll_retries() -> + 999*1000. + +get_poll_sleep_time() -> + 1. + +-else. +get_poll_retries() -> + 25. + +get_poll_sleep_time() -> + 50. + +-endif. + +-ifdef(PULSE). + +pulse_tracing_start(Type) -> + put({?MODULE, Type}, []). + +pulse_tracing_add(Type, Stuff) -> + List = case pulse_tracing_get(Type) of + undefined -> []; + L -> L + end, + put({?MODULE, Type}, [Stuff|List]). + +pulse_tracing_get(Type) -> + get({?MODULE, Type}). + +-else. + +pulse_tracing_start(_Type) -> + ok. + +pulse_tracing_add(_Type, _Stuff) -> + ok. + +pulse_tracing_get(_Type) -> + ok. + +-endif. diff --git a/prototype/tango/src/corfurl_flu.erl b/prototype/tango/src/corfurl_flu.erl new file mode 100644 index 0000000..4feffa4 --- /dev/null +++ b/prototype/tango/src/corfurl_flu.erl @@ -0,0 +1,466 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_flu). + +-behaviour(gen_server). + +-type flu_error() :: 'error_badepoch' | 'error_trimmed' | + 'error_overwritten' | 'error_unwritten'. +-export_type([flu_error/0]). + +%% API +-export([start_link/1, start_link/3, status/1, stop/1]). +-export([write/4, read/3, seal/2, trim/3, fill/3]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-export([get__mlp/1, get__min_epoch/1, get__trim_watermark/1]). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-compile({pulse_skip,[{msc, 3}]}). +-endif. +-endif. + +-include_lib("kernel/include/file.hrl"). + +%%% Debugging: for extra events in the PULSE event log, use the 2nd statement. +-define(EVENT_LOG(X), ok). +%% -define(EVENT_LOG(X), event_logger:event(X)). + +-record(state, { + dir :: string(), + mem_fh :: term(), + min_epoch :: non_neg_integer(), + page_size :: non_neg_integer(), + max_mem :: non_neg_integer(), + max_logical_page :: 'unknown' | non_neg_integer(), + %% TODO: Trim watermark handling is *INCOMPLETE*. The + %% current code is broken but is occasionally correct, + %% like a broken analog watch is correct 2x per day. + trim_watermark :: non_neg_integer(), + trim_count :: non_neg_integer() + }). + +start_link(Dir) -> + start_link(Dir, 8, 64*1024*1024). + +start_link(Dir, PageSize, MaxMem) -> + gen_server:start_link(?MODULE, {Dir, PageSize, MaxMem}, []). + +status(Pid) -> + gen_server:call(Pid, status, infinity). + +stop(Pid) -> + gen_server:call(Pid, stop, infinity). + +write(Pid, Epoch, LogicalPN, PageBin) + when is_integer(LogicalPN), LogicalPN > 0, is_binary(PageBin) -> + g_call(Pid, {write, Epoch, LogicalPN, PageBin}, infinity). + +read(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + g_call(Pid, {read, Epoch, LogicalPN}, infinity). + +seal(Pid, Epoch) when is_integer(Epoch), Epoch > 0 -> + g_call(Pid, {seal, Epoch}, infinity). + +trim(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + g_call(Pid, {trim, Epoch, LogicalPN}, infinity). + +fill(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + Res = g_call(Pid, {fill, Epoch, LogicalPN}, infinity), + undo_special_pulse_test_result(Res). + +g_call(Pid, Arg, Timeout) -> + LC1 = lclock_get(), + msc(self(), Pid, Arg), + {Res, LC2} = gen_server:call(Pid, {Arg, LC1}, Timeout), + msc(Pid, self(), Res), + lclock_update(LC2), + Res. + +-ifdef(TEST). + +get__mlp(Pid) -> + gen_server:call(Pid, get__mlp, infinity). + +get__min_epoch(Pid) -> + gen_server:call(Pid, get__min_epoch, infinity). + +get__trim_watermark(Pid) -> + gen_server:call(Pid, get__trim_watermark, infinity). + +-endif. 
% TEST + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +init({Dir, ExpPageSize, ExpMaxMem}) -> + lclock_init(), + + MemFile = memfile_path(Dir), + ok = filelib:ensure_dir(MemFile), + {ok, FH} = file:open(MemFile, [read, write, raw, binary]), + + {_Version, MinEpoch, PageSize, MaxMem, TrimWatermark} = + try + Res = read_hard_state(Dir), + case Res of + {_V, _LE, PS, MM, TW} + when PS =:= ExpPageSize, MM =:= ExpMaxMem -> + Res + end + catch + X:Y -> + if X == error, + Y == {case_clause,{error,enoent}} -> + ok; + true -> + %% TODO: log-ify this + io:format("init: caught ~p ~p @ ~p\n", + [X, Y, erlang:get_stacktrace()]) + end, + {no_version_number, 0, ExpPageSize, ExpMaxMem, 0} + end, + State = #state{dir=Dir, mem_fh=FH, min_epoch=MinEpoch, page_size=PageSize, + max_mem=MaxMem, max_logical_page=unknown, + trim_watermark=TrimWatermark, trim_count=0}, + self() ! finish_init, % TODO + {ok, State}. + +handle_call(Call, From, #state{max_logical_page=unknown} = State) -> + {noreply, NewState} = handle_info(finish_init, State), + handle_call(Call, From, NewState); +handle_call({{write, ClientEpoch, _LogicalPN, _PageBin}, LC1}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + LC2 = lclock_update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, + #state{max_logical_page=MLPN} = State) -> + LC2 = lclock_update(LC1), + case check_write(LogicalPN, PageBin, State) of + {ok, Offset} -> + ok = write_page(Offset, LogicalPN, PageBin, State), + NewMLPN = erlang:max(LogicalPN, MLPN), + ?EVENT_LOG({flu, write, self(), LogicalPN, ok}), + {reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}}; + Else -> + ?EVENT_LOG({flu, write, self(), LogicalPN, Else}), + {reply, {Else, LC2}, State} + end; + +handle_call({{read, ClientEpoch, _LogicalPN}, LC1}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + LC2 = lclock_update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lclock_update(LC1), + Reply = read_page(LogicalPN, State), + ?EVENT_LOG({flu, read, self(), LogicalPN, Reply}), + {reply, {Reply, LC2}, State}; + +handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + LC2 = lclock_update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=State) -> + LC2 = lclock_update(LC1), + NewState = State#state{min_epoch=ClientEpoch+1}, + ok = write_hard_state(NewState), + {reply, {{ok, MLPN}, LC2}, NewState}; + +handle_call({{trim, ClientEpoch, _LogicalPN}, LC1}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + LC2 = lclock_update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lclock_update(LC1), + {Reply, NewState} = do_trim_or_fill(trim, LogicalPN, State), + ?EVENT_LOG({flu, trim, self(), LogicalPN, Reply}), + {reply, {Reply, LC2}, NewState}; + +handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + LC2 = lclock_update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lclock_update(LC1), + {Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State), + ?EVENT_LOG({flu, fill, self(), LogicalPN, Reply}), + {reply, {Reply, 
LC2}, NewState}; + +handle_call(get__mlp, _From, State) -> + {reply, State#state.max_logical_page, State}; +handle_call(get__min_epoch, _From, State) -> + {reply, State#state.min_epoch, State}; +handle_call(get__trim_watermark, _From, State) -> + {reply, State#state.trim_watermark, State}; +handle_call(status, _From, State) -> + L = [{min_epoch, State#state.min_epoch}, + {page_size, State#state.page_size}, + {max_mem, State#state.max_mem}, + {max_logical_page, State#state.max_logical_page}, + {trim_watermark, State#state.trim_watermark}], + {reply, {ok, L}, State}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(Request, _From, State) -> + Reply = {whaaaaaaaaaaaaaaaaaa, Request}, + {reply, Reply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(finish_init, State) -> + MLP = find_max_logical_page(State), + State2 = State#state{max_logical_page=MLP}, + ok = write_hard_state(State2), + {noreply, State2}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + ok = write_hard_state(State), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +read_hard_state(Dir) -> + File = hard_state_path(Dir), + case file:read_file(File) of + {ok, Bin} -> + case binary_to_term(Bin) of + T when element(1, T) == v1 -> + T + end; + Else -> + Else + end. + +write_hard_state(#state{min_epoch=MinEpoch, page_size=PageSize, max_mem=MaxMem, + trim_watermark=TrimWatermark} = S) -> + NewPath = hard_state_path(S#state.dir), + TmpPath = NewPath ++ ".tmp", + {ok, FH} = file:open(TmpPath, [write, binary, raw]), + HS = {v1, MinEpoch, PageSize, MaxMem, TrimWatermark}, + ok = file:write(FH, term_to_binary(HS)), + %% ok = file:sync(FH), % TODO uncomment when the training wheels come off + ok = file:close(FH), + ok = file:rename(TmpPath, NewPath). + +memfile_path(Dir) -> + Dir ++ "/memfile". + +hard_state_path(Dir) -> + Dir ++ "/hard-state". + +calc_page_offset(PhysicalPN, #state{page_size=PageSize}) -> + TotalSize = ?PAGE_OVERHEAD + PageSize, + PhysicalPN * TotalSize. + +%% find_max_logical_page(): This is a kludge, based on our naive +%% implementation of not keeping the maximum logical page in hard +%% state. + +find_max_logical_page(S) -> + {ok, FI} = file:read_file_info(memfile_path(S#state.dir)), + find_max_logical_page(0, 0, FI#file_info.size, S). + +find_max_logical_page(MLP, PhysicalPN, FSize, + #state{mem_fh=FH, max_mem=MaxMem}=S) -> + Offset = calc_page_offset(PhysicalPN, S), + if Offset < MaxMem, Offset < FSize -> + case file:pread(FH, Offset, 9) of + {ok, <<1:8/big, LP:64/big>>} -> + find_max_logical_page(erlang:max(MLP, LP), PhysicalPN + 1, + FSize, S); + _ -> + find_max_logical_page(MLP, PhysicalPN + 1, FSize, S) + end; + true -> + MLP + end. + +check_write(LogicalPN, PageBin, + #state{max_mem=MaxMem, page_size=PageSize} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem, byte_size(PageBin) =:= PageSize -> + case check_is_written(Offset, LogicalPN, S) of + false -> + {ok, Offset}; + true -> + error_overwritten + end; + true -> + {bummer, ?MODULE, ?LINE, lpn, LogicalPN, offset, Offset, max_mem, MaxMem, page_size, PageSize, bin_size, byte_size(PageBin)} + end. + +check_is_written(Offset, _PhysicalPN, #state{mem_fh=FH}) -> + case file:pread(FH, Offset, 1) of + {ok, <<0:8>>} -> + false; + {ok, <<1:8>>} -> % written + true; + {ok, <<2:8>>} -> % trimmed + true; + eof -> + %% We assume that Offset has been bounds-checked + false + end. 
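%% Notes (not part of this patch):
%% 1. The binary segment in write_page/4 in the next hunk appears truncated
%%    to `<>' in this diff; judging from the <<1:8, LogicalPN:64/big, ...>>
%%    match in read_page/2, it is presumably <<LogicalPN:64/big>>.
%% 2. A hypothetical helper for inspecting a FLU's hard-state file offline;
%%    the tuple shape mirrors write_hard_state/1 above.
dump_hard_state(Dir) ->
    {ok, Bin} = file:read_file(Dir ++ "/hard-state"),
    {v1, MinEpoch, PageSize, MaxMem, TrimWatermark} = binary_to_term(Bin),
    [{min_epoch, MinEpoch}, {page_size, PageSize},
     {max_mem, MaxMem}, {trim_watermark, TrimWatermark}].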
+ +write_page(Offset, LogicalPN, PageBin, #state{mem_fh=FH}) -> + IOList = [<<1:8>>, <>, PageBin, <<1:8>>], + ok = file:pwrite(FH, Offset, IOList). + +read_page(LogicalPN, #state{max_mem=MaxMem, mem_fh=FH, + page_size=PageSize} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem -> + case file:pread(FH, Offset, PageSize + ?PAGE_OVERHEAD) of + {ok, <<1:8, LogicalPN:64/big, Page:PageSize/binary, 1:8>>} -> + {ok, Page}; + {ok, <<1:8, _LogicalPN:64/big, _:PageSize/binary, 0:8>>} -> + io:format("BUMMER: ~s line ~w: incomplete write at ~p\n", + [?MODULE, ?LINE, LogicalPN]), + error_unwritten; + {ok, <<2:8, _/binary>>} -> + error_trimmed; + {ok, _} -> + error_unwritten; + eof -> + error_unwritten; + Else -> + io:format("BUMMER: ~s line ~w: ~p\n", + [?MODULE, ?LINE, Else]), + badarg % TODO: better idea + end; + true -> + badarg + end. + +do_trim_or_fill(Op, LogicalPN, + #state{trim_watermark=TrimWatermark, trim_count=TrimCount} = S) -> + case trim_page(Op, LogicalPN, S) of + ok -> + NewS = S#state{trim_watermark=erlang:max( + TrimWatermark, LogicalPN), + trim_count=TrimCount + 1}, + if TrimCount rem 1000 == 0 -> + ok = write_hard_state(NewS); + true -> + ok + end, + {ok, NewS}; + Else -> + {Else, S} + end. + +trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem -> + Status = case file:pread(FH, Offset, 1) of + {ok, <<0:8>>} -> + error_unwritten; + {ok, <<1:8>>} -> + error_overwritten; + {ok, <<2:8>>} -> + error_trimmed; + eof -> + error_unwritten; + Else -> + io:format("BUMMER: ~s line ~w: ~p\n", + [?MODULE, ?LINE, Else]), + error_trimmed % TODO + end, + if Status == error_overwritten andalso Op == trim -> + ok = file:pwrite(FH, Offset, <<2:8>>), + ok; + Status == error_unwritten andalso Op == fill -> + ok = file:pwrite(FH, Offset, <<2:8>>), + ok; + true -> + Status + end; + true -> + badarg + end. + +-ifdef(PULSE). +%% We do *not* want to remove any special PULSE return code. +undo_special_pulse_test_result(Res) -> + Res. +-else. % PULSE +undo_special_pulse_test_result({special_trimmed, LPN}) -> + {ok, LPN}; +undo_special_pulse_test_result(Res) -> + Res. +-endif. % PULSE + + +-ifdef(PULSE_HACKING). +%% Create a trace file that can be formatted by "mscgen" utility. +%% Lots of hand-editing is required after creating the file, sorry! +msc(_From, _To, _Tag) -> + {ok, FH} = file:open("/tmp/goo", [write, append]), + io:format(FH, " \"~w\" -> \"~w\" [ label = \"~w\" ] ;\n", [_From, _To, _Tag]), + file:close(FH). +-else. % PULSE_HACKING +msc(_From, _To, _Tag) -> + ok. +-endif. % PULSE_HACkING + +-ifdef(PULSE). + +lclock_init() -> + lamport_clock:init(). + +lclock_get() -> + lamport_clock:get(). + +lclock_update(LC) -> + lamport_clock:update(LC). + +-else. % PULSE + +lclock_init() -> + ok. + +lclock_get() -> + ok. + +lclock_update(_LC) -> + ok. + +-endif. % PLUSE diff --git a/prototype/tango/src/corfurl_sequencer.erl b/prototype/tango/src/corfurl_sequencer.erl new file mode 100644 index 0000000..085cdc1 --- /dev/null +++ b/prototype/tango/src/corfurl_sequencer.erl @@ -0,0 +1,208 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_sequencer). + +-behaviour(gen_server). + +-export([start_link/1, start_link/2, start_link/3, + stop/1, stop/2, + get/2, get_tails/3]). +-export([set_tails/2]). +-ifdef(TEST). +-compile(export_all). +-endif. + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +-define(SERVER, ?MODULE). +%% -define(LONG_TIME, 30*1000). +-define(LONG_TIME, 5*1000). + +-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])). + +start_link(FLUs) -> + start_link(FLUs, standard). + +start_link(FLUs, SeqType) -> + start_link(FLUs, SeqType, ?SERVER). + +start_link(FLUs, SeqType, RegName) -> + case gen_server:start_link({local, RegName}, ?MODULE, {FLUs, SeqType},[]) of + {ok, Pid} -> + {ok, Pid}; + {error, {already_started, Pid}} -> + {ok, Pid}; + Else -> + Else + end. + +stop(Pid) -> + stop(Pid, stop). + +stop(Pid, Method) -> + Res = gen_server:call(Pid, stop, infinity), + if Method == kill -> + %% Emulate gen.erl's client-side behavior when the server process + %% is killed. + exit(killed); + true -> + Res + end. + +get(Pid, NumPages) -> + {LPN, LC} = gen_server:call(Pid, {get, NumPages, lclock_get()}, + ?LONG_TIME), + lclock_update(LC), + LPN. + +get_tails(Pid, NumPages, StreamList) -> + {Tails, LC} = gen_server:call(Pid, + {get_tails, NumPages, StreamList, lclock_get()}, + ?LONG_TIME), + lclock_update(LC), + Tails. + +set_tails(Pid, StreamTails) -> + ok = gen_server:call(Pid, {set_tails, StreamTails}, ?LONG_TIME). + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +init({FLUs, TypeOrSeed}) -> + lclock_init(), + MLP = get_max_logical_page(FLUs), + Tab = ets:new(?MODULE, [set, private, {keypos, 1}]), + if TypeOrSeed == standard -> + {ok, {Tab, MLP + 1}}; + true -> + {Seed, BadPercent, MaxDifference} = TypeOrSeed, + random:seed(Seed), + {ok, {Tab, MLP+1, BadPercent, MaxDifference}} + end. 
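%% Illustrative sketch (hypothetical caller, not part of this patch): reserve
%% one page and then ask for the back-pointer tails of streams 1 and 2.
%% get/2 returns {ok, LPN}; get_tails/3 returns {ok, NextLPN, TailsPerStream}.
example_reserve_and_tails(SeqPid) ->
    {ok, LPN} = corfurl_sequencer:get(SeqPid, 1),
    {ok, _NextLPN, [Tails1, Tails2]} =
        corfurl_sequencer:get_tails(SeqPid, 1, [1, 2]),
    {LPN, Tails1, Tails2}.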
+ +handle_call({get, NumPages, LC}, _From, {Tab, MLP}) -> + NewLC = lclock_update(LC), + {reply, {{ok, MLP}, NewLC}, {Tab, MLP + NumPages}}; +handle_call({get, NumPages, LC}, _From, + {Tab, MLP, BadPercent, MaxDifference}) -> + NewLC = lclock_update(LC), + Fudge = case random:uniform(100) of + N when N < BadPercent -> + random:uniform(MaxDifference * 2) - MaxDifference; + _ -> + 0 + end, + {reply, {{ok, erlang:max(1, MLP + Fudge)}, NewLC}, + {Tab, MLP + NumPages, BadPercent, MaxDifference}}; +handle_call({get_tails, NumPages, StreamList, LC}, _From, MLP_tuple) -> + Tab = element(1, MLP_tuple), + MLP = element(2, MLP_tuple), + Tails = [case (catch ets:lookup_element(Tab, Stream, 2)) of + {'EXIT', _} -> + []; + Res -> + Res + end || Stream <- StreamList], + if NumPages > 0 -> + update_stream_tails(Tab, StreamList, MLP); + true -> + ok + end, + NewLC = lclock_update(LC), + {reply, {{ok, MLP, Tails}, NewLC}, + setelement(2, MLP_tuple, MLP + NumPages)}; +handle_call({set_tails, StreamTails}, _From, MLP_tuple) -> + Tab = element(1, MLP_tuple), + true = ets:delete_all_objects(Tab), + [ets:insert(Tab, {Stream, Tail}) || {Stream, Tail} <- StreamTails], + {reply, ok, MLP_tuple}; +handle_call(stop, _From, MLP) -> + {stop, normal, ok, MLP}; +handle_call(_Request, _From, MLP) -> + Reply = idunnoooooooooooooooooooooooooo, + {reply, Reply, MLP}. + +handle_cast(_Msg, MLP) -> + {noreply, MLP}. + +handle_info(_Info, MLP) -> + {noreply, MLP}. + +terminate(_Reason, _MLP) -> + ok. + +code_change(_OldVsn, MLP, _Extra) -> + {ok, MLP}. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +get_max_logical_page(FLUs) -> + lists:max([proplists:get_value(max_logical_page, Ps, 0) || + FLU <- FLUs, + {ok, Ps} <- [corfurl_flu:status(FLU)]]). + +update_stream_tails(Tab, StreamList, LPN) -> + [begin + OldBackPs = try ets:lookup_element(Tab, Stream, 2) + catch error:badarg -> [] + end, + NewBackPs = add_back_pointer(OldBackPs, LPN), + ets:insert(Tab, {Stream, NewBackPs}) + end || Stream <- StreamList]. + +add_back_pointer([D,C,B,_A|_], New) -> + [New,D,C,B]; +add_back_pointer([], New) -> + [New]; +add_back_pointer(BackPs, New) -> + [New|BackPs]. + +-ifdef(PULSE). + +lclock_init() -> + lamport_clock:init(). + +lclock_get() -> + lamport_clock:get(). + +lclock_update(LC) -> + lamport_clock:update(LC). + +-else. % PULSE + +lclock_init() -> + ok. + +lclock_get() -> + ok. + +lclock_update(_LC) -> + ok. + +-endif. % PLUSE diff --git a/prototype/tango/src/corfurl_util.erl b/prototype/tango/src/corfurl_util.erl new file mode 100644 index 0000000..7a69055 --- /dev/null +++ b/prototype/tango/src/corfurl_util.erl @@ -0,0 +1,40 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_util). + +-export([delete_dir/1]). + +-ifdef(PULSE). 
+-compile({parse_transform, pulse_instrument}). +-endif. + +delete_dir(Dir) -> + %% We don't recursively delete directories, the ok pattern match will fail. + [ok = file:delete(X) || X <- filelib:wildcard(Dir ++ "/*")], + case file:del_dir(Dir) of + ok -> + ok; + {error, enoent} -> + ok; + Else -> + Else + end. + diff --git a/prototype/tango/src/tango.erl b/prototype/tango/src/tango.erl new file mode 100644 index 0000000..f74492a --- /dev/null +++ b/prototype/tango/src/tango.erl @@ -0,0 +1,219 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% A prototype implementation of Tango over Corfurl. + +-module(tango). + +-include("corfurl.hrl"). + +-export([pack_v1/4, unpack_v1/2, + add_back_pointer/2, + add_back_pointer/3, + scan_backward/4, + scan_backward/5, + pad_bin/2, + append_page/3, + back_ps2last_lpn/1, + append_lpns/2]). + +-define(MAGIC_NUMBER_V1, 16#88990011). + +-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])). + +%% TODO: for version 2: add strong checksum + +pack_v1(Stream_BackPs, Options, Page, PageSize) + when is_list(Stream_BackPs), is_list(Options), is_binary(Page), + is_integer(PageSize), PageSize > 0 -> + Stream_BackPsBin = term_to_binary(Stream_BackPs), + Stream_BackPsSize = byte_size(Stream_BackPsBin), + OptionsInt = convert_options_list2int(Options), + PageActualSize = byte_size(Page), + pad_bin(PageSize, + list_to_binary([<>, + <>, + <>, + Stream_BackPsBin, + <>, + Page])). + +unpack_v1(<>, Part) -> + if Part == stream_list -> + binary_to_term(Stream_BackPsBin); + Part == page -> + Page + end. + +pad_bin(Size, Bin) when byte_size(Bin) >= Size -> + Bin; +pad_bin(Size, Bin) -> + PadSize = Size - byte_size(Bin), + <>. + +add_back_pointer(StreamNum, BackPs, NewBackP) -> + case proplists:get_value(StreamNum, BackPs) of + undefined -> + [{StreamNum, [NewBackP]}]; + IndividualBackPs -> + [{StreamNum, add_back_pointer(IndividualBackPs, NewBackP)} + |lists:keydelete(StreamNum, 1, BackPs)] + end. + +add_back_pointer([A,B,C,_D|_], New) -> + [New,A,B,C]; +add_back_pointer([], New) -> + [New]; +add_back_pointer(BackPs, New) -> + [New|BackPs]. + +convert_options_list2int(Options) -> + lists:foldl(fun(t_final_page, Int) -> Int + 1; + (_, Int) -> Int + end, 0, Options). + +scan_backward(Proj, Stream, LastLPN, WithPagesP) -> + scan_backward(Proj, Stream, LastLPN, 0, WithPagesP). + +scan_backward(Proj, Stream, LastLPN, StopAtLPN, WithPagesP) -> + lists:reverse(scan_backward2(Proj, Stream, LastLPN, StopAtLPN, + 0, WithPagesP)). 
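%% The binary-syntax expressions in pack_v1/4, unpack_v1/2 and pad_bin/2
%% above appear truncated to `<>' in this diff. The sketch below is a
%% plausible reconstruction based on the variables bound in those functions;
%% the field widths (32-bit magic, 8-bit options, 16-bit sizes) are
%% assumptions, not taken from the original source.
pack_v1_sketch(Stream_BackPs, Options, Page, PageSize) ->
    Stream_BackPsBin = term_to_binary(Stream_BackPs),
    Stream_BackPsSize = byte_size(Stream_BackPsBin),
    OptionsInt = convert_options_list2int(Options),
    PageActualSize = byte_size(Page),
    pad_bin_sketch(PageSize,
                   list_to_binary([<<?MAGIC_NUMBER_V1:32/big>>,
                                   <<OptionsInt:8/big>>,
                                   <<Stream_BackPsSize:16/big>>,
                                   Stream_BackPsBin,
                                   <<PageActualSize:16/big>>,
                                   Page])).

pad_bin_sketch(Size, Bin) when byte_size(Bin) >= Size ->
    Bin;
pad_bin_sketch(Size, Bin) ->
    PadSize = Size - byte_size(Bin),
    <<Bin/binary, 0:(PadSize*8)>>.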
+ +scan_backward2(_Proj, _Stream, LastLPN, StopAtLPN, _NumPages, _WithPagesP) + when LastLPN =< StopAtLPN; LastLPN =< 0 -> + []; +scan_backward2(Proj, Stream, LastLPN, StopAtLPN, NumPages, WithPagesP) -> + case corfurl:read_page(Proj, LastLPN) of + {ok, FullPage} -> + case proplists:get_value(Stream, unpack_v1(FullPage, stream_list)) of + undefined -> + if NumPages == 0 -> + %% We were told to start scanning backward at some + %% LPN, but that LPN doesn't have a stream for us. + %% So we'll go backward a page and see if we get + %% lucky there. + scan_backward2(Proj, Stream, LastLPN-1, StopAtLPN, + NumPages, WithPagesP); + true -> + %% Oops, we pointed into a hole. That's bad. + %% TODO: fixme + {gah_fixme, lpn, LastLPN, unpack_v1(FullPage, stream_list)} + end; + [] -> + if WithPagesP -> + [{LastLPN, unpack_v1(FullPage, page)}]; + true -> + [LastLPN] + end; + BackPs -> + if WithPagesP -> + %% ?D({bummer, BackPs}), + [{LastLPN, unpack_v1(FullPage, page)}| + scan_backward2(Proj, Stream, + hd(BackPs), StopAtLPN, NumPages + 1, + WithPagesP)]; + true -> + SkipLPN = lists:last(BackPs), + AddLPNs = [LPN || LPN <- BackPs, + LPN /= SkipLPN, + LPN > StopAtLPN], + [LastLPN] ++ AddLPNs ++ + scan_backward2(Proj, Stream, + SkipLPN, StopAtLPN, NumPages + 1, + WithPagesP) + end + end; + Err -> + %% ?D({scan, LastLPN, Err}), + Err + end. + +%% Hrm, this looks pretty similar to corfurl_client:append_page. + +append_page(Proj, Page, StreamList) -> + append_page(Proj, Page, StreamList, 5). + +append_page(Proj, _Page, _StreamList, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +append_page(#proj{seq={Sequencer,_,_}, page_size=PageSize} = Proj, + OrigPage, StreamList, Retries) -> + try + {ok, LPN, BackPsList} = corfurl_sequencer:get_tails(Sequencer, 1, + StreamList), + %% pulse_tracing_add(write, LPN), + StreamBackPs = lists:zip(StreamList, BackPsList), + Page = tango:pack_v1(StreamBackPs, [t_final_page], + OrigPage, PageSize), + append_page1(Proj, LPN, Page, StreamList, 5, OrigPage) + catch + exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} + when Reason == noproc; Reason == normal -> + NewSeq = corfurl_client:restart_sequencer(Proj), + append_page(Proj#proj{seq=NewSeq}, OrigPage, StreamList, Retries); + exit:Exit -> + {{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit} + end. + +append_page1(Proj, _LPN, _Page, _StreamList, 0, _OrigPage) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +append_page1(Proj, LPN, Page, StreamList, Retries, OrigPage) -> + case append_page2(Proj, LPN, Page) of + lost_race -> + append_page(Proj, OrigPage, StreamList, Retries - 1); + error_badepoch -> + case corfurl_sequencer:poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + append_page1(NewProj, LPN, Page, StreamList, Retries - 1, + OrigPage); + Else -> + {Else, Proj} + end; + Else -> + {Else, Proj} + end. + +append_page2(Proj, LPN, Page) -> + case corfurl:write_page(Proj, LPN, Page) of + ok -> + {ok, LPN}; + X when X == error_overwritten; X == error_trimmed -> + %% report_lost_race(LPN, X), + lost_race; + {special_trimmed, LPN}=XX -> + XX; + error_badepoch=XX-> + XX + %% Let it crash: error_unwritten + end. + +back_ps2last_lpn([]) -> + 0; +back_ps2last_lpn([H|_]) -> + H. + +append_lpns([], BPs) -> + BPs; +append_lpns(LPNs, BPs) -> + lists:reverse(LPNs) ++ BPs. 
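%% Illustrative sketch (hypothetical caller, not part of this patch): append
%% an application page to streams 1 and 2 through the tango layer, then walk
%% stream 1 back from its tail. get_tails/3 with zero pages only reads the
%% current tails without advancing the sequencer.
example_append_then_scan(Proj0, SeqPid, PageBin) ->
    {{ok, LPN}, Proj1} = tango:append_page(Proj0, PageBin, [1, 2]),
    {ok, _, [BackPs]} = corfurl_sequencer:get_tails(SeqPid, 0, [1]),
    LastLPN = tango:back_ps2last_lpn(BackPs),
    LPNsAndPages = tango:scan_backward(Proj1, 1, LastLPN, true),
    {LPN, LPNsAndPages}.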
+ diff --git a/prototype/tango/src/tango_dt.erl b/prototype/tango/src/tango_dt.erl new file mode 100644 index 0000000..7a60ffe --- /dev/null +++ b/prototype/tango/src/tango_dt.erl @@ -0,0 +1,171 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_dt). + +-behaviour(gen_server). + +%% API +-export([start_link/5, stop/1, checkpoint/1]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(LONG_TIME, 30*1000). + +-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])). + +-type lpn() :: non_neg_integer(). + +-record(state, { + page_size :: non_neg_integer(), % Corfurl page size + seq :: pid(), % sequencer pid + proj :: term(), % projection + stream_num :: non_neg_integer(), % this instance's OID number + cb_mod :: atom(), % callback module + last_fetch_lpn :: lpn(), % + all_back_ps :: [lpn()], % All back-pointers LIFO order! + i_state :: term() % internal state thingie + }). + +-type callback_i_state() :: term(). +-type gen_server_from() :: {pid(), Tag::term()}. + +-callback fresh() -> callback_i_state(). +-callback do_pure_op(term(), callback_i_state()) -> term(). +-callback do_dirty_op(term(), gen_server_from(), callback_i_state(), + StreamNum::non_neg_integer(), + Proj0::term(), PageSize::non_neg_integer()) -> + {Reply::term(), New_I_State::callback_i_state(), + Proj::term(), LPN::non_neg_integer(), NewBackPs::list()}. +-callback play_log_mutate_i_state([binary()], boolean(), callback_i_state()) -> + callback_i_state(). + +start_link(PageSize, SequencerPid, Proj, CallbackMod, StreamNum) -> + gen_server:start_link(?MODULE, + [PageSize, SequencerPid, Proj, CallbackMod, StreamNum], + []). + +stop(Pid) -> + gen_server:call(Pid, {stop}, ?LONG_TIME). + +checkpoint(Pid) -> + gen_server:call(Pid, {sync_checkpoint}, ?LONG_TIME). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init([PageSize, SequencerPid, Proj, CallbackMod, StreamNum]) -> + LastLPN = find_last_lpn(SequencerPid, StreamNum), + {LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, 0, StreamNum), +?D({self(), LPNs}), + BackPs = tango:append_lpns(LPNs, []), + LastFetchLPN = tango:back_ps2last_lpn(BackPs), + I_State = play_log_pages(Pages, CallbackMod:fresh(), CallbackMod, false), + {ok, #state{page_size=PageSize, + seq=SequencerPid, + proj=Proj, + cb_mod=CallbackMod, + stream_num=StreamNum, + last_fetch_lpn=LastFetchLPN, + all_back_ps=BackPs, + i_state=I_State}}. 
+ +handle_call({cb_dirty_op, Op}, From, + #state{proj=Proj0, cb_mod=CallbackMod, stream_num=StreamNum, + page_size=PageSize, i_state=I_State}=State)-> + {AsyncType, I_State2, Proj1, _LPN} = + CallbackMod:do_dirty_op(Op, From, I_State, StreamNum, + Proj0, PageSize), + State2 = State#state{i_state=I_State2, + proj=Proj1}, + if AsyncType == op_t_async -> + {reply, ok, State2}; + AsyncType == op_t_sync -> + State3 = roll_log_forward(State2), + {noreply, State3} + end; +handle_call({cb_pure_op, Op}, _From, #state{cb_mod=CallbackMod} = State) -> + State2 = #state{i_state=I_State} = roll_log_forward(State), + Reply = CallbackMod:do_pure_op(Op, I_State), + {reply, Reply, State2}; +handle_call({sync_checkpoint}, From, + #state{proj=Proj0, cb_mod=CallbackMod, stream_num=StreamNum, + page_size=PageSize, i_state=I_State}=State)-> + CheckpointOps = CallbackMod:do_checkpoint(I_State), + %% CheckpointBackPs = [], + {_OpT, I_State2, Proj1, _LPN} = + CallbackMod:do_dirty_op(CheckpointOps, From, I_State, StreamNum, + Proj0, PageSize), +?D({sync_checkpoint, _LPN}), + %% TODO: Use this LPN so that we can tell the corfurl log GC + %% that we have created some dead bytes in the log. + {reply, ok, State#state{i_state=I_State2, + proj=Proj1}}; +handle_call({stop}, _From, State) -> + {stop, normal, ok, State}; +handle_call(_Request, _From, State) -> + Reply = whaaaaaaaaaaaa, + {reply, Reply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +find_last_lpn(SequencerPid, StreamNum) -> + {ok, _, [BackPs]} = corfurl_sequencer:get_tails(SequencerPid, + 0, [StreamNum]), + tango:back_ps2last_lpn(BackPs). + +fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum) + when LastLPN >= StopAtLPN -> + LPNandPages = tango:scan_backward(Proj, StreamNum, LastLPN, + StopAtLPN, true), + {_LPNs, _Pages} = lists:unzip(LPNandPages). + +play_log_pages(Pages, SideEffectsP, + #state{cb_mod=CallbackMod, i_state=I_State} = State) -> + I_State2 = play_log_pages(Pages, I_State, CallbackMod, SideEffectsP), + State#state{i_state=I_State2}. + +play_log_pages(Pages, I_State, CallbackMod, SideEffectsP) -> + CallbackMod:play_log_mutate_i_state(Pages, SideEffectsP, I_State). + +roll_log_forward(#state{seq=SequencerPid, proj=Proj, all_back_ps=BackPs, + stream_num=StreamNum, + last_fetch_lpn=StopAtLPN} = State) -> + LastLPN = find_last_lpn(SequencerPid, StreamNum), + {LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum), +?D({self(), LPNs}), + NewBackPs = tango:append_lpns(LPNs, BackPs), + LastFetchLPN = tango:back_ps2last_lpn(NewBackPs), + play_log_pages(Pages, true, + State#state{all_back_ps=NewBackPs, + last_fetch_lpn=LastFetchLPN}). diff --git a/prototype/tango/src/tango_dt_map.erl b/prototype/tango/src/tango_dt_map.erl new file mode 100644 index 0000000..2179302 --- /dev/null +++ b/prototype/tango/src/tango_dt_map.erl @@ -0,0 +1,83 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_dt_map). + +-behaviour(tango_dt). + +-export([start_link/4, stop/1, + set/3, get/2, + checkpoint/1]). + +%% Tango datatype callbacks +-export([fresh/0, + do_pure_op/2, do_dirty_op/6, do_checkpoint/1, + play_log_mutate_i_state/3]). + +-define(DICTMOD, dict). + +-define(LONG_TIME, 30*1000). + +start_link(PageSize, SequencerPid, Proj, StreamNum) -> + gen_server:start_link(tango_dt, + [PageSize, SequencerPid, Proj, ?MODULE, StreamNum], + []). + +stop(Pid) -> + tango_dt:stop(Pid). + +set(Pid, Key, Val) -> + gen_server:call(Pid, {cb_dirty_op, {o_set, Key, Val}}, ?LONG_TIME). + +get(Pid, Key) -> + gen_server:call(Pid, {cb_pure_op, {o_get, Key}}, ?LONG_TIME). + +checkpoint(Pid) -> + tango_dt:checkpoint(Pid). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fresh() -> + ?DICTMOD:new(). + +do_pure_op({o_get, Key}, Dict) -> + ?DICTMOD:find(Key, Dict). + +do_dirty_op(Op0, _From, + I_State, StreamNum, Proj0, ___TODO_delme_PageSize) -> + Op = if is_list(Op0) -> Op0; + true -> [Op0] % always make a list + end, + Page = term_to_binary(Op), + {{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]), + {op_t_async, I_State, Proj1, LPN}. + +do_checkpoint(Dict=_I_State) -> + [{o_start_checkpoint}|[{o_set, X, Y} || {X, Y} <- ?DICTMOD:to_list(Dict)]]. + +play_log_mutate_i_state(Pages, _SideEffectsP, I_State) -> + lists:foldl(fun({o_set, Key, Val}=_Op, Dict) -> + ?DICTMOD:store(Key, Val, Dict); + ({o_start_checkpoint}, _Dict) -> + fresh() + end, + I_State, + lists:append([binary_to_term(Page) || Page <- Pages])). + diff --git a/prototype/tango/src/tango_dt_queue.erl b/prototype/tango/src/tango_dt_queue.erl new file mode 100644 index 0000000..e1f0dbb --- /dev/null +++ b/prototype/tango/src/tango_dt_queue.erl @@ -0,0 +1,131 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_dt_queue). + +-behaviour(tango_dt). + +-export([start_link/4, stop/1, + is_empty/1, length/1, peek/1, to_list/1, member/2, + in/2, out/1, reverse/1, filter/2, + checkpoint/1]). + +%% Tango datatype callbacks +-export([fresh/0, + do_pure_op/2, do_dirty_op/6, do_checkpoint/1, + play_log_mutate_i_state/3]). + +-define(LONG_TIME, 30*1000). + +-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])). 
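%% Illustrative sketch (hypothetical driver, not part of this patch) for the
%% tango_dt_map module above: start an instance on stream 7, write one key,
%% read it back through the replayed log, and stop it.
example_map(PageSize, SeqPid, Proj) ->
    {ok, MapPid} = tango_dt_map:start_link(PageSize, SeqPid, Proj, 7),
    ok = tango_dt_map:set(MapPid, color, blue),
    {ok, blue} = tango_dt_map:get(MapPid, color),
    ok = tango_dt_map:stop(MapPid),
    ok.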
+ +start_link(PageSize, SequencerPid, Proj, StreamNum) -> + gen_server:start_link(tango_dt, + [PageSize, SequencerPid, Proj, ?MODULE, StreamNum], + []). + +stop(Pid) -> + tango_dt:stop(Pid). + +is_empty(Pid) -> + gen_server:call(Pid, {cb_pure_op, {o_is_empty}}, ?LONG_TIME). + +length(Pid) -> + gen_server:call(Pid, {cb_pure_op, {o_length}}, ?LONG_TIME). + +peek(Pid) -> + gen_server:call(Pid, {cb_pure_op, {o_peek}}, ?LONG_TIME). + +to_list(Pid) -> + gen_server:call(Pid, {cb_pure_op, {o_to_list}}, ?LONG_TIME). + +member(Pid, X) -> + gen_server:call(Pid, {cb_pure_op, {o_member, X}}, ?LONG_TIME). + +in(Pid, Val) -> + gen_server:call(Pid, {cb_dirty_op, {o_in, Val}}, ?LONG_TIME). + +out(Pid) -> + gen_server:call(Pid, {cb_dirty_op, {o_out}}, ?LONG_TIME). + +reverse(Pid) -> + gen_server:call(Pid, {cb_dirty_op, {o_reverse}}, ?LONG_TIME). + +filter(Pid, Fun) -> + gen_server:call(Pid, {cb_dirty_op, {o_filter, Fun}}, ?LONG_TIME). + +checkpoint(Pid) -> + tango_dt:checkpoint(Pid). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fresh() -> + queue:new(). + +do_pure_op({o_is_empty}, Q) -> + {ok, queue:is_empty(Q)}; +do_pure_op({o_length}, Q) -> + {ok, queue:len(Q)}; +do_pure_op({o_peek}, Q) -> + {ok, queue:peek(Q)}; +do_pure_op({o_to_list}, Q) -> + {ok, queue:to_list(Q)}; +do_pure_op({o_member, X}, Q) -> + {ok, queue:member(X, Q)}. + +do_dirty_op(Op0, From, + I_State, StreamNum, Proj0, ___TODO_delme_PageSize) -> + {AsyncType, Op} = transform_dirty_op(Op0, From), + Page = term_to_binary(Op), + {{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]), + {AsyncType, I_State, Proj1, LPN}. + +do_checkpoint(Q=_I_State) -> + [{o_start_checkpoint}|[{o_in, X} || X <- queue:to_list(Q)]]. + +play_log_mutate_i_state(Pages, _SideEffectsP, I_State) -> + lists:foldl(fun({o_in, Val}=_Op, Q) -> + queue:in(Val, Q); + ({o_out, From, Node, WritingPid}, Q) -> + {Reply, NewQ} = queue:out(Q), + if Node == node(), WritingPid == self() -> + gen_server:reply(From, {ok, Reply}); + true -> + ok + end, + NewQ; + ({o_reverse}, Q) -> + queue:reverse(Q); + ({o_filter, Fun}, Q) -> + queue:filter(Fun, Q); + ({o_start_checkpoint}, _Q) -> + fresh() + end, + I_State, + lists:append([binary_to_term(Page) || Page <- Pages])). + +transform_dirty_op({o_out}, From) -> + %% This func will be executed on the server side prior to writing + %% to the log. + {op_t_sync, [{o_out, From, node(), self()}]}; +transform_dirty_op(OpList, _From) when is_list(OpList) -> + {op_t_async, OpList}; +transform_dirty_op(Op, _From) -> + {op_t_async, [Op]}. diff --git a/prototype/tango/src/tango_dt_register.erl b/prototype/tango/src/tango_dt_register.erl new file mode 100644 index 0000000..89c4209 --- /dev/null +++ b/prototype/tango/src/tango_dt_register.erl @@ -0,0 +1,80 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. 
+%% +%% ------------------------------------------------------------------- + +-module(tango_dt_register). + +-behaviour(tango_dt). + +-export([start_link/4, stop/1, + set/2, get/1, + checkpoint/1]). + +%% Tango datatype callbacks +-export([fresh/0, + do_pure_op/2, do_dirty_op/6, do_checkpoint/1, + play_log_mutate_i_state/3]). + +-define(LONG_TIME, 30*1000). + +start_link(PageSize, SequencerPid, Proj, StreamNum) -> + gen_server:start_link(tango_dt, + [PageSize, SequencerPid, Proj, ?MODULE, StreamNum], + []). + +stop(Pid) -> + tango_dt:stop(Pid). + +set(Pid, Val) -> + gen_server:call(Pid, {cb_dirty_op, {o_set, Val}}, ?LONG_TIME). + +get(Pid) -> + gen_server:call(Pid, {cb_pure_op, {o_get}}, ?LONG_TIME). + +checkpoint(Pid) -> + tango_dt:checkpoint(Pid). + + +fresh() -> + undefined. + +do_pure_op({o_get}, Register) -> + {ok, Register}. + +do_dirty_op(Op0, _From, + I_State, StreamNum, Proj0, ___TODO_delme_PageSize) -> + Op = if is_list(Op0) -> Op0; + true -> [Op0] % always make a list + end, + Page = term_to_binary(Op), + {{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]), + {op_t_async, I_State, Proj1, LPN}. + +do_checkpoint(Register=_I_State) -> + [{o_start_checkpoint},{o_set, Register}]. + +play_log_mutate_i_state(Pages, _SideEffectsP, OldRegister=_I_State) -> + lists:foldl(fun({o_set, Val}=_Op, _OldVal) -> + Val; + ({o_start_checkpoint}, _OldVal) -> + fresh() + end, + OldRegister, + lists:append([binary_to_term(Page) || Page <- Pages])). + diff --git a/prototype/tango/src/tango_oid.erl b/prototype/tango/src/tango_oid.erl new file mode 100644 index 0000000..b99e700 --- /dev/null +++ b/prototype/tango/src/tango_oid.erl @@ -0,0 +1,192 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_oid). + +-behaviour(gen_server). + +%% API +-export([start_link/3, stop/1, + new/2, get/2]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% Tango datatype callbacks (prototype) +-export([fresh/0, + do_pure_op/2, do_dirty_op/4, play_log_mutate_i_state/3]). + +-define(SERVER, ?MODULE). +-define(OID_STREAM_NUMBER, 0). + +-define(LONG_TIME, 30*1000). + +-define(D(X), io:format(user, "Dbg: ~s = ~p\n", [??X, X])). + +-type lpn() :: non_neg_integer(). + +-record(state, { + page_size :: non_neg_integer(), % Corfurl page size + seq :: pid(), % sequencer pid + proj :: term(), % projection + last_fetch_lpn :: lpn(), % + all_back_ps :: [lpn()], % All back-pointers LIFO order! + i_state :: term() % internal state thingie + }). + +start_link(PageSize, SequencerPid, Proj) -> + gen_server:start_link(?MODULE, + [PageSize, SequencerPid, Proj], []). + +stop(Pid) -> + gen_server:call(Pid, {stop}, ?LONG_TIME). 
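%% Illustrative sketch (hypothetical driver, not part of this patch) for the
%% tango_dt_register module above: last write wins once the log is replayed.
example_register(PageSize, SeqPid, Proj) ->
    {ok, RegPid} = tango_dt_register:start_link(PageSize, SeqPid, Proj, 9),
    ok = tango_dt_register:set(RegPid, <<"v1">>),
    ok = tango_dt_register:set(RegPid, <<"v2">>),
    {ok, <<"v2">>} = tango_dt_register:get(RegPid),
    ok = tango_dt_register:stop(RegPid),
    ok.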
+ +new(Pid, Key) -> + gen_server:call(Pid, {new, Key}, ?LONG_TIME). + +get(Pid, Key) -> + gen_server:call(Pid, {get, Key}, ?LONG_TIME). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init([PageSize, SequencerPid, Proj]) -> + LastLPN = find_last_lpn(SequencerPid, ?OID_STREAM_NUMBER), + {LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, 0, ?OID_STREAM_NUMBER), + BackPs = tango:append_lpns(LPNs, []), + LastFetchLPN = tango:back_ps2last_lpn(BackPs), + I_State = play_log_pages(Pages, fresh(), ?MODULE, false), + {ok, #state{page_size=PageSize, + seq=SequencerPid, + proj=Proj, + last_fetch_lpn=LastFetchLPN, + all_back_ps=BackPs, + i_state=I_State}}. + +handle_call({new, Key}, From, + #state{proj=Proj0, i_state=I_State}=State) -> + Op = {new_oid, Key, From, 0}, + {_Res, I_State2, Proj1, _LPN} = + do_dirty_op(Op, I_State, ?OID_STREAM_NUMBER, Proj0), + %% Let's see how much trouble we can get outselves in here. + %% If we're here, then we've written to the log without error. + %% So then the cast to roll forward must see that log entry + %% (if it also operates without error). So, the side-effect of + %% the op ought to always send a reply to the client. + gen_server:cast(self(), {roll_forward}), + {noreply, State#state{i_state=I_State2, + proj=Proj1}}; +handle_call({get, _Key}=Op, _From, State) -> + State2 = #state{i_state=I_State} = roll_log_forward(State), + Reply = do_pure_op(Op, I_State), + {reply, Reply, State2}; +handle_call({stop}, _From, State) -> + {stop, normal, ok, State}; +handle_call(_Request, _From, State) -> + Reply = whaaaaaaaaaaaa, + {reply, Reply, State}. + +handle_cast({roll_forward}, State) -> + State2 = roll_log_forward(State), + {noreply, State2}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +find_last_lpn(SequencerPid, StreamNum) -> + {ok, _, [BackPs]} = corfurl_sequencer:get_tails(SequencerPid, + 0, [StreamNum]), + tango:back_ps2last_lpn(BackPs). + +fetch_unread_pages(Proj, LastLPN, StopAtLPN, StreamNum) + when LastLPN >= StopAtLPN -> + LPNandPages = tango:scan_backward(Proj, StreamNum, LastLPN, + StopAtLPN, true), + {_LPNs, _Pages} = lists:unzip(LPNandPages). + +play_log_pages(Pages, SideEffectsP, + #state{i_state=I_State} = State) -> + I_State2 = play_log_pages(Pages, I_State, ?MODULE, SideEffectsP), + State#state{i_state=I_State2}. + +play_log_pages(Pages, I_State, CallbackMod, SideEffectsP) -> + CallbackMod:play_log_mutate_i_state(Pages, SideEffectsP, I_State). + +roll_log_forward(#state{seq=SequencerPid, proj=Proj, all_back_ps=BackPs, + last_fetch_lpn=StopAtLPN} = State) -> + LastLPN = find_last_lpn(SequencerPid, ?OID_STREAM_NUMBER), + {LPNs, Pages} = fetch_unread_pages(Proj, LastLPN, StopAtLPN, ?OID_STREAM_NUMBER), + NewBPs = tango:append_lpns(LPNs, BackPs), + play_log_pages(Pages, true, State#state{all_back_ps=NewBPs}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(oid_map, { + next :: non_neg_integer(), + map :: dict() + }). + +-define(DICTMOD, dict). + +fresh() -> + #oid_map{next=1, + map=?DICTMOD:new()}. + +do_pure_op({get, Key}, #oid_map{map=Dict}) -> + ?DICTMOD:find(Key, Dict). + +do_dirty_op({new_oid, _Key, _From, _NumOfAttempts}=Op, + I_State, StreamNum, Proj0) -> + Page = term_to_binary(Op), + {{ok, LPN}, Proj1} = tango:append_page(Proj0, Page, [StreamNum]), + {ok, I_State, Proj1, LPN}. 
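%% Illustrative sketch (hypothetical caller, not part of this patch): register
%% an object name in the OID map and look it up again. new/2 only replies from
%% play_log_mutate_i_state/3 (below) after its log entry has been replayed, so
%% a successful reply implies the entry was written to the corfurl log.
example_oid(PageSize, SeqPid, Proj) ->
    {ok, OidPid} = tango_oid:start_link(PageSize, SeqPid, Proj),
    {ok, Num} = tango_oid:new(OidPid, <<"my_queue">>),
    {ok, Num} = tango_oid:get(OidPid, <<"my_queue">>),
    ok = tango_oid:stop(OidPid),
    Num.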
+ +play_log_mutate_i_state(Pages, SideEffectsP, I_State) -> + lists:foldl(fun({new_oid, Key, From, _NumOfAttempts}=_Op, + #oid_map{map=Dict, next=Next}=O) -> + {Res, O2} = + case ?DICTMOD:find(Key, Dict) of + error -> + Dict2 = ?DICTMOD:store(Key, Next, Dict), + {{ok, Next},O#oid_map{map=Dict2, + next=Next + 1}}; + {ok, _} -> + {already_exists, O} + end, + if SideEffectsP -> + gen_server:reply(From, Res); + true -> + ok + end, + O2 + end, + I_State, + [binary_to_term(Page) || Page <- Pages]). + diff --git a/prototype/tango/test/corfurl_flu_test.erl b/prototype/tango/test/corfurl_flu_test.erl new file mode 100644 index 0000000..1c198b4 --- /dev/null +++ b/prototype/tango/test/corfurl_flu_test.erl @@ -0,0 +1,135 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_flu_test). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-endif. + +-include("corfurl.hrl"). + +-define(M, corfurl_flu). + +-ifdef(TEST). +-ifndef(PULSE). + +startstop_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + {ok, _} = ?M:status(P1), + ok = ?M:stop(P1), + {'EXIT', _} = (catch ?M:stop(P1)), + + {ok, P2} = ?M:start_link(Dir), + 0 = ?M:get__mlp(P2), + 0 = ?M:get__min_epoch(P2), + ok = ?M:stop(P2), + + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +basic_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + Epoch1 = 1, + Epoch2 = 2, + Epoch3 = 3, + LPN = 1, + Bin1 = <<42:64>>, + Bin2 = <<42042:64>>, + + error_unwritten = ?M:read(P1, Epoch1, LPN), + error_unwritten = ?M:trim(P1, Epoch1, LPN), + error_unwritten = ?M:trim(P1, Epoch1, LPN+77), + + ok = ?M:write(P1, Epoch1, LPN, Bin1), + error_overwritten = ?M:write(P1, Epoch1, LPN, Bin1), + error_overwritten = ?M:fill(P1, Epoch1, LPN), + LPN = ?M:get__mlp(P1), + 0 = ?M:get__min_epoch(P1), + 0 = ?M:get__trim_watermark(P1), + {ok, LPN} = ?M:seal(P1, Epoch1), + 2 = ?M:get__min_epoch(P1), + + error_overwritten = ?M:write(P1, Epoch2, LPN, Bin1), + ok = ?M:write(P1, Epoch2, LPN+1, Bin2), + Epoch2 = ?M:get__min_epoch(P1), + + error_badepoch = ?M:read(P1, Epoch1, LPN), + {ok, Bin2} = ?M:read(P1, Epoch2, LPN+1), + error_unwritten = ?M:read(P1, Epoch2, LPN+2), + badarg = ?M:read(P1, Epoch2, 1 bsl 2982), + + error_badepoch = ?M:seal(P1, Epoch1), + {ok, _} = ?M:seal(P1, Epoch2), + error_badepoch = ?M:seal(P1, Epoch2), + + error_badepoch = ?M:read(P1, Epoch1, LPN), + error_badepoch = ?M:read(P1, Epoch1, LPN+1), + {ok, Bin1} = ?M:read(P1, Epoch3, LPN), + {ok, Bin2} = ?M:read(P1, Epoch3, LPN+1), + + error_badepoch = ?M:trim(P1, Epoch1, LPN+1), + ok = ?M:trim(P1, Epoch3, LPN+1), + error_trimmed = ?M:trim(P1, Epoch3, LPN+1), + %% Current watermark processing is broken. But we'll test what's + %% there now. 
+ ExpectedWaterFixMe = LPN+1, + ExpectedWaterFixMe = ?M:get__trim_watermark(P1), + + ok = ?M:fill(P1, Epoch3, LPN+3), + error_trimmed = ?M:read(P1, Epoch3, LPN+3), + error_trimmed = ?M:fill(P1, Epoch3, LPN+3), + error_trimmed = ?M:trim(P1, Epoch3, LPN+3), + + Epoch3 = ?M:get__min_epoch(P1), + ok = ?M:stop(P1), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +seal_persistence_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + 0 = ?M:get__min_epoch(P1), + Epoch = 665, + {ok, LPN} = ?M:seal(P1, Epoch-1), + Epoch = ?M:get__min_epoch(P1), + ok = ?M:stop(P1), + + {ok, P2} = ?M:start_link(Dir), + Epoch = ?M:get__min_epoch(P2), + + ok = ?M:stop(P2), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +-endif. % not PULSE +-endif. % TEST diff --git a/prototype/tango/test/corfurl_pulse.erl b/prototype/tango/test/corfurl_pulse.erl new file mode 100644 index 0000000..d00c2d7 --- /dev/null +++ b/prototype/tango/test/corfurl_pulse.erl @@ -0,0 +1,950 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_pulse). + +-ifdef(TEST). +-ifdef(PULSE). + +-compile(export_all). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eqc/include/eqc_statem.hrl"). + +-include("corfurl.hrl"). + +-include_lib("eunit/include/eunit.hrl"). + +-compile({parse_transform, pulse_instrument}). + +-compile({pulse_skip,[{prop_pulse_test_,0},{clean_up_runtime,1},{delete_dir,1}]}). +%% -compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). + +%% Used for output within EUnit... +-define(QC_FMT(Fmt, Args), + io:format(user, Fmt, Args)). + +%% And to force EUnit to output QuickCheck output... +-define(QC_OUT(P), + eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)). + +-define(MAX_PAGES, 50000). +-define(MY_TAB, i_have_a_name). +-define(MY_KEY, ?MY_TAB). +-define(PROJECTION_DIR, "./tmp.projection." ++ os:getpid()). +-define(SEQUENCER_NAME, 'corfurl pulse seq thingie'). + +-record(run, { + proj, % Projection + flus % List of FLUs + }). + +-record(state, { + is_setup = false :: boolean(), + num_chains = 0 :: integer(), + chain_len = 0 :: integer(), + page_size = 0 :: integer(), + run :: #run{} + }). + +%% Model testing things: +%% Define true to fake bad behavior that model **must** notice & fail! + +-ifndef(TRIP_no_append_duplicates). +-define(TRIP_no_append_duplicates, false). +-endif. +-ifndef(TRIP_bad_read). +-define(TRIP_bad_read, false). +-endif. +-ifndef(TRIP_bad_scan_forward). +-define(TRIP_bad_scan_forward, false). +-endif. +-ifndef(TRIP_bad_fill). +-define(TRIP_bad_fill, false). +-endif. +-ifndef(TRIP_bad_trim). +-define(TRIP_bad_trim, false). +-endif. + +initial_state() -> + #state{}. + +gen_page(PageSize) -> + binary(PageSize). 
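+
+%% The ?TRIP_* switches defined above default to false.  A sketch of how one
+%% of them might be enabled (assuming a plain erlc-style build; adjust for
+%% your build tool): compile this module with the macro defined as true, e.g.
+%%
+%%     erlc -DPULSE -DTRIP_bad_read=true <other PULSE/EQC flags> corfurl_pulse.erl
+%%
+%% after which the properties below should fail with a bad-read counterexample,
+%% demonstrating that the model notices the injected misbehavior.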
+ +gen_seed() -> + noshrink({choose(1, 20000), choose(1, 20000), choose(1, 20000)}). + +gen_sequencer_percent() -> + frequency([{10, choose(1,100)}, + {5, choose(90,100)}]). + +gen_sequencer() -> + frequency([{100, standard}, + {50, {gen_seed(), gen_sequencer_percent(), choose(1, 2)}}]). + +gen_approx_page() -> + %% EQC can't know what pages are perhaps-written, so pick something big. + noshrink(?LET(I, largeint(), abs(I))). + +gen_scan_forward_start() -> + oneof([1, gen_approx_page()]). + +gen_stop_method() -> + oneof([stop, kill]). + +command(#state{run=Run} = S) -> + ?LET({NumChains, ChainLen, PageSize}, + {parameter(num_chains), parameter(chain_len), parameter(page_size)}, + frequency( + [{50, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize, gen_sequencer()]}} + || not S#state.is_setup] ++ + [{50, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} + || S#state.is_setup] ++ + [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} + || S#state.is_setup] ++ + [{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{10, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}} + || S#state.is_setup] ++ + [])). + +%% Precondition, checked before a command is added to the command sequence. +precondition(S, {call, _, setup, _}) -> + not S#state.is_setup; +precondition(S, {call, _, _, _}) -> + S#state.is_setup. + +%% Next state transformation, S is the current state and V is the result of the +%% command. +next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize, _SeqType]}) -> + S#state{is_setup=true, + num_chains=NumChains, + chain_len=ChainLen, + page_size=PageSize, + run=Res}; +next_state(S, _, {call, _, append, _}) -> + S; +next_state(S, _, {call, _, read_approx, _}) -> + S; +next_state(S, _, {call, _, scan_forward, _}) -> + S; +next_state(S, _, {call, _, fill, _}) -> + S; +next_state(S, _, {call, _, trim, _}) -> + S; +next_state(S, _, {call, _, stop_sequencer, _}) -> + S. + +eqeq(X, X) -> true; +eqeq(X, Y) -> {X, '/=', Y}. + +postcondition(_S, {call, _, setup, _}, #run{} = _V) -> + true; +postcondition(_S, {call, _, append, _}, V) -> + case V of + {ok, LPN} when is_integer(LPN) -> true; + {special_trimmed, LPN} when is_integer(LPN) -> true; + error_badepoch -> true; + _ -> eqeq(V, todoTODO_fixit) + end; +postcondition(_S, {call, _, read_approx, _}, V) -> + valid_read_result(V); +postcondition(_S, {call, _, scan_forward, _}, V) -> + case V of + {ok, LastLSN, MoreP, Pages} -> + true = is_integer(LastLSN), + true = LastLSN > 0, + true = (MoreP == true orelse MoreP == false), + [] = lists:usort([X || {_LPN, Pg} <- Pages, + X <- [valid_read_result(Pg)], X /= true]), + true; + _ -> + eqeq(V, {todoTODO_fixit,?LINE}) + end; +postcondition(_S, {call, _, FillTrim, _}, V) + when FillTrim == fill; FillTrim == trim -> + case V of + ok -> true; + error_trimmed -> true; + error_unwritten -> true; + error_overwritten -> true; + _ -> eqeq(V, {error, FillTrim, V}) + end; +postcondition(_S, {call, _, stop_sequencer, _}, _V) -> + true. + +valid_read_result(Pg) when is_binary(Pg) -> true; +valid_read_result(error_unwritten) -> true; +valid_read_result(error_trimmed) -> true; +valid_read_result(V) -> eqeq(V, {todoTODO_fixit,?LINE}). 
+ +run_commands_on_node(LocalOrSlave, Cmds, Seed) -> + AfterTime = if LocalOrSlave == local -> 50000; + LocalOrSlave == slave -> 1000000 + end, + event_logger:start_link(), + pulse:start(), + delete_dir(?PROJECTION_DIR), + error_logger:tty(false), + error_logger:add_report_handler(handle_errors), + event_logger:start_logging(), + X = + try + {H, S, Res, Trace} = pulse:run(fun() -> + catch ets:new(?MY_TAB, [public, set, named_table]), + ets:insert(?MY_TAB, {?MY_KEY, undefined}), + %% application:start(my_test_app), + %% receive after AfterTime -> ok end, + {H, S, R} = run_parallel_commands(?MODULE, Cmds), + %% io:format(user, "Yooo: H = ~p\n", [H]), + %% io:format(user, "Yooo: S = ~p\n", [S]), + %% io:format(user, "Yooo: R = ~p\n", [R]), + receive after AfterTime -> ok end, + Trace = event_logger:get_events(), + %% receive after AfterTime -> ok end, + [{_, ThisRun}] = ets:lookup(?MY_TAB, ?MY_KEY), + [clean_up_runtime(ThisRun) || ThisRun /= undefined], + %% stop pulse controller *after* clean_up_runtime(). + catch exit(pulse_application_controller, shutdown), + {H, S, R, Trace} + end, [{seed, Seed}, + {strategy, unfair}]), + Schedule = pulse:get_schedule(), + Errors = gen_event:call(error_logger, handle_errors, get_errors, 60*1000), + {H, S, Res, Trace, Schedule, Errors} + catch + _:Err -> + {'EXIT', Err} + end, + X. + +prop_pulse() -> + prop_pulse(local). + +prop_pulse(LocalOrSlave) -> + ?FORALL({NumChains, ChainLen, PageSize}, + {choose(1, 3), choose(1, 3), choose(1, 16)}, + begin + P = ?FORALL({Cmds, Seed}, + {with_parameters([{num_chains, NumChains}, + {chain_len, ChainLen}, + {page_size, PageSize}], parallel_commands(?MODULE)), + pulse:seed()}, + begin + case run_commands_on_node(LocalOrSlave, Cmds, Seed) of + {'EXIT', Err} -> + equals({'EXIT', Err}, ok); + {_H, S, Res, Trace, Schedule, Errors} -> + CheckTrace = check_trace(Trace, Cmds, Seed), + ?WHENFAIL( + S = S, % ?QC_FMT("\nState: ~p\n", [S]), + measure(schedule, length(Schedule), + conjunction( + [{simple_result, equals(Res, ok)}, + {errors, equals(Errors, [])}, + {events, CheckTrace} ]))) + end + end), + P + end). + +prop_pulse_test_() -> + Timeout = case os:getenv("PULSE_TIME") of + false -> 60; + Val -> list_to_integer(Val) + end, + ExtraTO = case os:getenv("PULSE_SHRINK_TIME") of + false -> 0; + Val2 -> list_to_integer(Val2) + end, + io:format(user, "prop_pulse_test time: ~p + ~p seconds\n", + [Timeout, ExtraTO]), + {timeout, (Timeout+ExtraTO) + 60, + fun() -> + ?assert(eqc:quickcheck(eqc:testing_time(Timeout,?QC_OUT(prop_pulse())))) + end}. + + +%% Example Trace0 (raw event info, from the ?LOG macro) +%% +%% [{32014,{call,<0.467.0>,{append,<<"O">>}}}, +%% {32421,{call,<0.466.0>,{append,<<134>>}}}, +%% {44522,{result,<0.467.0>,{ok,1}}}, +%% {47651,{result,<0.466.0>,{ok,2}}}] + +check_trace(Trace0, _Cmds, _Seed) -> + %% Let's treat this thing like a KV store. It is, mostly. + %% Key = LPN, Value = error_unwritten | {ok, Blob} | error_trimmed + %% + %% Problem: At {call, Pid, ...} time, we don't know what Key is! + %% We find out at {return, Pid, {ok, LSN}} time. + %% Also, the append might fail, so the model can ignore those + %% failures because they're not mutating any state that and + %% external viewer can see. + %% WARNING: Trace0 + lamport_clocks means Trace0 is not strictly sorted! 
Trace = add_LPN_to_append_calls(lists:sort(Trace0)),
+
+    Events = eqc_temporal:from_timed_list(Trace),
+    %% Example Events, temporal style, 1 usec resolution, same as original trace
+    %%
+    %% [{0,32014,[]},
+    %%  {32014,32015,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
+    %%  {32015,32421,[]},
+    %%  {32421,32422,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]},
+    %%  {32422,44522,[]},
+    %%  {44522,44523,[{result,<0.467.0>,{ok,...}}]},
+    %%  {44523,47651,[]},
+    %%  {47651,47652,[{result,<0.466.0>,{ok,...}}]},
+    %%  {47652,infinity,[]}]
+
+    Calls = eqc_temporal:stateful(
+              fun({call, _Pid, _Call} = I) -> [I] end,
+              fun({call, Pid, _Call}, {result, Pid, _}) -> [] end,
+              Events),
+    %% Example Calls (temporal map of when a call is in progress)
+    %%
+    %% [{0,32014,[]},
+    %%  {32014,32421,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
+    %%  {32421,44522,
+    %%   [{call,<0.466.0>,{append,<<134>>,will_be,2}},{call,<0.467.0>,{append,<<"O">>,will_be,1}}]},
+    %%  {44522,47651,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]},
+    %%  {47651,infinity,[]}]
+
+    AllLPNsR = eqc_temporal:stateful(
+                 fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
+                    ({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN;
+                    ({call, _Pid, {read, LPN, _, _}}) -> LPN;
+                    ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
+                    ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN;
+                    ({call, _Pid, {goo_write, LPN, _Pg}}) -> LPN
+                 end,
+                 fun(x) -> [] end,
+                 Calls),
+    %%io:format("Calls ~p\n", [Calls]),
+    %%io:format("AllLPNsR ~p\n", [AllLPNsR]),
+    %% The last item in the relation tells us what the final facts are in the
+    %% relation.  In this case, it's all LPNs ever mentioned in the test run.
+    {_, infinity, AllLPNs} = lists:last(eqc_temporal:all_future(AllLPNsR)),
+
+    %% Use the following atoms to denote transitions ("Ttn") by an LPN:
+    %% w_0 = not written yet, error_unwritten
+    %% w_1 = written successfully, {ok, binary()}
+    %% w_ft = fill trimmed, error_trimmed
+    %% w_tt = trim trimmed, error_trimmed
+
+    Mods = eqc_temporal:stateful(
+             fun({call, Pid, {append, Pg, will_be, LPN}}) ->
+                     {mod_working, w_1, LPN, Pg, Pid};
+                ({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) ->
+                     %% This is a special case for the model.  We know that
+                     %% a write raced with a trim and lost (at least some of
+                     %% the time inside the chain).  But the transition that
+                     %% we model in this case is a special w_ type that is
+                     %% treated specially by the dictionary-making step that
+                     %% creates the ValuesR relation.
+                     {mod_working, w_special_trimmed, LPN, Pg, Pid};
+                ({call, Pid, {fill, LPN, will_be, ok}}) ->
+                     {mod_working, w_ft, LPN, fill, Pid};
+                ({call, Pid, {trim, LPN, will_be, ok}}) ->
+                     {mod_working, w_tt, LPN, trim, Pid};
+                ({call, Pid, {read, LPN, will_fail, error_trimmed}}) ->
+                     {mod_working, w_tt, LPN, read_repair_maybe, Pid}
+             end,
+             fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res}) ->
+                     []
+             end,
+             Events),
+
+    %% StartMod contains {mod_start, Ttn, LPN, V} when a modification started.
+    %% DoneMod contains {mod_end, Ttn, LPN, V} when a modification finished.
+    %% This is a clever trick: Mods contains the start & end timestamp
+    %% for each modification.  Use shift() by 1 usec to move all timestamps
+    %% forward/backward 1 usec, then subtract away the original time range to
+    %% leave a 1 usec relation in time.
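+    %% Informally, for example: if Mods holds a fact F over the interval
+    %% [10,20), then shift(1, Mods) holds F over [11,21), so
+    %%     subtract(Mods, shift(1, Mods)) holds F only over [10,11)  (its start)
+    %%     subtract(shift(1, Mods), Mods) holds F only over [20,21)  (its end)
+    %% which is how StartMod and DoneMod are derived just below.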
+ StartMod = eqc_temporal:map( + fun({mod_working, Ttn, LPN, Pg, _Pid}) -> + {mod_start, Ttn, LPN, Pg} + end, + eqc_temporal:subtract(Mods, eqc_temporal:shift(1, Mods))), + DoneMod = eqc_temporal:map( + fun({mod_working, Ttn, LPN, Pg, _Pid}) -> + {mod_end, Ttn, LPN, Pg} + end, + eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), + StartsDones = eqc_temporal:union(StartMod, DoneMod), + + %% TODO: A brighter mind than mine might figure out how to do this + %% next step using only eqc_temporal. + %% + %% We create a new relation, ValuesR. This relation contains + %% {values, OD::orddict()} for each time interval in the relation. + %% The OD contains all possible values for a particular LPN at + %% that time in the relation. + %% The key for OD is LPN, the value is an unordered list of possible values. + + InitialValDict = orddict:from_list([{LPN, [error_unwritten]} || + LPN <- AllLPNs]), + ValuesRFun = + fun({TS1, TS2, StEnds}, Dict1) -> + Dict2 = lists:foldl( + fun({mod_start, w_1, LPN, Pg}, D) -> + orddict:append(LPN, Pg, D); + ({mod_start, WType, LPN, _Pg}, D) + when WType == w_ft; WType == w_tt -> + case lists:member(error_trimmed, + orddict:fetch(LPN, D)) of + true -> + D; + false -> + orddict:append(LPN, error_trimmed,D) + end; + ({mod_start, w_special_trimmed, LPN, Pg}, D)-> + orddict:append(LPN, Pg, D) + end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), + Dict3 = lists:foldl( + fun({mod_end, w_1, LPN, Pg}, D) -> + Vs1 = orddict:fetch(LPN, D), + %% We've written a page. error_unwriten is + %% now impossible; any other binary() is + %% also impossible. However, there may be + %% a trim operation that's still in flight! + Vs2 = [V || V <- Vs1, V /= error_unwritten, + not is_binary(V)], + orddict:store(LPN, [Pg|Vs2], D); + ({mod_end, WType, LPN, _Pg}, D) + when WType == w_ft; WType == w_tt -> + orddict:store(LPN, [error_trimmed], D); + ({mod_end, w_special_trimmed, LPN, Pg}, D) -> + orddict:store(LPN, [Pg,error_trimmed], D) + end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), + {{TS1, TS2, [{values, Dict3}]}, Dict3} + end, + {ValuesR, _} = lists:mapfoldl(ValuesRFun, InitialValDict, StartsDones), + + InitialTtnDict = orddict:from_list([{LPN, [w_0]} || LPN <- AllLPNs]), + {TransitionsR, _} = + lists:mapfoldl( + fun({TS1, TS2, StEnds}, Dict1) -> + Dict2 = lists:foldl( + fun({mod_end, Ttn, LPN, _Pg}, D) -> + %% orddict does not discard duplicates + orddict:append(LPN, Ttn, D); + (_, D) -> + D + end, Dict1, [X || X={mod_end,_,_,_} <- StEnds]), + {{TS1, TS2, [{transitions, Dict2}]}, Dict2} + end, InitialTtnDict, StartsDones), + + %% Checking reads is a tricky thing. My first attempt created a temporal + %% relation for the 1usec window when the read call was complete, then + %% union with the ValuesR relation to see what values were valid at that + %% particular instant. That approach fails sometimes! + %% + %% The reason is honest race conditions with a mutation: the model doesn't + %% know exactly when the data was written, so a valid value may have been + %% added/removed from the ValuesR relation that aren't there for the + %% 1usec window that intersects with ValuesR. + %% + %% Instead, we need to merge together all possible values from ValuesR + %% that appear at any time during the read op's lifetime. 
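+    %% Concretely, as a worked example of the rule above: if a read of LPN L
+    %% overlaps an in-flight append of <<"A">> to L, then ValuesR's entry for
+    %% L moves from [error_unwritten] through [error_unwritten, <<"A">>] to
+    %% [<<"A">>] during that interval.  Any of those values is a legal result
+    %% for the read, so the Reads relation below accumulates the union of
+    %% every ValuesR entry for L seen between the read's call and its result.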
+ + PerhapsR = eqc_temporal:stateful( + fun({call, _Pid, {goo_write, LPN, Pg}}) -> + {perhaps, LPN, Pg} + end, + fun(x)-> [] end, + Events), + {_, _, Perhaps} = lists:last(eqc_temporal:all_future(PerhapsR)), + %%?QC_FMT("*Perhaps: ~p\n", [Perhaps]), + Reads = eqc_temporal:stateful( + fun({call, Pid, {read, LPN, _, _}}) -> + {read, Pid, LPN, []} + end, + fun({read, Pid, LPN, V1s}, {values, Values}) -> + {ok, V2s} = orddict:find(LPN, Values), + NewVs = lists:umerge(lists:sort(V1s), + lists:sort(V2s)), + %% Throw an exception (which is equivalent to a no-op) + %% if there are no differences: if we make multiples + %% of the exact same thing, stateful() will get confused. + false = NewVs == V1s, + {read, Pid, LPN, NewVs}; + ({read, Pid, LPN, Vs}, {result, Pid, Pg}) -> + %% case lists:member(Pg, Vs) orelse + %% lists:member({perhaps, LPN, Pg}, Perhaps) of + case lists:member(Pg, Vs) of + true -> + []; + false -> + case lists:member({perhaps, LPN, Pg}, Perhaps) of + true -> + %% The checking of the Perhaps list in + %% this manner is not strictly + %% temporally valid. It is possible + %% for the {perhaps,...} event to be + %% after the event we're checking here. + %% TODO work is to make this check 100% + %% temporally valid. + io:format(user, "Yo, found ~p ~p in Perhaps\n", [LPN, Pg]), + []; + false -> + [{bad, read, LPN, Pid, got, Pg, + possible, Vs}] + end + end + end, eqc_temporal:union(Events, ValuesR)), + BadFilter = fun(bad) -> true; + (Bad) when is_tuple(Bad), element(1, Bad) == bad -> true; + (_) -> false end, + BadReads = filter_relation_facts(BadFilter, Reads), + + %% Property: For all LPNs, the transition list for K must be one of the + %% following four (4) acceptable transition orderings. + {_, _, [{transitions, FinalTtns}]} = lists:last( + eqc_temporal:all_future(TransitionsR)), + FinaTtns_filtered = filter_transition_trimfill_suffixes(FinalTtns), + InvalidTransitions = orddict:fold( + fun(_LPN, [w_0], Acc) -> + Acc; + (_LPN, [w_0,w_1], Acc) -> + Acc; + (_LPN, [w_0,'w_t+'], Acc) -> + Acc; + (_LPN, [w_0,w_1,'w_t+'], Acc) -> + Acc; + (LPN, BadTtns, Acc) -> + [{LPN, BadTtns}|Acc] + end, [], FinaTtns_filtered), + + ?WHENFAIL(begin + ?QC_FMT("*Trace: ~p\n", [Trace]), + ?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), + ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), + ?QC_FMT("*ValuesR: ~p\n", [eqc_temporal:unions([ValuesR, StartsDones])]), + ?QC_FMT("*Calls: ~p\n", [Calls]), + ?QC_FMT("*BadReads: ~p\n", [BadReads]), + ?QC_FMT("*Perhaps: ~p\n", [Perhaps]) + end, + conjunction( + [ + {all_calls_finish, + eqc_temporal:is_false(eqc_temporal:all_future(Calls))}, + {no_invalidTransitions, + InvalidTransitions == []}, + {no_bad_reads, + eqc_temporal:is_false(eqc_temporal:all_future(BadReads))}, + %% If you want to see PULSE causing crazy scheduling, then + %% change one of the "true orelse" -> "false orelse" below. + %% {bogus_no_gaps, + %% true orelse + %% (AppendLPNs == [] orelse length(range_ify(AppendLPNs)) == 1)}, + %% {bogus_exactly_1_to_N, + %% true orelse (AppendLPNs == lists:seq(1, length(AppendLPNs)))}, + {true, true} + ])). 
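+
+%% A small worked example of the rewrite done below, using the raw trace
+%% sample shown above check_trace/3: each call is paired with its own pid's
+%% next result, so
+%%
+%%     {32014,{call,<0.467.0>,{append,<<"O">>}}}    (whose result is {ok,1})
+%%
+%% is rewritten to
+%%
+%%     {32014,{call,<0.467.0>,{append,<<"O">>,will_be,1}}}
+%%
+%% while an append that fails is tagged {append, Page, will_fail, Reason}.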
+ +add_LPN_to_append_calls([{TS, {call, Pid, {append, Page}}}|Rest]) -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + {ok, LPN} -> + {TS, {call, Pid, {append, Page, will_be, LPN}}}; + Else -> + {TS, {call, Pid, {append, Page, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([{TS, {call, Pid, {OpName, LPN}}}|Rest]) + when OpName == fill; OpName == trim -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + ok -> + {TS, {call, Pid, {OpName, LPN, will_be, ok}}}; + Else -> + {TS, {call, Pid, {OpName, LPN, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([{TS, {call, Pid, {read, LPN}}}|Rest]) -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + Page when is_binary(Page) -> + {TS, {call, Pid, {read, LPN, will_be, Page}}}; + Else -> + {TS, {call, Pid, {read, LPN, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([X|Rest]) -> + [X|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([]) -> + []. + +trace_lookahead_pid(Pid, [{_TS, {result, Pid, Res}}|_]) -> + Res; +trace_lookahead_pid(Pid, [_H|T]) -> + trace_lookahead_pid(Pid, T). + +%% Presenting command data statistics in a nicer way +command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> + Fun. + +%% Convenience functions for running tests + +test() -> + test({20, sec}). + +test(N) when is_integer(N) -> + quickcheck(numtests(N, prop_pulse())); +test({Time, sec}) -> + quickcheck(eqc:testing_time(Time, prop_pulse())); +test({Time, min}) -> + test({Time * 60, sec}); +test({Time, h}) -> + test({Time * 60, min}). + +check() -> + check(current_counterexample()). + +verbose() -> + verbose(current_counterexample()). + +verbose(CE) -> + erlang:put(verbose, true), + Ok = check(CE), + erlang:put(verbose, false), + Ok. + +check(CE) -> + check(on_output(fun("OK" ++ _, []) -> ok; (Fmt, Args) -> io:format(Fmt, Args) end, + prop_pulse(true == erlang:get(verbose))), + CE). + +recheck() -> + recheck(prop_pulse()). + +zipwith(F, [X|Xs], [Y|Ys]) -> + [F(X, Y)|zipwith(F, Xs, Ys)]; +zipwith(_, _, _) -> []. + +delete_dir(Dir) -> + corfurl_util:delete_dir(Dir). + +clean_up_runtime(#run{flus=Flus, proj=P}) -> + %% io:format(user, "clean_up_runtime: run = ~p\n", [R]), + #proj{seq={Seq,_,_}} = P, + catch corfurl_sequencer:stop(Seq), + [catch corfurl_flu:stop(F) || F <- Flus], + corfurl_test:setup_del_all(length(Flus)), + delete_dir(?PROJECTION_DIR), + (catch exit(whereis(?SEQUENCER_NAME), kill)). + +make_chains(ChainLen, FLUs) -> + make_chains(ChainLen, FLUs, [], []). + +make_chains(_ChainLen, [], SmallAcc, BigAcc) -> + [lists:reverse(SmallAcc)|BigAcc]; +make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> + if length(SmallAcc) == ChainLen -> + make_chains(ChainLen, T, [H], [lists:reverse(SmallAcc)|BigAcc]); + true -> + make_chains(ChainLen, T, [H|SmallAcc], BigAcc) + end. 
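+
+%% Worked examples of make_chains/2, derived by hand from the clauses above:
+%%
+%%     make_chains(2, [f1, f2, f3, f4]) -> [[f3, f4], [f1, f2]]
+%%     make_chains(2, [f1, f2, f3])     -> [[f3], [f1, f2]]
+%%
+%% i.e. FLUs are grouped into chains of ChainLen, the chains come back in
+%% reverse grouping order, and a short trailing chain is kept as-is.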
+ +setup(NumChains, ChainLen, PageSize, SeqType) -> + (catch exit(whereis(?SEQUENCER_NAME), kill)), + lamport_clock:init(), + + N = NumChains * ChainLen, + FLUs = corfurl_test:setup_basic_flus(N, PageSize, ?MAX_PAGES), + {ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType), + Chains = make_chains(ChainLen, FLUs), + %% io:format(user, "Cs = ~p\n", [Chains]), + Proj = corfurl:new_simple_projection(?PROJECTION_DIR, + 1, 1, ?MAX_PAGES, Chains), + ok = corfurl:save_projection(?PROJECTION_DIR, Proj), + error_overwritten = corfurl:save_projection(?PROJECTION_DIR, Proj), + 1 = corfurl:latest_projection_epoch_number(?PROJECTION_DIR), + {ok, Proj} = corfurl:read_projection(?PROJECTION_DIR, 1), + Run = #run{proj=Proj#proj{seq={Seq, node(), ?SEQUENCER_NAME}}, + flus=FLUs}, + ets:insert(?MY_TAB, {?MY_KEY, Run}), + Run. + +range_ify([]) -> + []; +range_ify(L) -> + [H|T] = lists:sort(L), + range_ify(H, H+1, T). + +range_ify(Beginning, Next, [Next|T]) -> + range_ify(Beginning, Next+1, T); +range_ify(Beginning, Next, [Else|T]) -> + [{Beginning, to, Next-1}|range_ify(Else, Else+1, T)]; +range_ify(Beginning, Next, []) -> + [{Beginning, to, Next-1}]. + +filter_relation_facts(FilterFun, R) -> + [{TS1, TS2, lists:filter(FilterFun, Facts)} || {TS1, TS2, Facts} <- R]. + %% {TS1, TS2, Facts} <- Reads, Fact <- Facts, BadFilter(Fact)], + +filter_transition_trimfill_suffixes(Ttns) -> + [{X, filter_1_transition_list(L)} || {X, L} <- Ttns]. + +filter_1_transition_list([]) -> + []; +filter_1_transition_list(Old) -> + %% Strategy: Chop off all of the w_* at the end, then look at **Old** to + %% see if we chopped off any. If we did chop off any, then add back a + %% constant 'w_t+' as a suffix. + New = lists:reverse(lists:dropwhile(fun(w_tt) -> true; + (w_ft) -> true; + (w_special_trimmed) -> true; + (_) -> false + end, lists:reverse(Old))), + Suffix = case lists:last(Old) of + w_ft -> ['w_t+']; + w_tt -> ['w_t+']; + w_special_trimmed -> ['w_t+']; + _ -> [] + end, + New ++ Suffix. + +log_make_call(Tag) -> + log_make_call(self(), Tag). + +log_make_call(Pid, Tag) -> + {call, Pid, Tag}. + +log_make_result(Result) -> + log_make_result(self(), Result). + +log_make_result(Pid, Result) -> + {result, Pid, Result}. + +pick_an_LPN(#proj{seq={Seq,_,_}} = P, SeedInt) -> + case (catch corfurl_sequencer:get(Seq, 0)) of + {ok, Max} -> + %% The sequencer may be lying to us, shouganai. + if SeedInt > Max -> (SeedInt rem Max) + 1; + true -> SeedInt + end; + _Else -> + pick_an_LPN(corfurl_client:restart_sequencer(P), SeedInt) + end. + +-define(LOG3(Tag, MkCall, PostCall), + begin + LOG__Start = lamport_clock:get(), + event_logger:event(log_make_call(Tag), LOG__Start), + LOG__Result = MkCall, + LOG__End = lamport_clock:get(), + PostCall, + event_logger:event(log_make_result(LOG__Result), LOG__End), + LOG__Result + end). + +-define(LOG(Tag, MkCall), ?LOG3(Tag, MkCall, okqq)). 
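+
+%% Sketch of what one ?LOG-wrapped operation contributes to the event trace
+%% that check_trace/3 consumes (timestamps are lamport_clock values):
+%%
+%%     {T_start, {call,   Pid, {read, LPN}}}
+%%     {T_end,   {result, Pid, PageOrError}}
+%%
+%% ?LOG3 additionally runs PostCall after taking T_end but before logging the
+%% result, which append/2 below uses to emit its extra goo_write events.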
+ +append(#run{proj=OriginalProj}, Page) -> + lamport_clock:init(), + lamport_clock:incr(), + Proj = get_projection(OriginalProj), + ?LOG3({append, Page}, + try + corfurl_client:pulse_tracing_start(write), + {Res, Proj2} = corfurl_client:append_page(Proj, Page), + put_projection(Proj2), + OtherPages0 = lists:usort(corfurl_client:pulse_tracing_get(write)), + OtherPages = case Res of + {ok, LPN} -> + OtherPages0 -- [LPN]; + _ -> + OtherPages0 + end, + put(zzzOtherPages, OtherPages), + perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end, + try + OPages = get(zzzOtherPages), + %%if OPages /= [] -> io:format("OPages = ~w\n", [OPages]); true -> ok end, + GooPid = {self(), goo, now()}, + [begin + event_logger:event(log_make_call(GooPid, {goo_write, OP, Page}), + LOG__Start), + event_logger:event(log_make_result(GooPid, who_knows), + LOG__End) + end || OP <- OPages] + catch XX:YY -> + exit({oops, ?MODULE, ?LINE, XX, YY, erlang:get_stacktrace()}) + end). + +read_result_mangle({ok, Page}) -> + Page; +read_result_mangle(Else) -> + Else. + +read_approx(#run{proj=OriginalProj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), + Proj = get_projection(OriginalProj), + LPN = pick_an_LPN(Proj, SeedInt), + ?LOG({read, LPN}, + try + {Res, Proj2} = corfurl_client:read_page(Proj, LPN), + put_projection(Proj2), + Res2 = read_result_mangle(Res), + perhaps_trip_read_approx(?TRIP_bad_read, Res2, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end). + +scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) -> + lamport_clock:init(), + lamport_clock:incr(), + Proj = get_projection(OriginalProj), + StartLPN = if SeedInt == 1 -> 1; + true -> pick_an_LPN(Proj, SeedInt) + end, + %% Our job is complicated by the ?LOG() macro, which isn't good enough + %% for our purpose: we must lie about the starting timestamp, to make + %% it appear as if each LPN result that scan_forward() gives us came + %% instead from a single-page read_page() call. + ?LOG({scan_forward, StartLPN, NumPages}, + try + TS1 = lamport_clock:get(), + case corfurl_client:scan_forward(Proj, StartLPN, NumPages) of + {{Res, EndLPN, MoreP, Pages}, Proj2} + when Res == ok; Res == error_badepoch -> + put_projection(Proj2), + PageIs = lists:zip(Pages, lists:seq(1, length(Pages))), + TS2 = lamport_clock:get(), + [begin + PidI = {self(), s_f, I}, + event_logger:event(log_make_call(PidI, {read, LPN}), + TS1), + Pm = perhaps_trip_scan_forward( + ?TRIP_bad_scan_forward, read_result_mangle(P), + EndLPN), + event_logger:event(log_make_result(PidI, Pm), TS2) + end || {{LPN, P}, I} <- PageIs], + Ps = [{LPN, read_result_mangle(P)} || + {LPN, P} <- Pages], + {ok, EndLPN, MoreP, Ps} + end + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end). + +fill(#run{proj=OriginalProj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), + Proj = get_projection(OriginalProj), + LPN = pick_an_LPN(Proj, SeedInt), + ?LOG({fill, LPN}, + try + {Res, Proj2} = corfurl_client:fill_page(Proj, LPN), + put_projection(Proj2), + perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end). 
+ +trim(#run{proj=OriginalProj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), + Proj = get_projection(OriginalProj), + LPN = pick_an_LPN(Proj, SeedInt), + ?LOG({trim, LPN}, + try + {Res, Proj2} = corfurl_client:trim_page(Proj, LPN), + put_projection(Proj2), + perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end). + +stop_sequencer(#run{proj=OriginalProj}, Method) -> + Proj = get_projection(OriginalProj), + Seq = element(1,Proj#proj.seq), + try + corfurl_sequencer:stop(Seq, Method), + ok + catch _:_ -> + ok + end. + +get_projection(OriginalProj) -> + case get(projection) of + undefined -> + OriginalProj; + Proj -> + Proj + end. + +put_projection(Proj) -> + put(projection, Proj). + +perhaps_trip_append_page(false, Res, _Page) -> + Res; +perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 -> + io:format(user, "TRIP: append_page\n", []), + {ok, 3}; +perhaps_trip_append_page(true, Else, _Page) -> + Else. + +perhaps_trip_read_approx(false, Res, _LPN) -> + Res; +perhaps_trip_read_approx(true, _Res, 3 = LPN) -> + io:format(user, "TRIP: read_approx LPN ~p\n", [LPN]), + <<"FAKE!">>; +perhaps_trip_read_approx(true, Res, _LPN) -> + Res. + +perhaps_trip_scan_forward(false, Res, _EndLPN) -> + Res; +perhaps_trip_scan_forward(true, _Res, 10) -> + io:format(user, "TRIP: scan_forward\n", []), + <<"magic number bingo, you are a winner">>; +perhaps_trip_scan_forward(true, Res, _EndLPN) -> + Res. + +perhaps_trip_fill_page(false, Res, _EndLPN) -> + Res; +perhaps_trip_fill_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 -> + io:format(user, "TRIP: fill_page\n", []), + ok; % can trigger both invalid ttn and bad read +perhaps_trip_fill_page(true, Res, _EndLPN) -> + Res. + +perhaps_trip_trim_page(false, Res, _EndLPN) -> + Res; +perhaps_trip_trim_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 -> + io:format(user, "TRIP: trim_page\n", []), + ok; +perhaps_trip_trim_page(true, Res, _EndLPN) -> + Res. + +-endif. % PULSE +-endif. % TEST + diff --git a/prototype/tango/test/corfurl_sequencer_test.erl b/prototype/tango/test/corfurl_sequencer_test.erl new file mode 100644 index 0000000..3396a17 --- /dev/null +++ b/prototype/tango/test/corfurl_sequencer_test.erl @@ -0,0 +1,98 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_sequencer_test). + +-compile(export_all). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +-define(M, corfurl_sequencer). + +-ifdef(TEST). +-ifndef(PULSE). 
+ +smoke_test() -> + BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".", + PageSize = 8, + NumPages = 500, + NumFLUs = 4, + MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end, + Del = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) || + X <- lists:seq(1, NumFLUs)] end, + + Del(), + FLUs = [begin + element(2, corfurl_flu:start_link(MyDir(X), + PageSize, NumPages*PageSize)) + end || X <- lists:seq(1, NumFLUs)], + FLUsNums = lists:zip(FLUs, lists:seq(1, NumFLUs)), + + try + [ok = corfurl_flu:write(FLU, 1, PageNum, <<42:(8*8)>>) || + {FLU, PageNum} <- FLUsNums], + MLP0 = NumFLUs, + NumFLUs = ?M:get_max_logical_page(FLUs), + + {ok, Sequencer} = ?M:start_link(FLUs), + try + {ok, _} = ?M:get(Sequencer, 5000), + [{Stream9, Tails9}] = StreamTails = [{9, [1125, 1124, 1123]}], + ok = ?M:set_tails(Sequencer, StreamTails), + {ok, _, [Tails9]} = ?M:get_tails(Sequencer, 0, [Stream9]), + + {ok, LPN0a} = ?M:get(Sequencer, 2), + {ok, LPN0b} = ?M:get(Sequencer, 0), + LPN0a = LPN0b - 2, + + {ok, LPN2a, _} = ?M:get_tails(Sequencer, 1, [2]), + {ok, LPN1a, _} = ?M:get_tails(Sequencer, 1, [1]), + {ok, _, [[LPN1a], [LPN2a]]} = ?M:get_tails(Sequencer, + 0, [1,2]), + {ok, LPN2b, _} = ?M:get_tails(Sequencer, 1, [2]), + {ok, LPN2c, _} = ?M:get_tails(Sequencer, 1, [2]), + {ok, _, [[LPN1a], [LPN2c, LPN2b, LPN2a]]} = + ?M:get_tails(Sequencer, 0, [1,2]), + {ok, LPN2d, _} = ?M:get_tails(Sequencer, 1, [2]), + {ok, LPN2e, _} = ?M:get_tails(Sequencer, 1, [2]), + + {ok, LPNX, [[LP1a], [LPN2e, LPN2d, LPN2c, LPN2b]]} = + ?M:get_tails(Sequencer, 0, [1,2]), + {ok, LPNX, [[LP1a], [LPN2e, LPN2d, LPN2c, LPN2b]]} = + ?M:get_tails(Sequencer, 0, [1,2]), % same results + LPNX = LPN2e + 1, % no change with 0 request + + ok + after + ?M:stop(Sequencer) + end + after + [ok = corfurl_flu:stop(FLU) || FLU <- FLUs], + Del() + end. + +-endif. % not PULSE +-endif. % TEST diff --git a/prototype/tango/test/corfurl_test.erl b/prototype/tango/test/corfurl_test.erl new file mode 100644 index 0000000..45423eb --- /dev/null +++ b/prototype/tango/test/corfurl_test.erl @@ -0,0 +1,262 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_test). + +-include("corfurl.hrl"). + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). + +-define(M, corfurl). + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + + +setup_flu_basedir() -> + "./tmp." ++ + atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".". + +setup_flu_dir(N) -> + setup_flu_basedir() ++ integer_to_list(N). + +setup_del_all(NumFLUs) -> + [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || + N <- lists:seq(1, NumFLUs)]. 
+ +setup_basic_flus(NumFLUs, PageSize, NumPages) -> + setup_del_all(NumFLUs), + [begin + element(2, corfurl_flu:start_link(setup_flu_dir(X), + PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) + end || X <- lists:seq(1, NumFLUs)]. + +-ifndef(PULSE). + +save_read_test() -> + Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", + PDir = Dir ++ ".projection", + Chain = [a,b], + P1 = ?M:new_simple_projection(PDir, 1, 1, 1*100, [Chain]), + + try + filelib:ensure_dir(Dir ++ "/ignored"), + ok = ?M:save_projection(Dir, P1), + error_overwritten = ?M:save_projection(Dir, P1), + + {ok, P1} = ?M:read_projection(Dir, 1), + error_unwritten = ?M:read_projection(Dir, 2), + + ok + after + ok = corfurl_util:delete_dir(Dir), + ok = corfurl_util:delete_dir(PDir) + end. + +smoke1_test() -> + PDir = "./tmp.smoke1.projection", + NumFLUs = 6, + PageSize = 8, + NumPages = 10, + FLUs = [F1, F2, F3, F4, F5, F6] = + setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + %% We know that the first LPN will be 1. + LPN_Pgs = [{X, list_to_binary( + lists:flatten(io_lib:format("~8..0w", [X])))} || + X <- lists:seq(1, 5)], + try + P0 = ?M:new_simple_projection(PDir, 1, 1, 1*100, + [[F1, F2, F3], [F4, F5, F6]]), + P1 = P0#proj{seq={Seq, unused, unused}}, + [begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + + [begin {ok, Pg} = ?M:read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], + + [begin + LPNplus = LPN + 1, + {ok, LPNplus, true, [{LPN, Pg}]} = ?M:scan_forward(P1, LPN, 1) + end || {LPN, Pg} <- LPN_Pgs], + {ok, 6, false, []} = ?M:scan_forward(P1, 6, 1), + {ok, 6, false, []} = ?M:scan_forward(P1, 6, 10), + [{LPN1,Pg1}, {LPN2,Pg2}, {LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}] = LPN_Pgs, + {ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = ?M:scan_forward(P1, 2, 2), + {ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} = + ?M:scan_forward(P1, 3, 10), + + %% Let's smoke read-repair: regular write failure + Epoch = P1#proj.epoch, + Pg6 = <<424242:(PageSize*8)>>, + + %% Simulate a failed write to the chain. + [F6a, F6b, F6c] = Chain6 = ?M:project_to_chain(6, P1), + NotHead6 = [F6b, F6c], + ok = ?M:write_page_to_chain([F6a], [F6a], Epoch, 6, Pg6, 1), + + %% Does the chain look as expected? + {ok, Pg6} = corfurl_flu:read(?M:flu_pid(F6a), Epoch, 6), + [error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) || + X <- NotHead6], + + %% Read repair should fix it. + {ok, Pg6} = ?M:read_page(P1, 6), + [{ok, Pg6} = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) || X <- Chain6], + + %% Let's smoke read-repair: failed fill + [F7a, F7b, F7c] = Chain7 = ?M:project_to_chain(7, P1), + NotHead7 = [F7b, F7c], + ok = corfurl_flu:fill(?M:flu_pid(F7a), Epoch, 7), + + %% Does the chain look as expected? + error_trimmed = corfurl_flu:read(?M:flu_pid(F7a), Epoch, 7), + [error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) || + X <- NotHead7], + + %% Read repair should fix it. 
+ error_trimmed = ?M:read_page(P1, 7), + [error_trimmed = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) || X <- Chain7], + %% scan_forward shouldn't see it either + {ok, 8, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10), + + [F8a|_] = Chain8 = ?M:project_to_chain(8, P1), + ok = corfurl_flu:fill(?M:flu_pid(F8a), Epoch, 8), + %% No read before scan, scan_forward shouldn't see 8 either, + %% but the next seq should be 9 + {ok, 9, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10), + + ok + after + corfurl_util:delete_dir(PDir), + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +smoke_append_badepoch_test() -> + PDir = "./tmp.smoke2.projection", + NumFLUs = 6, + PageSize = 8, + NumPages = 10, + FLUs = [F1, F2, F3, F4, F5, F6] = + setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + %% We know that the first LPN will be 1. + LPN_Pgs = [{X, list_to_binary( + lists:flatten(io_lib:format("~8..0w", [X])))} || + X <- lists:seq(1, 5)], + try + LittleEpoch = 4, + BigEpoch = 42, + P0 = ?M:new_simple_projection(PDir, BigEpoch, 1, 1*100, + [[F1, F2, F3], [F4, F5, F6]]), + P1 = P0#proj{seq={Seq, unused, unused}}, + [begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + + [{ok, _} = corfurl_flu:seal(FLU, BigEpoch) || FLU <- FLUs], + {_LPN, Pg} = hd(LPN_Pgs), + {error_badepoch, _} = corfurl_client:append_page(P1, Pg), + + P2 = P1#proj{epoch=LittleEpoch}, + {error_badepoch, _} = corfurl_client:append_page(P2, Pg), + + ok + after + corfurl_util:delete_dir(PDir), + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +-ifdef(TIMING_TEST). + +forfun_test_() -> + {timeout, 99999, fun() -> + [forfun(Procs) || Procs <- [10,100,1000,5000]] + end}. + +forfun_append(0, _P, _Page) -> + ok; +forfun_append(N, #proj{seq={Seq, _, _}} = P, Page) -> + {ok, _} = ?M:append_page(Seq, P, Page), + forfun_append(N - 1, P, Page). + +%%% My MBP, SSD +%%% The 1K and 5K procs shows full-mailbox-scan ickiness +%%% when getting replies from prim_file. 
%%% :-(
+
+%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.016815 sec
+%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.547976 sec
+%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 13.706686 sec
+%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 33.516312 sec
+
+%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.350147 sec
+%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.429485 sec
+%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.643233 sec
+%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 15.686058 sec
+
+%%%% forfun: 10 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 13.479458 sec
+%%%% forfun: 100 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 14.752565 sec
+%%%% forfun: 1000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 25.012306 sec
+%%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec
+
+forfun(NumProcs) ->
+    PDir = "./tmp.forfun.projection",
+    io:format(user, "\n", []),
+    NumFLUs = 4,
+    PageSize = 8,
+    %%PageSize = 4096,
+    NumPages = 200*1000,
+    PagesPerProc = NumPages div NumProcs,
+    FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages),
+    {ok, Seq} = corfurl_sequencer:start_link(FLUs),
+
+    try
+        Chains = [[F1, F2], [F3, F4]],
+        %%Chains = [[F1], [F2], [F3], [F4]],
+        P0 = ?M:new_simple_projection(PDir, 1, 1, NumPages*2, Chains),
+        P = P0#proj{seq={Seq, unused, unused}},
+        Me = self(),
+        Start = now(),
+        Ws = [begin
+                  %% Each worker appends its own PageSize-byte page.
+                  Page = <<X:(PageSize*8)>>,
+                  spawn_link(fun() ->
+                                     forfun_append(PagesPerProc, P, Page),
+                                     Me ! {done, self()}
+                             end)
+              end || X <- lists:seq(1, NumProcs)],
+        [receive {done, W} -> ok end || W <- Ws],
+        End = now(),
+        io:format(user, "forfun: ~p procs writing ~p pages of ~p bytes/page to ~p chains of ~p total FLUs in ~p sec\n",
+                  [NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]),
+        ok
+    after
+        corfurl_util:delete_dir(PDir),
+        corfurl_sequencer:stop(Seq),
+        [corfurl_flu:stop(F) || F <- FLUs],
+        setup_del_all(NumFLUs)
+    end.
+
+-endif. % TIMING_TEST
+-endif. % not PULSE
+-endif. % TEST
diff --git a/prototype/tango/test/pulse_util/event_logger.erl b/prototype/tango/test/pulse_util/event_logger.erl
new file mode 100644
index 0000000..8633b99
--- /dev/null
+++ b/prototype/tango/test/pulse_util/event_logger.erl
@@ -0,0 +1,133 @@
+%%% File : event_logger.erl
+%%% Author : Ulf Norell
+%%% Description :
+%%% Created : 26 Mar 2012 by Ulf Norell
+-module(event_logger).
+
+-compile(export_all).
+
+-behaviour(gen_server).
+
+%% API
+-export([start_link/0, event/1, event/2, get_events/0, start_logging/0]).
+-export([timestamp/0]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-define(SERVER, ?MODULE).
+
+-record(state, { start_time, events = [] }).
+
+-record(event, { timestamp, data }).
+ + +%%==================================================================== +%% API +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link() -> {ok,Pid} | ignore | {error,Error} +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +start_logging() -> + gen_server:call(?MODULE, {start, timestamp()}). + +event(EventData) -> + event(EventData, timestamp()). + +event(EventData, Timestamp) -> + gen_server:call(?MODULE, + #event{ timestamp = Timestamp, data = EventData }). + +async_event(EventData) -> + gen_server:cast(?MODULE, + #event{ timestamp = timestamp(), data = EventData }). + +get_events() -> + gen_server:call(?MODULE, get_events). + +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init(Args) -> {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%% Description: Initiates the server +%%-------------------------------------------------------------------- +init([]) -> + {ok, #state{}}. + +%%-------------------------------------------------------------------- +%% Function: %% handle_call(Request, From, State) -> +%% {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% Description: Handling call messages +%%-------------------------------------------------------------------- +handle_call(Event = #event{}, _From, State) -> + {reply, ok, add_event(Event, State)}; +handle_call({start, Now}, _From, S) -> + {reply, ok, S#state{ events = [], start_time = Now }}; +handle_call(get_events, _From, S) -> + {reply, lists:reverse([ {E#event.timestamp, E#event.data} || E <- S#state.events]), + S#state{ events = [] }}; +handle_call(Request, _From, State) -> + {reply, {error, {bad_call, Request}}, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% Description: Handling cast messages +%%-------------------------------------------------------------------- +handle_cast(Event = #event{}, State) -> + {noreply, add_event(Event, State)}; +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% Description: Handling all non call/cast messages +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate(Reason, State) -> void() +%% Description: This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any necessary +%% cleaning up. When it returns, the gen_server terminates with Reason. +%% The return value is ignored. +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. 
+ +%%-------------------------------------------------------------------- +%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState} +%% Description: Convert process state when code is changed +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +add_event(#event{timestamp = Now, data = Data}, State) -> + Event = #event{ timestamp = Now, data = Data }, + State#state{ events = [Event|State#state.events] }. + +timestamp() -> + lamport_clock:get(). diff --git a/prototype/tango/test/pulse_util/handle_errors.erl b/prototype/tango/test/pulse_util/handle_errors.erl new file mode 100644 index 0000000..798f379 --- /dev/null +++ b/prototype/tango/test/pulse_util/handle_errors.erl @@ -0,0 +1,153 @@ +%%%------------------------------------------------------------------- +%%% @author Hans Svensson <> +%%% @copyright (C) 2012, Hans Svensson +%%% @doc +%%% +%%% @end +%%% Created : 19 Mar 2012 by Hans Svensson <> +%%%------------------------------------------------------------------- +-module(handle_errors). + +-behaviour(gen_event). + +%% API +-export([start_link/0, add_handler/0]). + +%% gen_event callbacks +-export([init/1, handle_event/2, handle_call/2, + handle_info/2, terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). + +-record(state, { errors = [] }). + +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @doc +%% Creates an event manager +%% +%% @spec start_link() -> {ok, Pid} | {error, Error} +%% @end +%%-------------------------------------------------------------------- +start_link() -> + gen_event:start_link({local, ?SERVER}). + +%%-------------------------------------------------------------------- +%% @doc +%% Adds an event handler +%% +%% @spec add_handler() -> ok | {'EXIT', Reason} | term() +%% @end +%%-------------------------------------------------------------------- +add_handler() -> + gen_event:add_handler(?SERVER, ?MODULE, []). + +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever a new event handler is added to an event manager, +%% this function is called to initialize the event handler. +%% +%% @spec init(Args) -> {ok, State} +%% @end +%%-------------------------------------------------------------------- +init([]) -> + {ok, #state{}}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives an event sent using +%% gen_event:notify/2 or gen_event:sync_notify/2, this function is +%% called for each installed event handler to handle the event. 
+%% +%% @spec handle_event(Event, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_event({error, _, {_, "Hintfile '~s' has bad CRC" ++ _, _}}, State) -> + {ok, State}; +handle_event({error, _, {_, "** Generic server" ++ _, _}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, not_ready]}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, {merge_locked, _, _}]}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to read lock data from ~s: ~p\n", [_, {invalid_data, <<>>}]}}, State) -> + {ok, State}; +handle_event({error, _, Event}, State) -> + {ok, State#state{ errors = [Event|State#state.errors] }}; +handle_event(_Event, State) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives a request sent using +%% gen_event:call/3,4, this function is called for the specified +%% event handler to handle the request. +%% +%% @spec handle_call(Request, State) -> +%% {ok, Reply, State} | +%% {swap_handler, Reply, Args1, State1, Mod2, Args2} | +%% {remove_handler, Reply} +%% @end +%%-------------------------------------------------------------------- +handle_call(get_errors, S) -> + {ok, S#state.errors, S#state{ errors = [] }}; +handle_call(_Request, State) -> + Reply = ok, + {ok, Reply, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% This function is called for each installed event handler when +%% an event manager receives any other message than an event or a +%% synchronous request (or a system message). +%% +%% @spec handle_info(Info, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event handler is deleted from an event manager, this +%% function is called. It should be the opposite of Module:init/1 and +%% do any necessary cleaning up. +%% +%% @spec terminate(Reason, State) -> void() +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Convert process state when code is changed +%% +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== diff --git a/prototype/tango/test/pulse_util/lamport_clock.erl b/prototype/tango/test/pulse_util/lamport_clock.erl new file mode 100644 index 0000000..fab7244 --- /dev/null +++ b/prototype/tango/test/pulse_util/lamport_clock.erl @@ -0,0 +1,48 @@ + +-module(lamport_clock). + +-export([init/0, get/0, update/1, incr/0]). + +-define(KEY, ?MODULE). + +-ifdef(TEST). 
+ +init() -> + case get(?KEY) of + undefined -> + %% {Ca, Cb, _} = now(), + %% FakeTOD = ((Ca * 1000000) + Cb) * 1000000, + FakeTOD = 0, + put(?KEY, FakeTOD + 1); + N when is_integer(N) -> + ok + end. + +get() -> + get(?KEY). + +update(Remote) -> + New = erlang:max(get(?KEY), Remote) + 1, + put(?KEY, New), + New. + +incr() -> + New = get(?KEY) + 1, + put(?KEY, New), + New. + +-else. % TEST + +init() -> + ok. + +get() -> + ok. + +update(_) -> + ok. + +incr() -> + ok. + +-endif. % TEST diff --git a/prototype/tango/test/tango_oid_test.erl b/prototype/tango/test/tango_oid_test.erl new file mode 100644 index 0000000..9ce16f5 --- /dev/null +++ b/prototype/tango/test/tango_oid_test.erl @@ -0,0 +1,75 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_oid_test). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +-define(D(X), io:format(user, "Dbg: ~s = ~p\n", [??X, X])). + +-ifdef(TEST). +-ifndef(PULSE). + +tango_oid_smoke_test() -> + ok = tango_test:run_test("/tmp", "tango_oid_smoke", 4096, 5*1024, 1, + fun tango_oid_smoke_test_int/3). + +tango_oid_smoke_test_int(PageSize, Seq, Proj) -> + {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj), + + ok = tango_oid:stop(OID_Map), + ok. + +tango_oid_one_test() -> + ok = tango_test:run_test("/tmp", "tango_oid_one", 4096, 5*1024, 1, + fun tango_oid_one_test_int/3). + +tango_oid_one_test_int(PageSize, Seq, Proj) -> + {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj), + + try + K1 = foo, + K2 = bar, + OID_Num1 = 1, + error = tango_oid:get(OID_Map, "does not exist"), + + {ok, OID_Num1} = tango_oid:new(OID_Map, K1), + {ok, OID_Num1} = tango_oid:get(OID_Map, K1), + already_exists = tango_oid:new(OID_Map, K1), + %% The V2 put should *not* have clobbered the previous value + {ok, OID_Num1} = tango_oid:get(OID_Map, K1), + error = tango_oid:get(OID_Map, "does not exist"), + + {ok, OID_Num2} = tango_oid:new(OID_Map, K2), + {ok, OID_Num2} = tango_oid:get(OID_Map, K2), + + ok + after + tango_oid:stop(OID_Map) + end. + +-endif. % not PULSE +-endif. % TEST diff --git a/prototype/tango/test/tango_test.erl b/prototype/tango/test/tango_test.erl new file mode 100644 index 0000000..0ddf1e0 --- /dev/null +++ b/prototype/tango/test/tango_test.erl @@ -0,0 +1,315 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(tango_test). + +-compile(export_all). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +-define(SEQ, corfurl_sequencer). +-define(T, tango). + +-define(D(X), io:format(user, "Dbg: ~s =\n ~p\n", [??X, X])). + +-ifdef(TEST). +-ifndef(PULSE). + +pack_v1_test() -> + [begin + Packed = ?T:pack_v1(StreamList, Options, term_to_binary(Term), Size), + StreamList = ?T:unpack_v1(Packed, stream_list), + TermBin = ?T:unpack_v1(Packed, page), + Term = binary_to_term(TermBin) + end || StreamList <- [[], [1], [1,2,4]], + Options <- [[]], + Term <- [foo, {bar, baz, <<"yo">>}], + Size <- lists:seq(100, 5000, 500)]. + +run_test(RootDir, BaseDirName, PageSize, NumPages, NumFLUs, FUN) -> + {FLUs, Seq, P1, Del} = corfurl:simple_test_setup( + RootDir, BaseDirName, PageSize, NumPages, NumFLUs), + try + FUN(PageSize, Seq, P1) + after + ?SEQ:stop(Seq), + [ok = corfurl_flu:stop(FLU) || FLU <- FLUs], + Del() + end. + +smoke_test() -> + ok = run_test("/tmp", "projection", + 4096, 5*1024, 1, fun smoke_test_int/3). + +smoke_test_int(PageSize, Seq, P1) -> + ok = ?SEQ:set_tails(Seq, [{42,4242}, {43,4343}]), + {ok, _, [4242, 4343]} = ?SEQ:get_tails(Seq, 0, [42, 43]), + + LPN_Pgs = [{X, ?T:pad_bin(PageSize, term_to_binary({smoke, X}))} || + X <- lists:seq(1, 5)], + [begin + {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) + end || {LPN, Pg} <- LPN_Pgs], + [begin + {ok, Pg} = corfurl:read_page(P1, LPN) + end || {LPN, Pg} <- LPN_Pgs], + + ok. + +write_forward_test() -> + ok = run_test("/tmp", "write_forward", + 4096, 5*1024, 1, fun write_forward_test_int/3). + +write_forward_test_int(PageSize, _Seq, P1) -> + StreamNum = 0, + NumPages = 10, + Pages = [term_to_binary({smoke, X}) || X <- lists:seq(1, NumPages)], + BackPs0 = [{StreamNum, []}], + {P2, BackPs1} = write_stream_pages(P1, Pages, PageSize, BackPs0, StreamNum), + {_P3, _BackPs2} = write_stream_pages(P2, Pages, PageSize, BackPs1, StreamNum, 3), + + ok. + +write_stream_pages(Proj0, Pages, PageSize, InitialBackPs, StreamNum) -> + write_stream_pages(Proj0, Pages, PageSize, InitialBackPs, StreamNum, 0). + +write_stream_pages(Proj0, Pages, _PageSize, InitialBackPs, StreamNum, Junk) -> + WriteJunk = fun() -> JP0 = <<"blah">>, + {{ok, _}, _} = tango:append_page(Proj0, JP0, + [StreamNum]) + end, + F = fun(Page, {Proj1, BackPs}) -> + if Junk band 1 /= 0 -> WriteJunk(); + true -> ok end, + {{ok, LPN}, Proj2} = + tango:append_page(Proj1, Page, [StreamNum]), + if Junk band 1 /= 0 -> WriteJunk(); + true -> ok end, + {Proj2, tango:add_back_pointer(StreamNum, BackPs, LPN)} + end, + {_Px, _BackPs} = Res = lists:foldl(F, {Proj0, InitialBackPs}, Pages), + %% io:format(user, "BackPs ~w\n", [_BackPs]), + Res. + +scan_backward_test() -> + ok = run_test("/tmp", "scan_backward", + 4096, 5*1024, 1, fun scan_backward_test_int/3). 
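+
+%% Minimal append/scan round trip: an illustrative sketch, not part of the
+%% original suite.  It reuses run_test/6 and only the tango calls exercised
+%% by the tests above; the *_sketch_* names are new.
+append_scan_sketch_test() ->
+    ok = run_test("/tmp", "append_scan_sketch",
+                  4096, 5*1024, 1, fun append_scan_sketch_int/3).
+
+append_scan_sketch_int(_PageSize, _Seq, P1) ->
+    Stream = 0,
+    Page = term_to_binary({sketch, hello}),
+    %% Append one page to stream 0, then record its LPN as a back-pointer.
+    {{ok, LPN}, P2} = tango:append_page(P1, Page, [Stream]),
+    BackPs = tango:add_back_pointer(Stream, [{Stream, []}], LPN),
+    [LPN|_] = proplists:get_value(Stream, BackPs),
+    %% Scanning backward from that LPN with Pages=true yields {LPN, Page}
+    %% pairs in oldest -> newest order; here there is only one.
+    [{LPN, Page}] = tango:scan_backward(P2, Stream, LPN, true),
+    ok.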
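+
+%% Name-to-OID mapping sketch (illustrative; distilled from
+%% tango_oid_test:tango_oid_one_test_int/3 above, no new behavior).
+oid_sketch_test() ->
+    ok = run_test("/tmp", "oid_sketch", 4096, 5*1024, 1,
+                  fun oid_sketch_int/3).
+
+oid_sketch_int(PageSize, Seq, Proj) ->
+    {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
+    try
+        error = tango_oid:get(OID_Map, "no such name"),      %% unknown name
+        {ok, Num} = tango_oid:new(OID_Map, "a name"),        %% allocate an OID
+        {ok, Num} = tango_oid:get(OID_Map, "a name"),        %% look it up again
+        already_exists = tango_oid:new(OID_Map, "a name"),   %% names are unique
+        ok
+    after
+        tango_oid:stop(OID_Map)
+    end.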
+ +scan_backward_test_int(PageSize, _Seq, P1) -> + StreamNum = 0, + NumPages = 10, + PageSeq = lists:seq(1, NumPages), + Pages = [term_to_binary({smoke, X}) || X <- PageSeq], + BackPs0 = [{StreamNum, []}], + {P2, BackPs1} = write_stream_pages(P1, Pages, PageSize, BackPs0, StreamNum), + LastLPN = hd(proplists:get_value(StreamNum, BackPs1)), + + LastLPN=LastLPN, + [begin + ShouldBe = lists:seq(1, BackwardStartLPN), + ShouldBePages = lists:zip(ShouldBe, lists:sublist(Pages, BackwardStartLPN)), + + %% If we scan backward, we should get a list of LPNs in + %% oldest -> newest (i.e. smallest LPN to largest LPN) order. + ShouldBe = tango:scan_backward(P2, StreamNum, BackwardStartLPN, + false), + StopAtLimit = NumPages div 2, + StopAtKicksInAt = StopAtLimit + 2, + {StopAtLPN, ShouldBeLPNS} = + if BackwardStartLPN < StopAtKicksInAt -> + {0, ShouldBe}; + true -> + {StopAtLimit, [LPN || LPN <- ShouldBe, LPN > StopAtLimit]} + end, + ShouldBeLPNS = + tango:scan_backward(P2, StreamNum, BackwardStartLPN, StopAtLPN, + false), + + %% If we scan backward, we should get a list of LPNs in + %% oldest -> newest (i.e. smallest LPN to largest LPN) order + %% together with the actual page data. + ShouldBePages = tango:scan_backward(P2, StreamNum, BackwardStartLPN, + true), + ok + end || BackwardStartLPN <- lists:seq(1, NumPages)], + + ok. + +tango_dt_register_test() -> + ok = run_test("/tmp", "tango_dt_register", + 4096, 5*1024, 1, fun tango_dt_register_int/3). + +tango_dt_register_int(PageSize, Seq, Proj) -> + {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj), + + {ok, Reg1Num} = tango_oid:new(OID_Map, "register1"), + {ok, Reg1} = tango_dt_register:start_link(PageSize, Seq, Proj, + Reg1Num), + {ok, Reg2Num} = tango_oid:new(OID_Map, "register2"), + {ok, Reg2} = tango_dt_register:start_link(PageSize, Seq, Proj, + Reg2Num), + + NumVals = 8, + Vals = [lists:flatten(io_lib:format("version ~w", [X])) || + X <- lists:seq(1, NumVals)], + [tango_dt_register:set(Reg, Val) || Reg <- [Reg1, Reg2], Val <- Vals], + LastVal = lists:last(Vals), + {ok, LastVal} = tango_dt_register:get(Reg1), + {ok, LastVal} = tango_dt_register:get(Reg2), + + %% If we instantiate a new instance of an existing register, then + %% a single get should show the most recent modification. + {ok, Reg2b} = tango_dt_register:start_link(PageSize, Seq, Proj, + Reg2Num), + {ok, LastVal} = tango_dt_register:get(Reg2b), + %% If we update the "old" instance of a register, then the "new" + %% instance should also see the update. + NewVal = {"Heh", "a new value"}, + ok = tango_dt_register:set(Reg2, NewVal), + C1 = fun() -> {ok, NewVal} = tango_dt_register:get(Reg2), % sanity check + {ok, NewVal} = tango_dt_register:get(Reg2b), ok end, + ok = C1(), + + ok = tango_dt_register:checkpoint(Reg2), + ok = C1(), + {ok, Reg2c} = tango_dt_register:start_link(PageSize, Seq, Proj, + Reg2Num), + {ok, NewVal} = tango_dt_register:get(Reg2c), + + [ok = tango_dt_register:stop(X) || X <- [Reg1, Reg2, Reg2b, Reg2c]], + ok. + +tango_dt_map_test() -> + ok = run_test("/tmp", "tango_dt_map", + 4096, 5*1024, 1, fun tango_dt_map_int/3). 
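+
+%% Smallest register round trip: an illustrative sketch distilled from
+%% tango_dt_register_int/3 above.  The OID name "sketch_reg" and the
+%% register_sketch_* function names are new; every call shape matches the
+%% tests above.
+register_sketch_test() ->
+    ok = run_test("/tmp", "register_sketch", 4096, 5*1024, 1,
+                  fun register_sketch_int/3).
+
+register_sketch_int(PageSize, Seq, Proj) ->
+    {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj),
+    {ok, RegNum} = tango_oid:new(OID_Map, "sketch_reg"),
+    {ok, Reg} = tango_dt_register:start_link(PageSize, Seq, Proj, RegNum),
+    ok = tango_dt_register:set(Reg, "v1"),
+    ok = tango_dt_register:set(Reg, "v2"),
+    {ok, "v2"} = tango_dt_register:get(Reg),       %% last write wins
+    %% A second instance attached to the same OID replays the shared log
+    %% and therefore sees the same value.
+    {ok, Reg2} = tango_dt_register:start_link(PageSize, Seq, Proj, RegNum),
+    {ok, "v2"} = tango_dt_register:get(Reg2),
+    [ok = tango_dt_register:stop(X) || X <- [Reg, Reg2]],
+    ok = tango_oid:stop(OID_Map),
+    ok.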
+ +tango_dt_map_int(PageSize, Seq, Proj) -> + {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj), + + {ok, Reg1Num} = tango_oid:new(OID_Map, "map1"), + {ok, Reg1} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg1Num), + {ok, Reg2Num} = tango_oid:new(OID_Map, "map2"), + {ok, Reg2} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg2Num), + + NumVals = 8, + Vals = [lists:flatten(io_lib:format("version ~w", [X])) || + X <- lists:seq(1, NumVals)], + Keys = ["key1", "key2"], + [tango_dt_map:set(Reg, Key, Val) || Reg <- [Reg1, Reg2], + Key <- Keys, Val <- Vals], + LastVal = lists:last(Vals), + C1 = fun(R, LV) -> [{ok, LV} = tango_dt_map:get(R, Key) || Key <- Keys], + ok end, + ok = C1(Reg1, LastVal), + ok = C1(Reg2, LastVal), + + %% If we instantiate a new instance of an existing map, then + %% a single get should show the most recent modification. + {ok, Reg2b} = tango_dt_map:start_link(PageSize, Seq, Proj, Reg2Num), + [{ok, LastVal} = tango_dt_map:get(Reg2b, Key) || Key <- Keys], + %% If we update the "old" instance of a map, then the "new" + %% instance should also see the update. + NewVal = {"Heh", "a new value"}, + [ok = tango_dt_map:set(Reg2, Key, NewVal) || Key <- Keys], + [ok = C1(R, NewVal) || R <- [Reg2, Reg2b]], + [ok = C1(R, LastVal) || R <- [Reg1]], + + [ok = tango_dt_map:checkpoint(R) || R <- [Reg1, Reg2, Reg2b]], + NewVal2 = "after the checkpoint....", + [ok = tango_dt_map:set(Reg2, Key, NewVal2) || Key <- Keys], + [ok = C1(R, NewVal2) || R <- [Reg2, Reg2b]], + [ok = C1(R, LastVal) || R <- [Reg1]], + + ok. + +tango_dt_queue_test() -> + ok = run_test("/tmp", "tango_dt_queue", + 4096, 5*1024, 1, fun tango_dt_queue_int/3). + +tango_dt_queue_int(PageSize, Seq, Proj) -> + MOD = tango_dt_queue, + {ok, OID_Map} = tango_oid:start_link(PageSize, Seq, Proj), + + {ok, Q1Num} = tango_oid:new(OID_Map, "queue1"), + {ok, Q1} = MOD:start_link(PageSize, Seq, Proj, Q1Num), + + {ok, true} = MOD:is_empty(Q1), + {ok, 0} = MOD:length(Q1), + + Num1 = 4, + Seq1 = lists:seq(1, Num1), + RevSeq1 = lists:reverse(Seq1), + [ok = MOD:in(Q1, X) || X <- Seq1], + {ok, Num1} = MOD:length(Q1), + {ok, {value, 1}} = MOD:peek(Q1), + {ok, Seq1} = MOD:to_list(Q1), + ok = MOD:reverse(Q1), + {ok, RevSeq1} = MOD:to_list(Q1), + ok = MOD:reverse(Q1), + + [{ok, {value, X}} = MOD:out(Q1) || X <- lists:seq(1, Num1)], + {ok, empty} = MOD:out(Q1), + {ok, []} = MOD:to_list(Q1), + + [ok = MOD:in(Q1, X) || X <- Seq1], + {ok, false} = MOD:member(Q1, does_not_exist), + {ok, true} = MOD:member(Q1, Num1), + ok = MOD:filter(Q1, fun(X) when X == Num1 -> false; + (_) -> true + end), + Num1Minus1 = Num1 - 1, + C1 = fun(Q, Expected) -> {ok, false} = MOD:member(Q, Num1), + {ok, true} = MOD:member(Q, Num1 - 1), + {ok, Expected} = MOD:length(Q), ok end, + ok = C1(Q1, Num1Minus1), + + {ok, Q2} = MOD:start_link(PageSize, Seq, Proj, Q1Num), + ok = C1(Q2, Num1Minus1), + ok = MOD:in(Q2, 88), + ok = C1(Q2, Num1), + ok = C1(Q1, Num1), + +?D(before_CPs_start), + [ok = MOD:checkpoint(Q1) || _ <- lists:seq(1, 4)], +?D(after_CPs_end), + [ok = C1(X, Num1) || X <- [Q1, Q2]], + {ok, Q3} = MOD:start_link(PageSize, Seq, Proj, Q1Num), + [ok = C1(X, Num1) || X <- [Q1, Q2, Q3]], +?D(before_Q4_start), + {ok, Q4} = MOD:start_link(PageSize, Seq, Proj, Q1Num), +?D({after_Q4_start, Q4}), + ok = MOD:in(Q4, 89), +?D(after_Q4_in), + Num1Plus1 = Num1 + 1, + [ok = C1(X, Num1Plus1) || X <- [Q1, Q2, Q3, Q4]], + + [ok = MOD:stop(X) || X <- [Q1, Q2, Q3, Q4]], + ok. + +-endif. % not PULSE +-endif. % TEST
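+
+%% Quick reference, summarizing the tango_dt_queue calls exercised by
+%% tango_dt_queue_int/3 above (no new calls are introduced here):
+%%
+%%   {ok, Q} = tango_dt_queue:start_link(PageSize, Seq, Proj, OidNum),
+%%   ok = tango_dt_queue:in(Q, Item),
+%%   {ok, {value, Item}} = tango_dt_queue:peek(Q),
+%%   {ok, {value, Item}} = tango_dt_queue:out(Q),   %% {ok, empty} when empty
+%%   {ok, Items} = tango_dt_queue:to_list(Q),
+%%   {ok, N}     = tango_dt_queue:length(Q),
+%%   {ok, Bool}  = tango_dt_queue:is_empty(Q),
+%%   {ok, Bool}  = tango_dt_queue:member(Q, Item),
+%%   ok = tango_dt_queue:reverse(Q),
+%%   ok = tango_dt_queue:filter(Q, Pred),
+%%   ok = tango_dt_queue:checkpoint(Q),   %% state survives (see Q3/Q4 above)
+%%   ok = tango_dt_queue:stop(Q).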