From ab31a27823035b0ddcf60a1db8e76f0fe769db01 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 2 Mar 2015 17:25:12 +0900 Subject: [PATCH 01/70] Add APL v2 LICENSE file --- LICENSE | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e454a52 --- /dev/null +++ b/LICENSE @@ -0,0 +1,178 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + From 2bf76b57274e396cd60df2ecf527b102b973dc98 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 18 Feb 2015 14:42:27 +0900 Subject: [PATCH 02/70] Initial documentation import --- .gitignore | 3 +- doc/chain-self-management-sketch.Diagram1.dia | Bin 0 -> 6836 bytes doc/chain-self-management-sketch.Diagram1.pdf | Bin 0 -> 45367 bytes doc/chain-self-management-sketch.org | 672 ++++++++++++++++++ 4 files changed, 673 insertions(+), 2 deletions(-) create mode 100644 doc/chain-self-management-sketch.Diagram1.dia create mode 100644 doc/chain-self-management-sketch.Diagram1.pdf create mode 100644 doc/chain-self-management-sketch.org diff --git a/.gitignore b/.gitignore index 8e46d5a..2693865 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,9 @@ .eunit deps *.o -*.beam +ebin/*.beam *.plt erl_crash.dump -ebin rel/example_project .concrete/DEV_MODE .rebar diff --git a/doc/chain-self-management-sketch.Diagram1.dia b/doc/chain-self-management-sketch.Diagram1.dia new file mode 100644 index 0000000000000000000000000000000000000000..6e42b7e53e51f24b12245bd92d9f45ed2f3c9694 GIT binary patch literal 6836 zcmV;l8cXFLiwFP!000021MOW~bK6#ue%G(SDpeVK20lLbi#bZxPBJs8%I><7Ol?(O zR1gJIOelf{LRR#$zkRzwQsNC1MBu=Nd{pHqqz=(MWaIm~`}F1S-~M?$i9V+Je4I_s zP8p%6Q92!Fm*eT^?DXHi{^ONB{q~!azh91%H}G$iC)W|YVmil1XQx+1F?;j+_3iB~ zncU5jBFo8Sd_(5x>;Fq8ljJoldVTuMNffPL;4&!^d~W${QWW|4;-*NWX>y&Oon9ov zKSp_WGrc@rtXi%cW|J(BJ|>g1)2}|1Kc}ykmwCN$odegNC8P8rPm@2YFHPacxnnLp zOY`l^UeB`m7?u=wvz;Xs*TMg;ty!*`!=mZvo3Gx=uNH4<`NYGu_PxdidL$Croi<&6~; zSGgTu7FXwg9{Kjc|&bjT&oG<-Hb2O`6Ft!xu)VuSIcFuAN$m|*VX-C7Ef*QcPV0$+@<;Q^6ws)J6e9h zKsEVIdKx9eV*GLSXzeq}E`Cpk#d5R%m}IxZt0XU?SJAud&(jCH1gw(r<=N@~(9KV4 zdw*~>z{Z`AYnDCelpx$v8H?@DU>8BE1YBHfUq6Q}B-7C(y_dmI1}AR78Ie}Rim>AZ zrjzvge3<3aUBjWhU6n#gIN9CV3Q8r1;~L1iO2?zCqFNX~?%EewewpTv?Sv9e zH-C=&Zsu3n?fD*Sbsuk)dViFZS#o)~$-+DPQO8}mNdMgV4z}@UYfaTBQ5vHg>$dFH zU(zDV(;~l%hVWSx02yZytuSQ|fPIzakWGty(((D|LvlTy+`*enrt{NiUfdxn0iY~i z`mc2IF)hZ!WF0)8{tk9DdhZuyw-iq{6S@g?dx60aA()9J;gYLn(l%M}fcHdcGeKZ= zj;7b?{;xEh-dyi74-Od!`#7$Or(ZviQWZ=60$6{U_6D94Q&a6t@r7F0ztEsm5t%%;=xbb49t<%v7ovq%ul9V{Z0$s_p&)MYr^ zek1!#%%@xw+@iEQnkm2{I#2REyWN)da5IR;*FK-5)6wA#spdi+{m^R4Kt%(6hTCqw z!(gkwL|1U5F2TOYO6xIt_$!z+JG!^$wQTuXW-DL30+_9LR0m5Qw|IGDiJO`Y|9HY!IW1Qv21wxc5V@{hQpo zPT^ZL0Zjs$G(!`y2?(?!JTUFJD<3s!_BlKX`fa9x~rv4G!Ta+I*bxkAZGc7-cD z+dQxoVNi>JD*;y;;)-m5E6O*p{EFulYxQkQU~!Ez^c)dZ^B8V>X|?v5r?-*>P6X9@ zuqIh^$3Fj~o=n5C^lbTAE3@0^Dx;H+SZ|Qhe zXfGdvH7<7%g5a?*mZBJ8e1>j85?@HNC}|)Sangwxl#&Xjeda{8vdLOW5u8Y6ACe*@ z_m~u+3FiPC_el|qg`|jQo)oc>=Ww6n@NGzP@J*e<5zU;#Q)@w8pw13+{upb4%yG>(Su@#Pv2~6U9yv}Cqc%8B29A5?9QX4^ z5onQR(ZzTaD6TV#W0Eix^7uNXINn0E$x0<-W{9Jf`HeQ2r6dcI2GSAfq=>m8#&WO4 zT&--fW^TKZ>lA0a=C&InA1IC;8!HSH*B^>Yrhjauwx31ICzs`3FHbN+J(*`GZ*PjL zEMHsx`YgIm10Qz9hny2Gj17F)HXrUUXJ(CSycBvsOve(OFpos2(8pVdu>)<1hsSv~VZ5%YK+U8F_n*j*)9zPunca)RxD?w66W zl=*Cx2fa^8V#z^tXrw`twbH3XE;zSS&eVp)*i-^w+qq3mh^^U3tpu zwG=~I@WNb~cHq=N3Bo~7Vh(`h)d_52dvz#K5=K?U4c9saN+LIIZXQnZo*xA+nC>Jg z-8mq_6AW$60TAMoNm~469u@iBe*|9h&TC*nYi2Nr#6$Ov?s%xtIwoR4iO=|^ z!Zh7d%s}(9(_Jh$;a;7(#yhLSXo~QS_N3(g8$VWPc<%FRTBIntm_a*dqVYyN`g`@CdxFl(Ery5pZzeIjA!$`b2A#(`)nrTyT&Uk}OKX z7Qn_ShLfHIchYO3WE-2TcS=SLX9@!pIJVFlzlkDLXlk@!+&QV_u^op)qGWFpC4a#l z27%AK7lwl+tQ^jOQbqv{I=df0&`#82)6COhf_#+x_7Se3t( zrMui`kuXlqu-L;hbv#%RMDxEQQfv0Z`7_C60REn6c5a0%ej6kK>CaACf|<)9nL zc^g%4WnQcAwEwc*u#K4@7K|5#Q$2+$&+d%y>IGKp@W8@@4j^nqlpCqHN)X3<`hPWj zyhE=G{$Ie+7I0KH7&oyEqq18RZC*P}i4d_S+EO9>qNsM-#)>#=CBP^pQVAVnCOBup zD~#^n;vVQTL}OU^NlPhi=Mg|PxbvN*A)*!#^_e!nHe0qOZ{qgK*5}GspSN?_Le-8a z!|Yy-Qjo$`Ie}6QG+3m{NbK9elrWwPcbaXk<{3LRl5xkaW=t{7{GPE>VW^f`3aX4! z;f($HpRs4NaKzq4${3(g%o*WMhC}v_rsVSGWv8nz&Nqcw0+1nM2EU~RkH8UtUx#v! z53JM<+*a(eGWF3N{b7WVZ7_`QN9D>&)3%0PYni z#;~^BBC0qrAyMw}s4|1k<-880>~3)HGHEToL1Vc)7&&l+<5XQ->XZmb@g=#?`k2EV zUU4+kSyqZ^0VNhy;GUaaI86QIF}FlY#ReM80OLxroO_?Ey?>MY>j=5hahbYz>^&Lm zkAN!oM;)rcQO6-j@{20A1ym^ua(#Cw093Ky(g=o=FAbs!@V7;|x)YaX&n6F`4<`Xq z0;DvB6y4wCbf&DCV(GoF%q6C zl&m>39n6H^-DLGZczBmugn{tEMs6JTcMZb++tcK2hf#mgxDbR1BEX`HCGqNb6N5uL zKnk^%mqCAn<+JqYMo0BR{(E<~$Kxih#uCOAzGL>7^KQ*zonZX2l?Y+Sj(-kV(@U(` zN}h9dphm8sM){8L{ff5yZXUB}J`d24GBJ%Q&;0-xdvO$Rt7II5twIh)f_<$bd`%nI2Kc08D)Z zCeucoDaJk4nyFM za6SOL$!I*Nql5-v3c&QJItF0sBQP~i;Kw{fY@V+@H;^pB2uvn3zyyATDYZ2kFt~C7 zm-cC60Hz)SlcX&G6I0lORe44xQJP5{1K=RDw4gw02!ID{s-chpocah(jdQvs!+!V1 zCooZrc$ZE5Y3a?mGVRiU-Un|kAk$%O45-vcRBD_7E*&x0D#_nX|_0ta|T=0v(m3)A^aVHsfIoVXzC+0HBSH57ROt79_*&eqc|K%BmkKl zVOW`>2@57uK&HplF(A|Dj7$f5nn>Nkh#!W{I>75PM|ZraXFY+{q*+-=E~K*qkgT}2 zDB1F3NBz|7W?r?){YyEMv4Gn|Nb}gFN+(p^r8v}*)F#yO7#pp4XP0Fot=vE)Qc$-EjKRh!(u6wH5o zmrCk`OSvOlTK$xz5|8!7L2QGMNprqfDyR%d^aUW1tmwoH~@ujp)Eg*}u>IOR?oE1E<5#V5bChXlkdpCU(m90Oc-wzN*H_ z%Nl5T6*^E}%@hs1>#U45QO0OB(3TjcTp1rRe&ZvRo7}&YN?s@E0UOY`0bHu;EY#%i z^$f9&$=#9m0fL^tfnt2mQGcK0$z+mFvQeH~?}L)&?W2*pO?fQn^(+YF&{7V?iRzpY zp>V#sJi2>w&Dko=w^wJf3fe)6NyV9Em_aSL$H^*!=uIF{VY14alr+iDOU0|+lcC^#ErN!tZhbqCGYqmgU9OqI_yvZydZ))q7rqZ3aQK7}vuY1aSN^Xh_$gkMLAsh5p z=AHys;!7IxxHVORJ#BET@a>3vKTfVQFxvLGAdS=a4LI*Oa)pBrZJf{QB2C;%k^)KC z_s=l|8biQv7d{=4RyJA372#;g2+OIELMf+}^yrF!Hzz4`j55QFah8Q4kI#L`;}5fJ zc=clCCcKQko)m*<_2lp-&w+4Xo@99n6n3&&7QH@;?*Dr7;%1uuIfGj>y?mV~c^VCq zdHOPV99>L~#4{K{iQ_ypXKKaisL7Izi%A?|4hck9BDwU*l8(@oBmt5JToF^A!Gptk z^t8r$EZNE?>#!s^-;%Gfq)(ExgtIk{6x0TieC8ziR?;L|a9{-gf3f!e!@zT0@towu zn80)G^W3AyQjP1AlmdLFx=gn9T0M#LycTk-8fb|rzU6dn?e$oyh3(a0IZKSPmNQ9# z;|yoQhGVI4EcKak=Ktn+&G+Zi^mdkKvy&o=Qb3lwY?=nj^i7$oS;i_HMtust)GZ{g zBg*4^O26F(Fd=*)M*&}A#imo(?n3$H%jVmwo-DXy}7EgB1?nWwkNq$juI;wnm_i}7fb=240f+6YkbBFzJxc0i}9JnON6 zPrHq?HZL%xgjp?OPB59zbL}~`nu8>Uk}$WL3B@p1kssaduqL0jvB_GDiXCO`L!4E3 zjk7u~<$au$yKor!+~cf2M#*(Fo<={Xvt*1mTOZdBzBS!mEmr|7P$xq0Ed<{_f4}3GPodv)hu?mn1n9(V222w7-3J;`>;{7N{d=vegO#gUsUt#ewDidOs zPcGADzPxC4=hjt~)&>;oBZ@UHbrg~a$^l#8w^72UFSwEtQ5PV|cdr^cl^ zf@0fk6H7)k)y^k4tt4UREbRu;5G@6dr6Npd)(ijd;>~Wk;MrO9U+LPb@H?B#v-Rsd z_>1IbHeNJxTf7_}Z%ABLT&zF!@Zx+`qOoy_`)7mW=^s9;ZkGU`eS}Zj27HzvrU})` zdWO#>Tnek~Kw4t8g~x_arFwy$!#(IZp1;CuW6;f4*@rc#yUcE31PJ#kegMKg0%7C&4K4^%*!0W# zOhLVu!laV})IDceEFJc^7kxPIoZaPkJ4Y{uFC$Kwiryvjs|>!D;nfs$eKbzzarEPK zNTRosNmQ<#M|nC=^N;BzIeDLF9{~g}qnqhvnn!PE$?z(Tei{$c=^Q>HPG}s>(=BsMW`0rnSIE_Ax;j{YyH_>HUB;(1P1ZeaPjeBo}0E?d#78j2v$z7V45APo>a$Mf+8YHV2mbUdsY+QMit5lhcw#TZda#exVkVAz`ql$S8qhXj(hjD~!X zj#6!RHPIWUvDDEIAHsQt4^z*u=CekO!txrgP4X5(Bync2l4p7WNdb?#!J~az7!Fwb z2uO_!0I-rMwto8tVDg%Z4N+htF(SZ5vPAU5M&iLn>dHpiRm5N;^%0U9=lugll+8Yz zU%!n3CSgkvKENTU07|9*)(cb;0hPK!rCnVNsMJSPk}aT;1&lDndWI!hN?|1|VRLhM z;Q9eeGQd)Iu(YF$0hjuSORc2uW6;@a5w{?k#eyh~X|dFSIMEA8dKY5cU1siY>0v;m zJ|a>ZvtTSLg4cdpoLIp)Vv-psNsP7zQK=VErSBLGu+&wl_O3DpSn4AznHFG4ltCWt z7ng)(q6`^u1SY|(MWLo2O(j?`m^wtJ9eoVY)JJG)KG{GL@1Z47H-#3{BDRDR4KyNC zUxG{Dv2YaE88YpvV*sW;0#ozJ0^nxFUOU?;ws7RQ#wJ_1tUJ&K>!C@bmRmj8gNPDU|x$tWlwM^IC7og(jI*b(9}n0VlBj& z40e#=enU(~utnFbGQ_0OkprQkC&&~Wxeh~2+v*sAsgJlF(Z>KyeS{|2!t4pkl#7+$Gr4;ycqDiKpdTl4p&3dSb47QQFf>EyBOoua^hiC-yu2`uP7Wpp)-Z0sbLg!7Ck&LRc&7FEOp{GIh4yjk69~$wi&-S_$2a-A(V)Z*nV_>zRUFUxlu_yBDW& zy5D&kb9`(AUhkhN#tr%tIh!PXGg36JZS8S}l0|Y>>@wPH()aJ)HLGm8-g{ZkmI=G| zBYWT6U{~gho)4SvtT)V8fpTqH;E5c9niT4R@HgiSKRx?Qy;FZFutZ zd(-=jv(7_w_l?v}$$RzB@1%tIM-(RM5TZUuaP+^&U%tM)49J~J{m3(=SiiA-lhAoS zY`^PXCVcOFT_B7)z=X;Q*5dQ;jJ458$Jul{v60fW`oe!mcZyhJ1(GiW)l7z?0-JAnLL&)uwLy!23I_1*u$YzI-^9}1+zxtv%SlY*q^nU|Pt2`&W z?zg&M5^MgT`FO1-m1q%`{0(7g5YZ}xa<&`Cq$P2i za(*GvYdEpC*{kNhwM)CI3rflG3I41@Mn8S8j&WHQRO#>+j|)0p>D$wwYRq@qD$Cj_asAd|HKrho>cW6UY-K>@+@zU@96;#^$i z@&_k2v0_amw4YLm>6FD(mBl4--S&gm+9GXy`J=&uH*BirzWV_ghp@klSbOYbcfzkU)-}+49X$K1?e#@{)`M9 zoOz1IzPI6@Vc+e5&EFV#Ie3FcY|JyNSAEhfL0WL3pbd&8L|o^uGZohu{-e^vO~ZAP zL@MSpQMGS@Pv1{RYWWLfJk;~FJ?EQ3PfL0DgPDr_v zMcz>8i4}m&&MB~L{qj7$Zpu=)bj;wHtSonAQR4$`)lE%y=G>jxgtDI($Df%i|Fm%s z{dk1~p%>I@n0*1Js(Dz7LB|b8sImBgH7ReESS2pdWY(CKmkNvgoI6aGfeR;gvli)E zLbQh)Y#sktN=R3i3Kj>O;^Q+^EVeW=k9jJbDG z$udv#HlII*Bi>9*{$x);!6Y#2Y3d4Qf~U|5fe^T8_ljm5a0j(qOde!%%LQ($CqiEc zrqhW0x1eQ&cO0X)MjiTW?o#VRuEvZ;v*lgS81VB26|PZ!o8Vm_n}vO`?QO|xx!6sy zh=Ir?<5QX1-HmAJ57cu%rDs7TzF{9;K{K^L1W^z^!b+HPllDIcZUbjDoVbrCMy-q` zo+XPu>mZOIiaC58CCzm~ZKB#788cQZln$5M7OM5Y$Eg*ccI%`a6Cy-#SBg1GU-A^6 zwLaO~99iW0eiPrIjVKP8nsK6Aiq^0Ds*IoCN*q^Hg(C^(Q7%}1j9zWEbJFfhc#zQbJs=cfx}dmL8s86chxOYj(tIKG&O2^7d)H_ z*qG@4F9H{QP42$xSD@iH6@}(SW%j&ibA~2YS)5mBJ#F?s!PT(>b2~PQZ}R$E2fznS zrih^_F;s*(ZVD`DN9fU{^1Tvh6FoJ#k` zc21OgyyCP~cQWINO6JBudBpG)eX zMX>GJHGeeGWRXcz;Ad$bI4<|A$lZo2_bx>6%pSMBW2Q+Q0yMUn4ex42OrY>}LW142 z$YCFk`nYcl@K;MNMcd$>HoV59`0$;bWE9n2Liw7d4a*ykz@W%URllk}?cJXDCKYv} z@kbk3KE-{-a6+vXH_r>wBt)t9f>P4$+peFg^duPdhX>`8JKcnbV17Oyq`^(eIBZ>Gw>@!F4`GpHqQ!?Q1Tt8H0=jxW zy{Qq*KoU$Fi&wvT)dg1ycZMK(@}0i6gx$F0wMC`fV{NUL(B&??7o9)xZMTN>^wKrstBQK5ZVV6Z{-EIvRCD*%_HrWdEzZa3FHBifd z9hAyJh|-UdI4F|fTn<#>!?kjD7}*R zwFHn0Zj*;W>irgdsT$IeBuMhB1T97YLbdUndlp&a7fkbS5VuKdn1b+gz|^NmOIQ%m(r5W)`icx@b;MC3Fkk2rQ4 zdCfdE3EO3rS1izh4b`T5dr@Ad1;%WJ>5aVoNCF` z!J&2WpMnDId1G{rTW-Q(hFEwEN=zo66fSF8ic63JmWfa;gj{MkjVZ^xR00$KT&CA_ zY*a<|!NA0_w}J$PD}Z>wmQN7R!lvv-%n4{cRYzJ4loeM-{1GhHhN={zVu&61p3q(4 zD6Foj_C2EC_|F+=sn95ND(aEH^F*M>AfyUem`-XOBu+@72yaNdk=*2PTg2$Zms4~_ z>?vYC8G@P17nm+7{1Mlu)U7B+1SV|DA6S7-hx4k(Rt7^C<@4ygM!ryBX;h~Cp1AqK zsN-g>`q?KK0-e0$QP%oo1SE0E2~otbk!eQ+AEHNt%l$}3n* z`oYRjO$}T9iYp3+j-#|{F$S^&1K$L1wqQ*3(3ql#7;SmVlvgnQiXBO4w`eNFg)6zh zkuuQ3dLrMQ7seI$-yvP90n$~dVB%;>js`Npuzp9=Ycq7FaHW9E^wE0o62UqP#HXl} zj>NgpX>=F&Km1m6B)aL$(`5=1p3PS3rV&@tpce>A~gOOmdlz$`@tg)sJQxA6>+qljD7?lx|8IMes(&Tab zZ$B>zKv)rhXbZ~9uttF}jl)4K&0Ojebd}3tZL+?~QkvTDVunT)!OdpoUekF!^ZR1< zWm0W;N4sq3pz`rXwnc(j#qwD))#!GL{J=%(k(#(!Tq*O+D7D_|&RB|k z3tB=bd1s7GQJWzpomE8Ll;rla22-R#$Fy1gIq-n!s*!GIH>?N4fTId=YZ$v=1~5O7 z(~A7wuN2g1Jzrv~Yh7_$@HRL=vwI3KK_?;HsH&U`=^O?c146gq=X7eJAxd+=e0P^^V~<{lXr*MC$BpSCJcu@CBfsHq9uXIK5R!yy{{QZ3Kn_vnn)RmIbFpYd$p6sQxd(y!@t|R z!hw;gYs|P63e1(>L#`_T)8N?Hn4p?s7mG-<_-=;X9;Bqf9+^DhY_VivGD6q0 zvswttNritImQ8>+{5W#8DuhGb%(QQrfJ8RJG)zESa!me7<>MDP%4pBCf*!d=3gk)B6?0RjEStKHDdDA}jQ%VzY#8@VB z&=GR_nCW1d#encIFJKb*i4Mk)bTD8?^(3`D0j&?rO6?V&Jx+wT#7{OGn3I?wJUSCs zvIgUyTMDmGF91ZV_zQu6rZRhMRnI?S#5U(1dLW#c8SO#RD;8iC zG3%o8mNgr9zX_HB53^<#Tc9MALVbRR$dah`8+nU8h3nMtUqx_L!Hi0hirlSCUma73 z(tmz?Ct0ldAvb=v^&|N(Kr?6k(l7AV#Gb5O#-{(OON?)LFxI?!CvQ!3M&e4gE^`oj zS^T(dNvx3NP)9%hzJ4Pi_KLk;(i?3NmgXod{?)!b$MFo~`&yeqN`=u?&~A{tW_3q2 zo!XYk4^Z@Xw0Jw)opd`QlX6kCtw2Z(h^|JDUdFqE-9U6~FAMkjVAt?+VyR3qpQ0a| zHaWu*t0*4wuX`4@hWH4X+8jl_rEH1Fjnxjn+~$&_lPb!Io|YhCnZ`W_fAcSk>o=`P zt<4F^ZEM0uM3@^A0gKo2MrY$8b^Vz;s6Yfhk7(j5$&WW*@2opKU)1zutgRXBp*R|? zFGd3{OD@Az0|a)L;k9Vp5LMSPQX{ujQ^L%>C9eqGFYc+)%4Ix-)uG$b^co6q z-8e&XFeXI{b(07CUq#EydM*Quh1RI2ae~Q_&KcxX_^npdKlL_m7I3*v!v;WzA^Ntk zUH3|xD@{;qZ;j{QcEZg{23%^mn&4ETfHL(@XwTvWVred`B3J1-kLoKDkEy{vJ zWWjLE9Z=BAe=iELL~w{dk>`sS(N9V8LLx*+OvlbVC6*o~La^*69ErxNRERu zN*2?*_OyLzuC+j~Jy0zI2eTJ6=sj0WRmRfv#4Id5V`CCAiHh1^SF+orql$o*{9>k% zNx_nb_uk(nD%M&`;iQI>`V_zT3}>ej1Tk{y*s#3aPfjZshiRa!m=jJ zO2AWMjO^DV8cs7vd*%?P(b==N($V8f5)B%ySu|f|ID}m1IY*oAGnJZhe%4YnwF17j zd?MzWkp_X{R~{7yw(Bz(_1J~^L;)rIyriZH_v8Xb)xrg~h8-^G3SF{+6X3>f{k= znR0n?a_sLglI!OH8vl^Ig;unnU+(oG_UexwQ)Me-S2<882Rgs1quVgdt*C@Rq-f3Z zMfr7qg%>IkQT5HHH6Ta?;km3}kGOWBR<=S*KJC<*&##25?!h4Ae)!5jZ4qaj<3Nw>lah+RF&QZnQMRf0uM4P!Hq;5+OKfllO$1PYDrhs zrds(~{F-vM@(HEF{(`tMM(){>?h%HB?5UkyDP>peUNx0F8WyI(fo^xo6|idCwHR zBO0c|Y#-}!=)**RLQ+j0=wQiKf&q_Eb;9-3*WXOwloL3XzlRI<5U#^)3C$YF{*o@sL!4K;Wup^^Q1KNbzhnZZ^ zWZvwJ%`2WOrI(%0=Xxw#blayxeB8a{RryUS9u#F9N>~v>u|RUlR?huAY2S%olyjj- z#QN1ZT?0b`%CAJ4E23Fmc@K}1m+&IB?j|{+#;uaxVz&-+qoBJxi{DFa?013%x}Ugd zqVRO|yEa369Mk=3)NuKLSNg!@P4n3}TUTdccSM_8ZZ|AVpyv&a3UNVLUL1PzuMNHF z0vrSx6gIK@#f5;U1`o319D0GgV5_JPM86KrC6NWG(aIF!zq}u>hiqnS8)Z^9#ZFf> z?0r9?8#KQgev_23Vpt{^HVO&V-4|we=o0uH8k5s|Vn;eOTxq3Z46QE<#nd~uBdTud zclg(MpFrV43!&WliZ4z5PHBZANvmMv@AMZ>PR{x#8SQH#sO8;gZa3Bbh72*A)G|2t zSLPnSb{5LOW!bhc428CB`Q!$RxzVLuEhPtWLifGDm7JT`^3PH#5!U0*fagNVtjft9 zN9gIXng-W#nKAIt$S&9i{vj6T_L>aUX};7G3pj+u7>U%E|5(88?S8EDt= z0eViE%y%k5Alm}DtYcW(X_g%@%jKNB);xm0FVyswQEPC~Bm!U5>SBF!2@m}a7ZG&$ zqjas%koY^%7{oaWVo4T{3qE)G?BaFrWHdpZ|MOB?r%P?nl>BAp1E?srGMuM zl+;~z!X_CUM|2<2aVeelBo#KMgsa(g(A>?!HoGd7r_^0bMP2ZsQxTb=m}U$Gy%V28 z>o%b-RZ`!%XQ6^z66&hlQLna?4q34>iEocooWMTjOc#Z;@E_^7I0rw@`2F7QyFG+J z_S+SK3o^(lNTj4k;)mC_;hME|v3xb#Po!H*c{)r zW!sP;I{B;Dzt_c~c8`bH72hE>T!b;QtZ%sZ;Gz!40LHT!H%0o#VCD9U9Ys-7xe8xz z-+@mZ)`MV7Y>fY%tNCbsrg%Q{I-fNw3oFBC?`NCwzomba-R(>Obn*sffBzj!Y@7g0 z|4IcZnmF1zI~bWb0$Be=5VEy#`sjBAe5Q{+az!#G#uf&Gwr&6|`VRs-GXpIkvmoB&!MO<_?0owA9W6F>(*CuD17>!4(3 zU}OUL946$*0ATy8h?n;xAN8U8FIkKc!0}0ip%Y^QFnlWhutbaj!2ZwB|0rc(_@Mox z_1|eOhR?ZwaL9hll1|_=*JbmO=mO9Qo48mQnJ9`0{?93}tYlYZr6r7z$xgP;DHLJ= z*$s{dG+>5H934y{Ou|aT%&5G*8M z1a;t^O;b}au?Jz0-!Gj!X=$b6Lk)Llw7$meD_^_i$sOn4zsC`?36Vko!HCSUa{G{D z?t&yLH4I`izJrb4BvCR3GcaQ0LFZQzZx7U1-IeT7xBpB{A>YZv(U`#QV9t(gmtRXK z+boC#%hOIJ+b}Eef;8z=mPiNMz5g;h`;aLOM30uHH?~inXBEaMf`)w-b_D_il{moB z;eZhF$YGuh04u) zECNiXVV!0kcUpyada5zMXa3l|v`*lo^vW~h@vL%5elnRY5Sgv9Zgo;g*NFjiN2-~D z-_Vt$0@P>?!N0p%DR&M4x&jAkT+6&B2)5FVAJ~GUfQ>jgv2p~?`9@29!1kQuZ#&u@ z%yFOg>+D&Ts~D5dk3<5U$wXwgxTIK0cx(&~A!X9|^&aVcyNAPK{Ad5y%$y#~%b)`l zg@Z)sz%cXnAZ|bPSXyJ4!0&UL-?3OJAhCV2zM+{r>j8NOamcqwK#th*011(RlG^Zj zfOsEkAu+&=baj|l#nm{*FnYZ0KM}%ehudsQ3lJ;q-ND5eJCNmn#V%M6wCV*xiG_5^ zg<X-r4UBp}a@20ADd z7K;?m|2RcnibWMro+l*Fb3@{U(*~sz@RSQV1z?7F1@;a^P6e4YfL{b^@?EOs!h;g) z`?VqLhMozD*|V}K*@D4~(AgTMT+pHHow)&Q`@pMANO4Wkqs^hvBl<4+R{YWeqR`9VtK?TutC}ae z+1SSuBq5TXpJW(nm~I$m7_h}Ms6f^c?-$>UB3H(LSUW>jM1&(aTjV~|(kRo&xoomb zk1xhn_&l3%ICw*N(0K!GLS~}1`(+nz7iYI&R1u<&UjKnf5xXcdA+kL(=9+o0b#i-h zg|)^)_q~HM>5_R_3cJO+wZ4z`OaYelBLp&fOY_3>x(%`o3O0>M%Ht}gsR}d; zxQ5t`n(V_bdAGI5Ys0c6;w7Sf$|x=VUQ^a@n{1qf`?hCJ&RoILKAd0sM?<4hbUv4( zpL@isz&+!U8kQ$QJt7NZ6+`dq=~pG1ECpWyoZA=w~ZPm zidXl3RF%=JyL6v=bEWIlu41&ywol$6+z~&MKQln`gt&!tBHS-JxqN94^qxaLKGw_giHpXtXX&-&nHo_FF-|OEII$#QA7>I)H#DV2El#bbIsV@8-15AHmyR&Y)c0?mJBLT%c7=*a!o}?}gW%o0yQX$(XTQJ*a zv`pH`4JXSEKX%K0O;*S1^=jBt5#SQyuOq#Oq4t&SN>|_LwYu(tEhMyuJjd@3QSV1x z>O7dd#DS#*)(1`trwr(X8A()2pi69n$O-(|aIV+8^4lp(j7=<{WGb638@C8-d}y2% zBr^!FrPg8Mq3ERRXD*-@Y8$nfJ5xc;=bw&~{^F=%+MXKyCVeX&Nt!_xpaI)pS-)l` zW|A|iFowK$vy;AK^9%py_0MM?7E=~iXR|Jsx8Rpvma}gNRYCLB+AFQHt~$MKo3~xZ zG1dTU@uiDjqQ9a{aMx|Z*M2MHD%5uAn>U>A4GFdiDr|PrOlTok4O*dFovxZTR9UX1 z5bI9g8vfb}-;yKy6Mwv};jQ+#I(8R$+F5j0^z6Ff0_FN=^|0+)J?;4Nxn1p~OVvQjULVmS#yF;tv9!KZvc^(uuH>lj$PXtbeXsM#v-vb@d3a@^d)@l` zEN~R)AS5pV8eYfuk$d@XRRY$727fjy`zMd{kJ`G;zUzG#J{7-=n-mKYD;n;Lah~J) zy}fw6+cwBZ9i=Lx^XMyiCS?e&s5O*0LD(lak(}8KzN=F$Gzkp?p=OJ@tSkj zG^H7?nK%DuKCG#b)xuNSbN$eFHVc0OpN22v-RmauE-k%b)J$qJZUQ|sgCob)=SpxW z=rSS>0>&`>FhS%ins^~6o@BJs8XS%h!Ht*f&_bgn{2#BXM+Ahy`-9J2s-fOSZ zkY$Lvd}AK$uja2tC)@cHFmhDc<$P7Ia}NuqWt}%CxeNM}4WVW+!7V~`?@5m}a{b_w zWs}sRjiN_lRbrEokCCe$bvJ_-zamp+w}ZMV-e49Q_OcwKy*>316$|;=A*E&3iVw8IC(Vk$aF!%8BB0^@4u6awE4k-CMs8 zyng;aQTbmO`WK3`GBExFl0VVzC+!0mO9%-G8aSF51O8%IMS#vfV?MF@Kalo6(B1KW z2jd@70c;$Me^Isb2XbZr&`DVsJAPp9f8k%9&r4nZ#^wzFr%WLOCj%>6vwy_}j{gn_ z{=@N!wg0b1{DtRqO3sE(f5VAS%jf}gA~qlQ$1H3<kI2^|=^&nf>)l7N7sxR4~Z>Bj*2fBzT^444^AS=c`U23r*yi@!J182)u@ z?LU_NCH!OBKal_5;miMYwap}P)pCU&N%Z^{%`;6G%w5%a#TPj-(4X!}XqDHu5+7Tj z1LfOepY3|uWRwvox8T+Yk~BuUkZIB2YQ$E!i*#PmBkqPVC<03 z<1<1vpR1$Fug(6``V04*5Xuu56{uS|4hM}<8g4Jy*|_zxXI+#OhMd0s^L@~~(??C18(7TyLG$)U zN%#0DM<378A9S9?Ddmqx!!LPdz>+Og@yRoy<}jTj_ry1f%m07s@Pi_4HRR63;wkDU z-H*CSu;##1J01W>G#Q!y?|F{>14?n=l$_8LzVFfU^C!2mQ0}Z((QtTYvU{@_g28jEtW&n15va`}xC=kNx+_^ta>RjDLlE3jS2~ z$@oG4tl2*me$f6A_Nn+EWBxJXa~uN)!++5~``JFrzvX=Lb1;7x_`%Ei;VM6k`xpbG z^B;%#_dWdM5&rWj|J$4Td$u5JVEu6l@c-&|KCZ-?eV%>%9WVHC?g67k!@ozj09}VtV}*W7oEVTyL0-m`=k9Y-{%CNVPyJv5;Cw8H?c4? zcY&Za=u?ZIO`fY6D+>w zokK->Ic?{AU!3G(Qqj>_)p`A`Bt=u=-xCGdiv(e^^zlnobcB-rc6D*>tBxKv}qLF6|zF5=OK1y8O{1f9clz^ zY_JpWz~|p_6-v>F_P9M!O1Gzb5SXcoK%uemL;Xk(HjDO&;vB!s=I~=jbK_aCm-7k? zZ&C&h72<^?tFxP$?qZP&K`Bx;R2Mu;-R=DF!$BQ_N<6ZxxaIWsP9>*!l?S{G%x zwB&D?-~ljOMbqM5f?9YK5QzyZ+a7u-a7H|hrAhII`9&qgV4bqFwiy=b?@4%(LD-@~ z21F92g@qM?$_MRmqFM$X%q2EHs(U>}aYuzt0hrLru`tEpExt_RCHZKH$<=s^#zf%l zGHYmPwsj@?A^0T>l;Y@$B^$mtB|>S4LVFBt8~Nyri6DfXyky|V5lQ*+C57A-@qmen>0Fi2qMxhp9OopB6+-WelF}q+2S(Z zEHQOs*J0q+ynn2hykBj{ipO{MBKzyh+=f*zJKukl;=k#pW^8pj#KQ5cm0)_{T_pI0!aw`SICiw})g(R<@{>b>eztNST(Gcpc`u2wPM zrhP6;edMx3#i8A8+FAwjp{kmvx}Z9OmRx@hT)R@}_XUKt_&!42guEdqp#!^r-hJV! zbDcnhuIkW9r+Hpo?GzAnaQWF@F1lF6&lYql5>>w!IH?A-2#w%t2zZZzwL-RIY*|$C zP&qhaLI=pY;i~#efx;&90Cw49S4sInLGr~xK=`asFp2`QiN{zBfPvoq8X zkf^|wKBML%hvt|g_r4rI1kWhZHblUnQTc0tKX*abv!~1Jyx92|GvU_x2u`b0&q%{n zPVKAIi3$ZguiLvW#&g=Jr)W$n&%@Hb?82@JhP621*^Ck29(|D1?#5O)BqH0koPIeV z(u!wYiOL19z95cnxG18eu>~|m7$ow?l&GWT^hV=SCi7n3NdAulb2^#nRn?IGB^nvM zwO@m6eG_nfl?J;?zq$ml$;bxhrnhLlZyZ#MDTiZ?E^+xs9O|#HXf#jcp!RTHS~)yp z@Ys;HIcUv1o=fW0f3Q3m0bvdA2DEd-nikWWgJz< zR#r9}t-FD=(Xvi<%8h8idijQ+p4}btqy6^sDTJ+w348d;MgFFOd9EJN7PS#BLLmGd z_UfA$!j0(8n(43pJ06$xeJgFhx>0qb%!zfe_2C*8TOq9_>*T8Ws-{l!8Wp{uV09|% z{jbv4&zw>$b?wtO9U-gVj8GHQ+XqbtLmSM0b2jHXUm%3A5U@`W|1oS>)MAd&z@K=y zFFY({LjX$ub!Tos^nwS|d>cZgj_J_UpdbqnlV0M$UPhy z+eyhI(%qrN0cAG;7P+_0*%OKvUF-%^NP{&>29lbxPPo5~Cp|lpwjq=t>B@Qr7tDkG z^XUYRXGM>i&I8qxA%Q_>4ea6Lx9BYbHt zM`{!UYTIy*ez$Vi>u(T}Ry>}$@^iR#{K$=$cdZx^`{N!noNO~zJ%{!g!WdyP#*HMF z-~855b)TSr5!-%AmsDDscdQVU`B(bZMsjZ?TzF2Nb6+NmX_O%}p^7SdfkGz1;(AAc zF@4!SFmeN&cnPduN>Ix25(gCZJ)pEyD6q3rIsChQXgg(yUG+Z>Fe}~c?e_X{_jz{A zFJE>CxE5+Jc`%tOZ?|?r?k3z#7bw1+dA>7GJVJ2U>$&k=Y>H@9sMePhlS7Kyq z&7}vRFQw)#7lJr3$P0|EyggiV{VkB>v3zzI{8Z}epuh7uQ& zfc`SSG^u8S!1LCZOf%{2%d>j5&X?3)cZv9ixN@b_q;GrG8j^N4Wry#V+D_>UitOQ# zCZ2)87_MhmK3~A2LBN}A=*=m84vP|1g1$>C-X7^0?-TJPZ0KBgXauUpK}A;cih(-u8&vTLD45rcjhyyp$0^ul6k3L0wS#3-pykWns3_;{?*#Xhw<^yoNJ5BS7b zh%O~;<9W%8cd_l0hcb?h0b_m;$@A}X{Cm=(PM^&(pP#P03=Z(Oc?lzjlJe3z{0*R_ z&Vr=JUtEI=uqMWW#^C=DXS00!(x7ur4lq)SkX<_zPOlw3>^r=x1?AY%!A}LP4U5AA z#ia)|M8gbtgJc3)&uzuD1KNZ43Fve0aia-l>bUBedS#+Tl;b&?&_;$vq!o{_9rjF( zv_=)><+)orfI4cG7*yJ-ENZ>a_FrR7br- zXW~9M77vlR(&72Dw}j0^xpTng`6>3L_~mEK&N(liUqp4|M))_qchHy)|A?LYT{%A8 z+m+O4Eeg3@^7?a7lUEX|1_6_Lkb!d9jVy6~(+;Nms9aIg(lvv+H=T5znYCWCns5#0 z3ArqtbLT-cEeUA)*e#BLHB%N~>A+BiqRB}n1=Q@ZQoOiMi^HLCJ3kd2H79>>Js;24 z;VaB5v+wTOZ?n7T4TS4lcL zymdlzL=F zkTW@>^2VPY`7*GOr7PF69n4OwOWlQ%=E#_zsX1m-A^uL$kSpAa`THDgSBEz+jGQZw zU`+j};nBZP?bg~N)ca=i2!`h?U`uGb8TH;SR>rT)E?{7yj#~#NAlG~03!J_vqD|Ti zYN;LLp2nNXJR=Wy ztor9zR(Fk*ebtz(2t<_Wm?MeNB3$yTKH2 z@)ZFdX2$YuWiS;7VWet+Ayv6zBKR|Wk?z!XO7a-e?eqS#&nJ5!6IW{|g6E*9Mvpt1 zI$^P9SBhf(G#AO7V$- zd|mt+`yhw%8zz;88?QY5-=zl+R=f(k^WXl6P%3-09GD}kBxd2!AGxdA1)*30WsW0v zkDu-eojI}WPN-mB4-V2Y5z4M*9Gnmmy1>r|!Qj0pjn^JyhqTv@D8#ql!vF%$1??06 z_Ye^gvqP!}GfdLcF)?*)FdW4lsqIB>VqH*(Pv%vL_@=JdKnRHOE~dqXh)$;;z)uX|*P!&a~6eMRqJ`@?(?+uCD`4zKIh zOgqKSIwSV#>CM!ck<;!itW)>dI*^cnF3!%hG|*lx23f91+;8tb0^B6h2W<$N{iX{} z*<+V7)JElzGikKh+e1Zmf&B{#RtX9t(=QB;hSd!q76T)P&Ivm6ZUdYF(nx~=z@wT( z{Rq}b)2xGc%61XLWGG2)!edhq>qz_R`lh?axV;cSbYR7Cy>gvh6!P{j!tOv09Gzrz z4hwYABVw4=dYp_6rQV0_=giT3PKKj*&%mKYM+YRqmkcI=fXBVqEMh|4#-;s zwav%}h|aRiFdr|qG%QE#MI=XnHd-Qgz}QEQj1XhY&M+x@6U#ui(t8~va=ivnvNsh4 zMQmKuhgIWgGayMK_vKby5TW0kXZ1{(s{ZM59_w1wlcgeK{JC#v&09v$wNCINR2Ru5(cV#?r~!z+0q0( zy8%HWj<)tefZrfyhmy6 zCF$k0>nAD}=lv2xw;M=rwI@JSi~@=iv4^kT&(#Ep$L7Z}9!e%J1pono1TF? z0=P^9_e%aBQLBbteI4WVG-*$Y+Xz(`*b$Xx0QO3+VPCMI|F;z>zg0A8n1hKtDS{qr{^$+6#&&+R^8t z5OLBF(ZZz-Z^VlWbDByFf>293iTHVM9mM5=VwG+9#2t*%5Lxv2WF)ONEZtV^BsIBm9`vw5k&?WMi9Sm&BV zA!y7_f?Cj-j>$xjD>5N zaA2Jdygdb`@rVF;92Q+HRwQv+53ErneE3&?NGp=ONNtdfFU>Iw`$t1Mzg$ms?=E=% za8os`_nJxRW_qf3-MyMUoY~T4KcCu8bPMd>U&7`6S$ML|)()7heua(!_wse4B<~`N z>O)v`sNTj3%AWwkEf5g$P~JV1v8C=ZN~9+2Fh`SQa?0V2|?FQz3tg#(5e|-2oEH% zh7brMt_`lZO~O`gnhhBj+GikLVN?6TU~l8L^(O4h!4#C2c)xjD!_BlkQ;Z(V35-_p zX(%A%V5{3Sw;zwD9O^-U&>hF$uUw0B37h7uV|sim-jbeCLQO*2V^`sU{^tUpzFb$? z4fgUL2$C5>zDgXi?#7SI3G&58RvmGuUKGJ3qezc7&27X`+2(a3#Yh@JtMwT>0*KIM zHIOw*f-jWN;vYZz3??{WX*|D6ImEz#5oF1u9{XmIup2dfhdV_GUN-jCx}V~BDe5gy z{vw??zg+rfeRJc8so3(x!Xdsz+>~Xlvwvl&dsr$XM3CaOm%FW`i016SE zc}^l#jl}+OJ>fjtB_K3zeUMhb+rVmJOO5#JV`{> zK`m3=Hg_LHV1h*Ik3FM;@(S^%UY!CsHW3I#`irPtzh#swki*p-@M8Y}Vh;jvJyt07 zm*`^=&)W@(ZqZkNsjpQ^o%cv5@D-$_p z69#V)$r{wS#{}w^>c3vNP(VPb$g8@$Uj#M;xE^q%_^Xw_1QNtxJ+=t6gjWr+oJhNb zyF*i}%WX)X@Lw*XQ{GB->37<9+1I79dUTkk1SNB;GNCe)LvV{1h41rNQXPb2dI3kU z=@&IPF>PlV@ZZE9h{JpbSVs?nkDx&N?NKA?dc>$x50Xu%Mu@!3P=h;@$!{jwV6V7Be$5Gc!w;EXiV~7F%F3 zv&GEJ%*@Qp%oejOia6PIC9+~I!M z&TlfCTtg>U7ICa52A{WGgvJmTHaT7Cikn4uGfdJz`%T1Qe*^E`cW}-*kaj$_#AO`u zq;4l~f(ZU~kHom5FKkw52tPvkFa;9YPnlh$;-~&Z=R?PtXU;nH(9Qy~Pa}nYLxurJ z^A#MX6R%jF)_lSB@o{;WY*S%TfQCt-IBw_(AzTX{c1n}?VM1lHSL+KXGRi;-5mhN& z+rYegso==50xk;ZQ}!k*TrL@<5k!Nk2u}qC!auSg3ZA*&g0Cox zQ_~EYM3KPQBqMWOuCCD-`D4nGCj_=vKl0IMP3Qqcv14N z8|lAGc1Aw#Hvf9m?bJkGc6k5hoIi@0Jp1*;C5ZtxT%Nd^xjFjl)uq*y{=YD_bvSYB0jMs7|_t+ym7;TtGq> zr2i|o@RkZ&L#$i6TV`&F2-H2ZcM}}v5L9AKcO6`FC5=zOK0NXP<3tD_rQIYKI(c9W z4v2^4Y`)<#PFOOM1}y~U9d7WsPdYm7Ls*C*Fo5DAvFmGaoW>BtQr0Z&fGfsMUM4eV+G_&+&bh1wyTu`%D#ExxylnHHoJ7sL40t;5Jx-u zTym~=G{VX=5tzalv;mJ^o zR8QYvKu*~|rSh={(s5?T5xEI$iHrrnCNThmcbn8szkoNlnD0phiRI)cjB)!n$Exx` zHkq?%?g|J6MVm4aVT z4Y}lbFoO4Jg_Cq!BR-S4Otz8sT_wYOrE`yZ228O`Nv(-NkMwd4@kc)6-?ssrs;Pia zu#y&uFBu^HkWUtlQN9>dhTOVhN<-ruCwcfdT7gg{LAHZKha|kI+q_C8y#+B;Qw9eK zEoQOm5hi`r!PpdXOYsS|;&&7Yu`!4?W-UOFX% zRgf6t(le3AXJa5nc5kV43ET>0#b_lGmcg+|9M2AL9sg95rUd|4OY}nH7M+5k^mp4^ zMC~Nj>pbRGiOXc!(vzwL?ssZOOGK}02PaqM0y__5`S38QcCCq?xx{-^;d?8YtEWKY zI`9q+-FcJ`6(DDO;RC~WnFfvIB)(zc?^4&%^mieevow&ASh`^H41Qx6&F*LPgdvYN|e1Z&(SglpYy_ripP-r zHDb!k9$#OhD^N9@hEedTe7Fj|2r8E#b^AG3DSB%fIfgV-+4e>}y#)t}mv#=q1B{ad zEFYC?L-%Y}-+U7C79Nu=Z!XrA{rhpm^J)(eDNiFTU>?|^He3HmY)9DlY%5MBH?=)5 z$JSdEL4p61rrq~1t-Qp8^WPek{RaIVA(KOexNEdpz0%k#rczjcNv`1QNUm>s-tygS zC<2H^}FtwYK_{9($LDr4ef^5M-v8dMf+5D({w*a?kBX0pHG(um4asf$DX9LEy3*1(+#& zr8go*0^22tSO@tqSHF>A@Dj7Rvo?pK>!ZcEVzjWeHQgS~8+eEh=t0~qw(+??vM)Xb zG#=nP6o|c1P#6j`^vtHkE7ys`P7-4$_4_66czO>V{e6xq`&U`~yJ_63TQx7|JC$^X z8eMo@s_&iO;$9LxggYq+UqK52M{j3)Ywef~?-})q9bl#KjYV z%OLzjtOIuoW##n?=BnUqNHtK&#DL52`bUn=wP}w5rW$Ll1?9!QqioPb8VvNMPTUUK zlL+Cm{f4Vm6~AI1pq{YIl-uzhXt}v>ZBxVF*Qs;|cEro`VD&G)*KeA9A~8HZc0l!J zs`)`}P~#VXU<{!e8=h_ER^hx%)?M>qdQv40$^wgBVVvNHJLA@DD0HPQw&920{U~=v zZ7i+^Z%c+>H@=}=I#fixz&*WEb(Q9QqaXJXAiBl=B}+Asa~T6Rtt^6f6r?7J7r(Q8WP%r0~t>59#}v$h8w4ZBzkzMKTWlIMNgRh^NS0a*9sRM6$z z7IVymFAU>$QfXpw+J$*wMZahw~7Udxf@R%!gNVd@3^qd0wzGuEfK65K_h0xY4*}pc@QuA|8)#KsXrpL_qX6 z5ZP*ep9Zvjpl%Yxp@gdfk#2a@%Rpc8VTE}Oflcj>)6QDQs&0j0~mEIt_(J%99`! z+yI#?CJgKQ79V@_tF~Cj8=oS-TKmYqMqOTwlhB7cha?f~$Sv0-kBD4UuwS%hKem0x z*F#>wM%cG@W`jx_k@C+R?azW|rGwh*^ zX6}m=gpE5eH*)+z^}lsb4MiZ5 z!!?2W5#Fv)bZbs+*|ehX+`Yk~?m+y5Ca>w7KFpHd?})1%O4ZGJr42k4x!quTpoQ^S zdKEr|BT1Bhc4={=(Unze)r1fHD;vS!o9;5QMr3XucYu1r6BJ5v zG(X`;o_nYM6j29X(P5PX@{P~UE*5zbTAtF8LOp1MH^(aMJ{cRZrRd=Woq9SRq5zpi z0+yEH4S{ zw7CNcx=7iHyxLS}W}l5C8BWAq9_Yo3bxjsef3h3kXA2zj3i9-URI^=ib6v&4a z%gh7+WdBL;M$wVt<@?6}3OYx~;o_^)5B?CfFL{OYSV6Enuy~Ca?GAjPMttFd)ru?Q zzcC5wc_)1|ApFqSUF?`Urxy@&uC@$*w;|{a3zaC6&V{Si=Zw_}D%Zz)sr10*p{rur zBGn@#PctgCLWRj5nkqJtgHv)}BF>X3TL?L~dxY_jSs<8QHMfMF$jppkyzfcR8nlA2 zxjqgkjBrh(H+rt?TV}Q(Z41F&a=UT_V8c$@mNbP8A7tP|X7_WD=|KeTI>^b-&YYY+7To^8dam-u#c zX{t6AbmQfKOlHi%wi0~DR4_bY6)@w9bf-hP`5@jqvYju_6n!KY?E#4Tfqw3=ED6^K zHs)!I!(JY7<=w&-pkUt8mAkQoQ*LeS`&L@YW_Okag^p@nQfz%ItTJ$VU;i|0kGef; z=`6lAX(rghiqghvcP(3fkDj+vudLg$x%AY=(p3c-g{uI%FERGg(7DJ^pj{EaM7z&L zC5^(jMcXt*b=|PzrtA9cQ(ySO)l+S~vIiZ=FQhp|XdAKURJOHH2w81`J_JZR3D>eY zUDb`B7oymeGd@=#N}54Y#ng;pN%@+^ex)(&7m@YzDHTGLZepwhg8MO%P^rptQm8{q z^|Pqov`XA&6Nw8J4j8{R&_A!f?H)%cKCWspG%EUphA5@cYLebY0ojk3@pGp&V@w8y zUkTUEm#(I&wwESBiR{v4$4B!n=*k|IX6=7dd6a%-O*L5nwVxSI=J=Csa1AeoK z3M7p^A*gjsr#pzYEO{u#MIeeWfv1b`UW7Q z$P8aBD<^;uB1?erhCgbFt&=7jm5SL$LEQe zXnMBUt6|G)=sMXIRblghcHGTfWKO7~*H$%~X-_B$gf6dA?f|KEJ_qGS&6%j*jM3_W zW#&0J<2WQ19tswfCMGT?j5-GU3@l_Rwrz9#J-83_J3`Qy7e>&SwW1qt5UuxtUGMhjJ1>IT z`rx+kduE@X^(?#?0~G4|vTeSktPXqI z?dE&KNc1ZJdn>pqDcj)MveLOk944YVbXF5A#M^xP;r#u5X#OnP&F@04Sia}AhLY&8 z*A7c~8KTm;gmcYQ(ly)&yc{{9)=-|m4k;HbU>E{g;durJf*3ZRq?obiP=8q^R(Pgtk-u2g}*oi=4j5Qp@(y7yE!FF=fet8(v7 z^at#WtV<()gom#>0mkY~qzf}tRz zue!w0rnvC;-qsZ!-EW0kDDdAE4Pe}qDCJ?_*Y~#GQ(lby!I}$w-|b5?fq&e1xl}+r zBtk*AZ*!?bEk*=&X@l(Erc{ra(~AtcLVZ9tvcP4fL=7T8#fRMp-X!Y@oLI-bZ%9NY zSw2sH<*Pqm4T;7*19ir66_glG0)sxg&iM4%_u8MAxW^NEI+AeXFzBAl%U52H!`ImP z^MwaWKXl)1G{ENu@oH1qSQ&O&9P81Ys8yJl-{T}(Z8OxWUr0~qk@1*F-Gs^~9+sNi z*c+dHXWf)P--Kw!l)?*)KA;|A7BxCKt-#a92~Kc!5GrQZh5F-395VZE$K0=r)lUug z_{%zrZ}In}&uWxCS5U_K3GTMrE}`blW4c^)oNF&jXx1>a*&5y zZ4a>V{33QbK^xuPayGURE+6E*%{Re6pO${hYfZuj)%2J>BhRi&rT%>MQ?V`x&4zn`om#g+%ZZ-k(;4m@0~U z>;nwhfxKApfZ@ui&Iyh>obXkchSd}%4F(bW%zF`LNe(yz6Y!SuriVm;O$SmaNXm}o z_CcgcfQ*6(7lQuGQ3pySka>2(CpJ^(B{a9IFC-zx`W#%{hlYu zI1?l-*PZ!A*S8Ymin=JY!xq)siJcvN@J@yB=GzF#=Lwb z4#Mk}=bHA{IC7pW?}hloh$wan?+q%`tv98e`~XRK<&TX^nchae>VJQp!;uj{=Xx*b>h~|{eIgl zo}nFI7=2UnyNE<2sa<9WQ}2KsoH?Kz6Be`|?lN9yzvhb|>~YcBVO@LPDhD{RJi7ln z^`YGTu|*Q~O0CHOPxvExX0druw(EV%LJd)aAqerB%T^71Gc9*Gd_-_>m}_9AoIq#u zLdDK{#@3>5ro8hGGP%zTtFvX7DMdy#aj+(7sAInG+sBXowhlVnEt|5}nzG0JhkVsB zoUy15UgSS`34&Jb%KKvK3`o^9_{LPe;;nW4Xq$Lc-gQhLb@+VL;Y^-c9e`50Dh#cn zBMPmuF7#1EKFUXZ{iDwRQD2Z3QGt(_PxxqN_wf>rTzNmoAM?|!;wqF%4ko=jvK#qN zG`E>r%pA2`+5g`gGK-9w-V&)QG@i&9Xf5QJNAGspdJKvJEJ1YzDsl*C?RM@#JuO}k z{>4=LOR8e!_`6l|XJ-8MasJDh_TO10|HB0OKWmk+{4Ltyzq3kM{2Jx4>^iM(VEPoS&{%(9Qv#@bpo}n@)YR+CC&1DVU3AA)ZMuYuG0D>5TcauLJ1TU~u zs9`>3QBW>cvj?aI?<{2zZgP6@ySALKy=FdV*-!D`cY=TbNgYArIKV!swlHMHcYt-f z!2)%7DV*<*1*Wd*f_$C830 zKt*BO6QRr{a1d=CDPC2|(0Y79G-+Nuo84rWk)P-#VO7hVg0-l+rzj%ugq!5lse>pJHt>OLjO3VKn(- zFs?N`#bga~hMXy+M%z|78eh#AZsnnJ@6{bKsx4Br*UW-eN>@ckLxwMAT}$%0N2p|6)eL1jE?OF!fpNv^vZ9?bS_~^T5*u+G^-+75g2(5fEiP>dRg0gt2V}9)B8Zd%`_iR!GoES6ptm5Y8ojBn#M!`pFODNcs$+zFjB@t|z|tLS;<(1rp*w zKbz^HCf*yzVHyFV9vzR&2Zp^nGz!#E)}mmb5rghpKLwTjj-cEU5_ee=MJgK=Lm0$6 zCT>Q)_~q!cHnRf#euF0W8Z!%CXjvh5md)2V4FHSphK{BpkF4sH!S_PTPiLK;vm=m% z$@0wOox*cPrmge)G~aL6)^8H37T$|?!h_oSk5P+fyUlBTn5>K9 z#@`>w4wal^6AQfAdF=Sl;3!xHSp_8~^%Uo^sWIMND6{eupTib*BV?xI1v}Y_NahsI zOr=JY*b-JLuj`IZ^9(JR4SHlAjY(8682g2nb#gyRL8Q%sX z#EQj2!_1^tg9~W86zsC>2MohvVTM*Uj9w$+lprel2JC3-E`(p6 z-p|R&O(icgH}O*Hki*lSs3Ps2Mc`8Nu&cPql;u*gHB>#FeqR%Oo&k(zbJDW&lvNpz z<`+17=14h#Q6{0e)1I4eDy!Yc=3{d<&`GC}hTn&}7RYV9|6C(sbB#hrGfv^=!$<5XHd~Es{zc^%|KON|FoXFd{EkCG;Q|(S&A3+UIpmI3s^l-5&-Dd!Kiz5jdEiRR?S5|kB1COQ_aC# zey4DqJ}=?v3FHrU8#nG`a*hxN@Cn)N*U2{`_gv1d|5prTr~Vu zH;q%cVoip0V}nU$?dP2XFt8*>CAro2XKVxFGB3`kB!mLYakrZg(4hC%?G?fV$!w*kpKa%pbkG$qQf}AN`m-{;HtQ+_8^#2BturF(wga3Y z8mg&OY$xm|j`b?vqyhN_8c`GdW&D!nX)?m# zTYBTv$u3oPg%fie(qA}Gf8JO&Rp{_CRVR)n2_y*&9NHU@IC*@TX#(J{))+2T;`0V# zNXo^m=z7T>NJV+dhL{dK0XIV?w?soiQm_jnk&8`dDwwuWT^OjtjZ}kGteWR95wCkY zARVjbYsCnJ=rab&*)~e5>v~&=b%)$d`FXDHSTjD+$o^u7uP6XHHS8%a*dqdxiVdf~ znFgZ>FYtr?_87fNOb8Q<`}{31RpA+hzgV=5%g(DHA!+QuPMs4^&03?eIce;tG_2eF z`L))QTyDrZK75$U#{Np172^Jha=gP@d`mIo6pg3p6&s1y!9#2i6_&Yb71l$O;d+j?*Xbd%7_QqV73T=TvZt+6zku`$~vDj~H5{NX8` zvTw;VIA8CI{8RiaK_44u;BZ)v{47mR_vpV0sR1SpvDWp#*pY*Uc14RpW-(nu>y>EC zOaV82pJv16uayZ@WWK)03&Z4d?gXcc3|54rE9~SL1slyE8(>UWU{opYpsCM}4O=-@ z+04nUnXhE|j}~rQ+xp18p$b_|g{AjjrslI9^ zudMqdK@6ZRX*ykUgYxHeV5DgK>@)@)=C?-8%xZbi5~I+Tg=xcP)rM6h<}M45f%0kv zytq4xT}kJQM6v|&`*aA;QwJTQ6=)xaZ>fC+Q0k|$OL9)5{a2xp{r3>AWnxk8T(YQj zM?{zF@=jlj)L6I8U>Hi3A+G?ErWqnszc!RpQ#I{|dNrhWR3})x}^E!?YJDlll3MtMnt%i8faX{q+LST0CPkbfgRWnEv_ppbu(i)!6UxhiLA11ZB9y*!tc;+z!cmT-)Bar)$4DudrxC*P7Ut&G%%%^IdB|$UEH0M2 ziJCB}Gt_bO;O3&=lqIzokgD3EG*yDP5x#HLHiSz|-4=k$`O?3Fm8jOkluEm@-F+-B z7-B6YG>r#z#y${^eTa4M?3&}N9DBXNV1hAB9qZ?`)8pyc^^7}`5#Lg8)vh3#U)5qO zUF1TiVd+`cY9o!3k7_Z?9$`(`(L&Eeo2-_(^0FsWCqiARr?R_h$-2w37TMSVOP_*b z`GuoCRBc5ld=<`Ai@DB%_>EmJWh9#4!kyld5p^3(T zz4T2B)K!Y-gsT>|(CaV?2ZX3X5!@EXaZH+pL@o&`QIGv zC(vgJnNgAjtLo$CD~l^9(_42+OVdYN-_? zYyC#7%)$ocNR1T3oN*Y2V*BNgrSKDK5$(2sLjljL`l&ZtrGTeAA9tnCxXWdTIvp?*DI{@*bRO%__6X_ zuk8%vmA*`&SA%T%c~-PfXmCLoDZq?1b!*r8z+j54-)@gIxX?D=$x#WB6yY zK3}Td38h+aPHw&y=O~NW&az1>m^>b(pW)#3#tHD3tl z^k-S-2ct;V+!P&o-3i^uDPJO@&-2gh4|56&B5?VXfWZ5cM90A*>%o#_cLQOW!j<|S z+IrCemA+m#Yh^~6i5mud3c)9;0mUr=CgE$0Gm?fZxX*OcbkoY5mo5CchFSmiE3AHZ zjS}k}Cup$dT+Fb)j&}wDJ?wbJBDkP_FFJO{biPek<6sueoQGW>K2NezdiEB-9{-ff z6^h*L`ea~KY&G=e&g={5YGDMDkss@Z2j(8DW0EKuoSt)o%kwFMvoTsuHa%Kax=1ctR0=`bB=`!lH-ne* zRR~^sNmA>wC38)LVd$`t%ZX%_)n`3m%%x*_APp+6w+LXCox;Xjvy2fV`&GcX#gMfn zX?4dXytt>bE~+l6kfp>a&|vn1M2nHxWH6mo!sZFtv}_suh+P05S>h#Jg3ebv!sLKP zgx225&El4gQkNI(^I zGxPY7>&l6lUp;Hq=|OJ$UBn?HAx#y)%paiXIr-Qv<1FYg)bC2WT6IFxW_3R5Y+!Ar z%WFRqKKQe}4mtxmn{M9~Q)H8%Ak1QfC;&MZY3y~;p-S!bG$9q$I4QBwj>CqFF*pOi_Qop>?G7Hef{F|4JtkX`3%tDeo#Fr zPO7@z9DZU11i+}A2wG0K(a%Z)eD@wJU8QqS$htV6OCNh579MqfP^mELi!^?d{^F{N zinA5meXI=v`AQ$rkk4nDDi0KoM2ti zduV}9-H9~FQ0A#Md@6({ADO9SG985g_oRd7Ku7b> zwBPZ&d^Se~d+N#?Roi5rG3oP`Py)d<^X5yTo{niW^JdJuggG-0+4%BiGl6eba6ge( z%v_ZRZd9yF4DRBn#8NujHv{crvE$>%RJqSY`Hmt#T47teO{*9!{sc(L%8KWOjz+@B z?ZMLJpxyVO2+Jk=;?J7QfX1f;^XB6m0~9Gbd;uuy?v+uR*d1}}h;H>cuD`y17UlIH zWV~Ytyf>nm2>0wE09K*H!7aRG6J^=6*`Z_ZA)H3xhvdv|@I-Y&AwT=smyfkUY}Q=T z0guuS`MvKW#oh&KlieoNBdl%Mgbels?854CbGt{K@kWGPpa?4L-r4v?!ozatbNeO@G(-DN!f1+tDX3)O<4EmV3R z=K*`xx&XDNeYtR%BR&kA5b;UeRbCiN%oh7>29zp^7kLc-0_}w$P>;$f(SM7m-N(Zd zO1B$3UY$k&gqp%QKV@79At}~)PrMeKdrk6=DCv1vfJ;1b`_r-piBE>&&dQZyX_#ij z6vwhi2l|kNpTrCuPn1=*$zW%0R-yBZ?rz7H_E+$_Ky0H%CdAs32g^egCx#zm8)Iks zx+d?vUOkaEW@|uAe(r(pVQLq<5A^L~EYX;lm%XePB;MI<9PmF^eOXltB9pZo>KnsWB^#K(Pl z)*D`#W#ka3ldR&dpHVGswG{9Pa~`ecAUA6P4V8VMCROZ^Cg_T9ebF*EDe06&_tt5^FR z2=ct$kbd%7L*r1OSfv80Q*+t3UMJ4Mn((_t2YNWpDbtD)S!Dl4zVFFj09%=a5Y8%G z+|KnaYm)&gTAH@M_}+Jzt`KqAVb@_f#l@00sfSuuwfY>?w32_u4^1BiR-FB;zHN=N z6r`9MGPin{)K9)zbaF&o3>=t@fND+u6|bbzWLTKov~nP%LiU;>*Drk zFo=6m1W_xKui}2~P-^XcWtCVt`dLC?Y$bUcJ$qK%!|kBjp#M54yzYWVw7ArwuA&iYiUZ8QIQ6%fH_$LdbCyu%>t9nxt=AS#A?nX3O>E;WasiR1c23ctCP-G<6%t z0bW*v>!{T6Hd~5$gKNO|tfvtK%;GC;FG!tcK=pDW3&yF{H{7V#y|Gt4)N+C4Kz=&x zQ(!Z`GNRA~m=|~||M3IW_Rl9(ve01*mQXx2mv0A)7hdz5jU44FT!Ykh?M^k8@rNXP zEliMOVW=JB{Q}itjXlCGQz;cz#O>d0n2hBCjv)^I-BYanGjM!jdZ_6xGX6K5hFWA+ zz}8po>{dPWL|T?Ch2+KEhv-xwyI$Dav&@|}#y~Cf5u|u82}lfWSpK4z0T&O4 z-g*2_7=)VNl8`f5202-{(OWW5B$~I?M4B%RkgQs%{EH%N-GF=NVh+t%dM7?wn`nF6 zj4_h(*@s&j*DGr;Bqs|b^5Es(Ei6lw}zRQ(HdpWLa<`_wqY z%g?~aSy&?LcjIB+K?+}O-N0YY{lr=Kw89&%%+by|XnWT~Tpb@V+QZi#*jEAXQPIxt zV0PWCE`058MFICy(hTnqH!$NsF3U-qjC1dAPlP6uc!GyYo7v?FQzuw^7?&K{jq&i= z#OiOEPk}QpF8VL~M!JEP^Ri5HW;U$X$mPbBz98HY4j|~h5ueMdWqA`-F3qN+9DuHY zuLt!zl++oPD&ns9d9W7sR-E7v^*Xeot#(DC{Tuv+W|@5{NkKd;xRheWO4;r(%D=mvcuVDnTb|K^6+? z%9sNU+|=*(f`?^(LGM;a)azwo^|GlYYDf>w+wMPKlTr`WEv0^$h(wSp@6J-R9@GLy z@wr=rs@E$XlK0Wa3rt_rfw^?uTl`oZhbANrxZ&FJSr9<>=z@4H`GQaJm@5uD3Vt%0 z0lQdHHkFy*-?m94+AV3`jasxInpu>NtjN4)OPZ2QG%wEtuS7)SM_|Inwdu-$rGUgd z@R)JPImwFEd8^FRb&1Bk+3hCrv7%mIT(!sWqnoV1$W0PL9zTRXR~n{^1H z!~PkUMklWbf5TAAc-(P%6nsdrlhX%<{)39Q1fq+Ka4+7ctS9v{i%qCntE#JxQgiJekf1vw&wr4?_KRg*4XO&rgnzCYCH z{hAV=?Y+;5!`nz%#9&Ayt*LLUnSIyFZ-e>4Szo@fUi);D{=hw^Gq)ZxRB^heOO;Q8!OB+c+?SiZPSbe;xveZn zvmfITZ=tcRrI0|-cZ=w7XqAaW)pJ4+yG;D796(UUy>Vt@C=(Z!wqH{?^6_HG93ySl zaQ|SDE>i>i9hN@4(fD7`1b<~Wek`lN{`Ww~-_d^m^+EUlKDhEHG4elvE3#h{q$E`S zJ-G54q4^Ks%0DwTe@i0#2t53=^1{D+5d9xwShc}i#U(#tSpDO@{}IF5@sAi*GdS+( zUF!hu>9{S3n|DIQ3U9=i2QbdEB3XH{$ry1gU)>_*Dpgh_a}~sbzS4v*4&bsy@lpKkc_8?JdY2m z)+I$E@(|mKH>1XGh2zLMvo4jMnJf>_{y z{!s;!IyU!KtPHI$XjEtwVWjzBSg<_<0LphHH!M#u9mT&s9U0%CyD6-MO^+SX-sFC; zElD$KN1u!tMHus{ro%=7{s3{^9L$6ctd8)L(3!C#n2zRO1AjW%u7rKmX>WFFG4p6ADt-WQwWEdj4xAg74b>;WySq=C^SQlWtoU~_ zDGuWi2=a(?0<)&Tb(@ojd0o53v5zmhGwQbQgJdT%+;Vm6><`bNY!bE~YjOVP7hxz({mXl%q19^SqdHy%)tE~SS<@(d%ADqmefYg8D zSbp;@e=#n9*p`o|&)-bUKezi6dioEh2Idbl^S3b7-+liIP5nC?^Re%*kkvoq|Ba*h z=kb5<|JVofo7wr@|F2{GHOBAb|7LOi^#7-0u(GrNIk(^K|D5AT-+y9o{#^cly*2%7 z2IqhH(Ec@p^Zzn*_2d2ekGHx%8~(b~zixGZqp1Fl;IOlQgw+091m}0?YW%?C!+85) z=F0OZz?#E-UtstXh%0;x7YesB3K`a!=tr)eNPsxFPjE3&kT;|`_eba|xloO=UvNVx zCQWVyI{oR=yw~%mA24l?{MS~W3znt?pN`Ka+pj0bdCys=`0mF^!jTknRQrkg<`W0r zAis0oK>O!nLowT%&?R%9qqN=X__H(;=iiN|w^=}Rb-JMN)u84K_th23Wj5Aavg3*b ztjnu$!P3lAi}0hpL*19u9ng24fYd@kp=)ft^|f3V$%_A~hiHC>Ib$48^H>78q?Vx& z8K~;%_}-Iy4D02#iQi)L9eU#(gQOUQ_>RsQBpY?Q+KP>#12<%4Jve|2XRE-3c*dQI z6*C)74Y(3cTLp8P*##y?Yc{IqGoH4+-E~APGhB)9`n2aQ=UT{lpktk*8iFVzge&q+ zV0RmoA}Kr%V^rk;ybH$HMV61W#-E-)tyG#)ql%ePTnJDV3VgprfFV>3!JRY}F9@T~ zYj81``Xb=K4;9bTP3gn;z>xrie+@>|h8javDYD|NohHceWUQaljkeq7v;NY!y<>`?D{nM}da-gE7@@t96tKk}h7S84hK6 z`0;(Ah<2RiFXideiN8TFyy|e%@C#|w!d%V6B0iiBLAR=@e8J}gbZl1 zxL3Rc=n(h10+{qsvK^=kTT6~$Z`*uEGrGS87A&JAIpv&c(d8T8=!v^wM{#>oJ13{fGK(vc)M9|mG`b>qOSH^+ zFcB=vC!1O*a!^F_wndztuOKN(r;$XYuo^rbOcU`_{Ke2Kmg0UwWZ$`r@RW^WzXS{l zlSPY!W3%`Mt-Gad-CN`BF{*@CwVr2@!GH*1cHWr*BnI4ChILS+uX$ARJfj7V52%n3$)y}(za z?(WB>E7GZY$F**CZV4*&NmQRB&@eQVkencF^<(|R`!Q?}=BVmPJU;pALn*qt#LU+Ua z3?0;agF=I)tZp>;^y#*V-74nm{zO_VFM>0CgZa7En_fx&#@Op)3M#$yFX4)%!LE9`~fGoK;ILbtXG427#pZJIxAayMT2Ctv33`g^XUNvIdV& zLX{cUy{i*bsh6Lmtg}e{+tejvBwTC5q*0UE!1}9lEy9axhN(*nC7stZgouz@d;7iH zS^=Q)nJFk7`BR&Lh_o3|mONS6yA09%&cID2@>u*dX<+L%+GFfH5LO(V6Ly%CtC72S zzr;qC>D9ouSmcKGm zS&Rh}xG?$oCcWM@5CjjM%E6KGz7tYc8KA04;AadGEgkL@z+DobGnOtp05R!z3l#(s z8xwP6pzhKW=DGZ}X{Ex4EJ+{K~5A~u9 zy)c0;Dk<~p8Z3tZU?-w%q;gHNM#cRaG)z-&9j&7A^Fggvrz;mH_aa0y0Q}1|?9I6t z1DoU|e{a?S^qAH|bwPkGoec(k4Hq1NtWdjXo)a_!=B&611LvfjSjKP)oY53vVxh8j z+UX}At!#kQh_TSPGFXbY4fSJBt2Sw^cZ_V@YzBc=1aSM$?&8-vhR8{-oe>Rc2?;b4 z3g>Npi@Qci!z{da2MajYuTm>)85OyD+rO5zfarR&rID{O)urKAN#(wgxl{*dfWfwB z5Kc|(C0;s1e{0Sg$3Ec8JKzGKfuoD6#z>6R`4f|8i>OB)s(WQhB4tMyVn?cl;OS$t z59MZCsEa!KqKx+KrZA&8D`NJJ<$nAJVBH z%dEllN&2Z%&^!UrTWA_ha_)GbBk1*Dneqp3zOV>@5Oo@B9Sz;1Lky2x$JX z;|x?|P){rn^5tLyGK5=MRU@3H4nA1d5${}#q;@N#79d{ni^^}#{JDHtOxm{+ALscToGPQ7ackTJIiQtXv zP2*)wPypxsbs)i>aGm!}9Pjaz0uRgu${ey?-lGgScf_v8;?!b}<zi5icr9I1 zweQhK_L_OQ+?4^?YS;w@eOax#okkM9B)S-o8RsHiB1e^UwWTk!0h(*Ps)2DJnBi98 zS~^8tyhI5h7O^*4Y?3$Y>c~N+VCFMhrgN%UN7>HggF*Y-8~<8}9KgMju`z_7XBpE3 zga3rMnCh9U)n(_)uSUTE-tV z0tc(RhFtuTJ9MfY)Fb9G4Yi-bTQ5FOgpHfM(Ae`lie8c~aBl|rNZ&~VPnhkRp{7vT zDVonZrBOEcV}hma*}|Gf$)KV&w%#{KH4EC*32P}%PjD}2_7o#hy7s2irl*qoXH3dJ z@S8a+35uhPUjx3>@S%Z{wNd9FqZBjW_-4z;3$epgoZ6G@Tm289a3BbR%HW_xq!d}Fh_RxXwK%lyB-2GwZ@TA4J-D)61 z=SbL%5;DsCIR4J28LcU__Mw-Rw-kxSY?{f9S5$>4MiKg-qWtnm zz>G$y3#z{c^k9#qKqbP)sf9y%K?s}UX(z#?En(A`rdxQO7g?(jI z9Np3`ZUck6OA?%626uM}2^J(UxCHkQ+=4^!00DwaaCditCqN*B1Rpe5&^tNbch0$Q za@V@O`p4{N)vjID)4P9kKlPNrjU72;Z%?aeYxLWALU|REOi+a3x?r7ju9vuqbo6G7 z-#VRW(+~p_{bhNp`xMjy_?IZsMWNe7@8GU-2?{LMI~OaI>=E1&I*AjZYp3 zAder&`R-~Me+su>%D+W^cwFs&4rG2$36+ermOc? z$Twm4UStQRiI+lXpN0xta5RcjLX(U}%BtsgRSpNs`er`*W9R8d-7THJPs|R2zfv;S zMHZw+J_@E)$pWg_MGJ&hBZH?g#=3V>8k(()X6Hd{gBWV)uOx__1rnbaWvjBrZe)yxLZWQ4{hY#6`bTnfHf5Tf#pUBgA7%0xY0OSL z77hn=8309FBNcXKrHwFx(MSz;ak89HfZ&_Jp4GdJ{Dq5y>{&gdmlO}jGh0{1QMb;Q z=e3xNSnJb9EgzBjL3P~?7KyNag7Ane=!-6FcgYh}O{Q>bY%{4SSA>)m*3aQbW(gJj zY;WxgzrB2#Tc1YR$%V>9fub!vuOndCG`OlHSdsSzC^?$QZiFN`PS+035%zD0@Tqdf6+Cw_U>5P31;k3jbSSP{=WapbWqPhqjD?wXHd(6^AGEddwLmrBGEfFg1Whq2%JTN&kpY$+H?NcZ`2wD*oczEdOG#iW39MXlkvoM8pJPBYtdXsB?cx`DGZMtd`JN zM$$LoTytNZA(fIyDh^($$XYLDfgh$t&oUImd0Kk`q>n^#tluS&F6${DB_*)4D5Z}e z!SHSgv3=E^roD1eTF6OeVq}xABJQ)pjtFMw|BSXB7B92K-}(q%FI`k(fV#k#Ysc93 zxv#L1*T%40)KY0+6f-Tl)m$df;5J$IKwtnrS3ygKHKvO!B*nMbftslEXP$>3=l#`A zjw8Logi|4n;_kXWFn)EK*mW?ztAU-gA}P6k?ef*lZa8*b_>cLy zv3XmKx+kLfd(#o0{m)2#am?fU<*bxd|Q2y}F*{_Gv@SVcS3PMo%>I}5@#E#kt8=6D~E;q%8){Ro5)FZK%I3k%9 ze-9e5LEfg*4&s+;7-$&TZmLg{_;$Nw(k0$?Th#t;#bS^-UhFDAKkt27OPcw^bHT^o z0llo>wWvxAo7QJkZzTLq4n8FT+4vThE`@)x?TCpwPq6qnH*N9*v@+zQXW{j7SB2x4 zt4ItQRSNAI6G68gSip!ujAhb!J0otILfo^@{1+AZDCuQb3>l$SFM)B({0|sU(lOuI zF@qcv&3J-39B|jo^gjDZe!}%UcoHNH5%lBbCT^-*ta(=x^8Ql8J8P$^{jNWuK9i2R zE$+?E$6A?_5mVY%Hd$H7WQ8{5aI;JEV-tx!sPNc#&Ww>9u29o^ts-R~>4tiX4l6*e!%1E-s6XlM(|t65*62=@y)Y(i$*DFU)|L6V_qI~sxT2hKLm5CAJnL@Pn@*v|XF%K+ z9Yto=;@B?xrPMDbJl>W!a`(;S5})aqtkhKWICs$bPOk`0T5|WKOJeI2KECv~fM7MG zki!IxZ?TtNglcnCtnNg;6>EF7sY45-;`Yt`DF!&vWb5+kfQffFArEFW+WJTsR6F`o zr@HW-wiC#LM{ulQc9iDo%XJMS?3(Q^>Or*$v9>dnsr=ps`L z_y+9Tq`UTK8|S_Yej`Rt>l0Ywkt6BiO?8Rmjr%M!O2y4xfAK-nX7>%5xm1KUQTYH` zJ^Ma0o6hBPSmqIrL$w`;6Ouv`L|(v~7L!X7*%(tvBtn(4vfp_k$KvD#OBdZb2_72M z*k7JD+&F)0&SJ#O;SO-TU%T=$35arPJL}0XX?MNwc*Zv@`p&zPy4A-H_4DoFrMFmx z;MKck(rnCY>=CQ1FI9kI#;MTRg3{R=51lT6A)*-vp-s_66HEd|KZJbWT^Pkp+CAe8 zFsTNyPO_db9S-uMsrGIHoAB(J+#) zF-lZhDXoOSf{98dvQgEp2Wx;LmiX~v(8t7jr`4NEMa>OOvPvU5J^5zzYEN*So zI^g!1hx655)lUFY#(~GBgViM`op4)@l~%{ zI)yQv$HJkzZ)**bu6q}0GQoPsqelUC1=3lw!+At?o>9`DNVCj47Lk@l(=7H+6!2J`j4M)Xht>#6XQvzA>*<+@}X4{KfY+0JU9rK=g6T> ztn=ImzR5cHE{XW!Ci?32I0{v#sB`SR&+Rho)E+xtEDP9m|e-r zK!S=T#DUenfvebBbJ)t7y9TB(xTC?mVK=pUjLWg7is$|O&S3byBR7}Hs&ZB(`!shj z?9(}AsimgoL$$=g)q#f_o#=WLcX|ybO{Ifi5s*#uQ>W$f=S+eAsNuqIicSl(zM(WZ zX@aMbO(l)J1mC1rliE?QyEHV&#oCwpR#{Xd)rT_YG}7O_w2oO)4`x$7nkBx|iJo0U z^A%ph{W4J5lt#SeDlEj+BD`AqV;k=(c+NH4N!q^$5Om6IUbb>D7jy3A>EZZ&TnG2P z5(E6LlfQr8yzTgBJl~Y_5^J(|bP15x?)o2U!eOKDPTW6pgmza_i^1s-brg+z2 znWflip-+=1@_=0k4%eg}K+SK%f%OEYrXEypEl{9l*}C~w3>anB`)K0u9Lf^WYSMiV zL;lYNuDSSAE~bY8t`5-AY>)6NBB)JU?Mf~hx9yyCJDGoTr<7MhNgZmBkv-u8i;O-N zc~~?Y{c7`P;!pdc?@e}tt$;-Q7UC`{{;sWJ{;GDA#oOyilKY0i2#(W)*S%F^UiB}& zid+a$P*rgW%)h5ngM<~1KZAByQZJ1<)HyadG#ttMI)oeW%@~+Vnp<+T=n2<`e~b0p z5MO>NnntxoquYk}qf@6``Sk7UJ)v*!prhSv2cG5m#y1Igx7(LQ(b(VoJjiJyg){;P zooPgwWynbEFJ)X8$JYyu7O#_P|DYYYdD1I%UNMKOs78K z-59j|qX3)gifS8HZWNDh(9G_XAEM1t;~}nm$D(a``zusIZF-Yi479UFc*cq1x`{Y_ z&h$X^eI0@s4~!9L(#Xb0LAj zY=mNHs``r^A;WbfrXAi1CziX~+lnVn_m}$sFW^Zhd})F1As1i#VZd$3Sv~ZI$^ZT; zeC4ERy?FiPYVv|#!gcqASkyl3q6~E|*wiUMsuNE)T zm$LnQF2O%Nw^u&5t?&p5CkH3#H+^Trc6<^|xUH(9!dng)&w@3{sZ6U#l0`ZEd@hCb zeSTLM^F8)ME9FFa zWmZd|6Kgt+Ln$PH5faSu6g{lybpK*W>Qve1F-M7xC9?(+RNU-*Qu8~+2A)c)XfFdN zEHcB6dvo)vc8~mvI@^aWvopPX#Twh5+eGvYN0S=cyW#mE!_R|Vep@Y@{IiOrTX8vc zCOxY71d}(*3JZ)_G(?O72|V^Mbf;>2$$+Ky+U}0xM?)pWzQ59PnQ#K1^}L5%wAYH) zud)1e$ZT=B^zs71*v)miJ4-U{<^A1_G^+SNohK4GAPQ%EiZW<9?L1Is%975mA3d&H zEYLj797h3vo4-K8@+unUUWq?{9dGMQG)8u%>FchLx4=(Ndh^__0L{N^_W;?vUT+c5 z=AB`dZblALy)2a;&b^OXromhlUO$GCV)MQvK@K|n>E_toFp5m+j3)Q`9V2lzcGhzz za9(h!<}bmf_v~bhY7I>JCA44cf^78cgsco0VP|s}%%9ZD>%2ZQrb>&D>NIfwc14C`*yMY%-8Sqb0|McgvwWC+q6RyC)7V=CsR8YmR9tFMpcTn)*0_#V*W3%StoHqIcKHYyjzGiAj1x{v2hJ%dAAIm#{efJRs&7Vo;w!Ik zI(sMgl$oyr|10ZbagOpM{bw#tw=QpvmHZB6^L6{v+GSGBz?z-B22Iu8b{l(F#Wm{V zJ5tlK_}eWTk7}^p!dEWlOikc8*5Cd2XE3Si`gX%6ZUmNp(QpFNLG3m-W`v0dS$-`QqD$h9%2>#)->;Vw^=qkOTFu@2WVnorJ~6Gf-*S8{~Jg>bYZ zj^cB45ag55WCzw?eIA+BiS=tp1JM3#W6c#NT-9n02~)~?ZP zv^vqR%cqSOTjPAv%vk|`w&cw-XSCJ~->{A$!{71ReR0bOMpYCe^LbcApQI^le9j6I zmf-`2Xojc5Wc6up@r27!a(dAw1G`$p=A*eR2W2DjxH4`o}KZ1k@s ziZY*t;)J@B5~J3ud%=Ra7y3`?R<1gkaUqjvOSGJ=Hx@kjZAnNZLm_rUG;%zYXi+b@ zo_+eR5eMBv|0ELg&BuD3bF3<0CE$J7p=z&fDjI%<3fh7;XwIa9m?+J?)trz;iwY;q z0h8|YXCq0WbfN?8h?mrQi}w@@S4uJC*oc>4J*_>Sgw3t&1IVp`9YS~7Zw^PUh!z4D z2FF`Z1x&GK-PGVp-CPl_mJn~P{xMgkjo`a{28!zTsn{XpT~rqql;c&OkZ#Sc#mGt#Q$!%DueKKB-YMo>dh_@;9w=hfA&=Pxtbs@m0%!YjQ8?wdPl zU*gfrob(s=#NrevN55k09T3aowxRgCiqVVQiG^pjkOGlDlOiC55=U#5^YWnR<{hBN zdDyar9VE=N|CqDMd1vEnj#S0xEib!BaE``>WGX{g(Rs$t(Ose3Bfks7YOPu=VANRBN98psR_zj=1Us4MGGphe z&vlxjfgR#XROfYPH_s^={GCMW7k3YP$iiu}Dn}XZI&9R`Pvl))W8!lC znkizMPv1yyIFpQfZf%J_n%ISb4kcNWKo)8QHHDE14y3=7vS0ct>vHD2w_;$~XL_MP zPo(@d6dWRk=ECTn83(RQ2BcwtXU#7P^DWAmX+>!XYSb0Wsvpy0@(Te*Wk|qPBY3d_ z!$i}DsAwF--(9-mmjkMcU|jbvj|KQ8;~VbH;i-7tj73Z_9lg1wFs}3kVb{6YViU?b zy5tF`lj;rSmZlF!3le!}@XXAiig?L;$rDNVLQjIo8-Mvbg@>u)!_i5Al@*Da*s0yE z9;`kY_G>4(#pfQu|DDMYPlLU9=%&|ae^=7gw_JE7Izr%^&Po0_U`;OIr?1%R)m4lg zcBoRV;NJ0F$jwID_e84;#W;-R95<<%yHhF~g*7#T>dlTTlN)L2gcg*iGm=-b{mbY% z0cTNfoZ4NkM}=1dlZKJ_$kq%J0R<8Go?7{rxI!kvRCaoKY0e44FhKy*bRp7;#u>{} zn15Vn@8x=B=L|X)_>Tu)gl{@&JD0XF>xal(s$oVpWeVj=Is* zcgdpAj01}!-$2N7CXRFzL>o^mZt!-?lFWBEkxeMtBcm#6;a?M9bKJidqCl-n7oOLf zx(r}AQRwB|RV&ITyQ&O2!bqQWS;(C1={vS*N0E#Y-0)JSYRTYBDEz^F`sn$jnnfz_ zQLR^P_VQ~->Z-_qS+DjZci7FUrSH@y8$tu@I$z3c`Lt;-0*Q?pYD^cl%I>7qZ&6+e z`QGFbDheA(SDcIS0b!Cuh)?9QaW;`D1xW)I#KKZ9A3K5TGA72&$E(tv7V?}IpyZe6 z^du5-nvqq}U5l86d7WKI1JS0w$w+bg(+)P^aZhBbP=DMyAuO1uegmC5PQB=zf80t5 z`5k(7U^R9yyH7K)^ZZId40Uo)bJhIVPdRAQ5CtHu-LC}iS07Adv!Jtmbd;p~I78W7 z0YsecdDS_XWnKx=(~sk?)7>Old+kZrM%&r8*zxMg4Vhjq-sWD5VY99yZPSO0(T<7i z4ZTz9D~)^m`*;~Sibvz*U;6g4kl)`4>!6_0lNFx<7A5Jo9Z_3ANo^eySaF|bPA0DU*Yo7O?U=58wkNMgQN?R|Ou|ROkXs$(fFT|Z z)O$xUs2XZAeFZMxwdm^<*X(Eh`fU5ToXW?6U8B6^Gq9k{J0042uDYeN_Z$$&u@>bN zqRm}yu&e@=h%+XYfX@IYrjbp28Bi~`wycjb5M0mOQ#LXyq7ZLeQl^$#pny>3tid~u z8;@C}MSAjDN|Xz95adXyZd+71LQE}^J`zh?kP&C8c2teYE(#y(a=tydFetUVyQC6T zs(yWX*0=|3`GqXS-QiH=mGrf5W!S}^BDKQt)lPT$O;9fAGqMs}^DL?kN#&OCJy0i? z2~_T&0bwAIb=YwqsVwXMF**q3Yt5Tw6HUn+kI9cqDI^-dx7otW5S+3v{h^=He2-{7 zd?)&3cfO`}`>QjpuqP67dM6u*W)*&{sq>Nu$u!zlpo5}8oeM*(rjU`OL`?f*YO^d>NwW!oRcz;No|ReYr(hk zpnW?Pj(dTCZ^yO{_jV7Q437m$jPjtb4b25iosCdeu~*QXlBtt0qh_h8G5drT-D$kh zTATQLvD~f|1IwSK(1vITQ!+z!Wb&z3lV96gx<(i+VwEHP@O5>U9a7F+9#qk+E? zYBy`lr{59_6c)r=`bnH06T)lycCb-E1W|$(IOkIPiK`{{)Ns9%I=`zb;f7;X6ZsfR zt-Xx8DK6Z4jVNI?{Y^DH13C**dE&Y|LBbh-)Sd$R16^Mjs&ikGJd#6R@Y?zVLAa_v zRygJZ18y2n>NRcn7}D0Ws5Qn1aa{ZkVBaRf{1j;!=?5OtrwY^Nb;-~*s3clniX>K_ z4r;>7h&8*wmMF94t*(f*rG1dtEzI1z|F^kPpl4|Cn$`m&?$7JBbdnzYRc1i<-Sr!ZGgk$=0>SIk>=FihIf9zmJ{20;D?4j2x&d-GmZkCDS<>- ze4|VY#nQJt82dG_nI6S&_otw&=qEP_oi@IQuJ;g{O)=

Z7xxrKBR!*&_Zf?%A}| zoyI9Ll`5hOtLWMi?)ho}2Yq#$ZD`yTqj6YMPmlY>GbR`3$-V%sP(?zb=Zx85XXMqY zU;C=36ak5r#;7va21-vg-ecJE+#UcA)tL$aSQ$_;RbYWG5;-t)iA-uQP}aaeNw6nw zzRvkQ>IavXa>M9%N#v+Dptx;lX#3RUC6$anK;{{x3|mUV2N~xAni+;GRVajIhgdG* z3lm9jE2x{$NB_luOg*9QwddP2B8xyL;_)BvmO}JHi63ZbfDiPX5!y&>;&&LBb^~qG zeF-0e`*Rnskn3@0M3myC%`jDNRu5VIbCXCn&AlmI*9dFF&sQv5u4(LIk7$Kf=FUTp zn1{B9)-**#&L}-S_Y(Fn`2BGtBa0a6@3FJiO@MSxXikze+57h4MSeEO1-eQxzg0MA zGq-)Qt#cXRn7^E{SP8mzZX6toj-r(jo~q zOY~uMJl7XKTh{_UZ6_BRFojme@4LP%E-%)D6iv=1q8(4v$8?qy9d;jMiugU6@{g`K zAth7oM46N~@oYBich1?R+*BRAE94=i3w89T;A5rsbDVq(GqpK=UDg}INIP_M0!$9~ za>r1-ijyWR^7Zzv_f{3WpI%#1a;&E2Ndwq38-8q9Gj&hqq4)%h$a^zp-^;$HyN1E2 zsf1Zf3|v5- z?rsgJ&QvUS;M$CHoqcMq?6KK3`aUJqUieP$Ti2CPb+I{MA&vG%Jl1qnHg9s)`x8&U z%!6dkfNE~vS(N+h;0NciU||T!SC*&z9h73pahL|(I;?5YHF#EiXAEfW!{Mg@|SzeH^fbhR*-;m$hg1=E#b*MRm z=5q#p23`<1;si5jc|cv=Uo$+FQ;>CXbhq?&XZz2je=FdznnP_}ofvqzdHJ|O3`VRr z?(WVaJUstGtzDtcHn!$&+)l36Mr{9``|qj>Ryh$ch?gG(0f9k$5HOg}00er1IR9h$ zo9+E?j_*@fCkqdAOIL<}#ruqbN7mI6>h6RX|BpJ~|BP*&9OapRpV24viQHp z|CR9bcD1y^L6E(La6tb#7zFtF5#FsB{s-gZgYY9L@xKoS$N$AZ2-Y>i^*=FT0U<<@ z#Xm6+2tm>Q0}~M7|7)&*pfDoKf6f&aK+u-|#DsVe6zG50fx$w&h=PxQU=UtG(BEqa zgZ|1xSOEN|KajAXz@Hc|h@bzj+=PV@^z?tOi?I7$Qt*#hctL^!e_jJ#UJ&Tdn0XOO ziGSt`1`7)Rxds@*_xr#8#~NU;u;8CDBkXwpvV#a9)ENJm3lS9lYh6CX-S~$cgjYc5 zPk#{b-?2b=h5pJHVfR-o5b)ozK)^zOuL0rxEnWJLeEIlzh5p9Cpg*qxA0HS|vhe3z zL?-{m++CrzuPj}0ers4hw|$M+GYGlLXHHJ;48QL*L|qY&vZEDZXaDbSIfTrp7}(s5 z*UB7mn+TafAy!Zj$jlsp2!SDB3qC#}ehUel|Gx>MU;(iW-P{q6mVa$RKExr6^XQSh IrUK6Y0c|9^AOHXW literal 0 HcmV?d00001 diff --git a/doc/chain-self-management-sketch.org b/doc/chain-self-management-sketch.org new file mode 100644 index 0000000..07cfd41 --- /dev/null +++ b/doc/chain-self-management-sketch.org @@ -0,0 +1,672 @@ +-*- mode: org; -*- +#+TITLE: Machi Chain Self-Management Sketch +#+AUTHOR: Scott +#+STARTUP: lognotedone hidestars indent showall inlineimages +#+SEQ_TODO: TODO WORKING WAITING DONE + +* Abstract +Yo, this is the first draft of a document that attempts to describe a +proposed self-management algorithm for Machi's chain replication. +Welcome! Sit back and enjoy the disjointed prose. + +We attempt to describe first the self-management and self-reliance +goals of the algorithm. Then we make a side trip to talk about +write-once registers and how they're used by Machi, but we don't +really fully explain exactly why write-once is so critical (why not +general purpose registers?) ... but they are indeed critical. Then we +sketch the algorithm by providing detailed annotation of a flowchart, +then let the flowchart speak for itself, because writing good prose is +prose is damn hard, but flowcharts are very specific and concise. + +Finally, we try to discuss the network partition simulator that the +algorithm runs in and how the algorithm behaves in both symmetric and +asymmetric network partition scenarios. The symmetric partition cases +are all working well (surprising in a good way), and the asymmetric +partition cases are working well (in a damn mystifying kind of way). +It'd be really, *really* great to get more review of the algorithm and +the simulator. + +* Copyright +%% Copyright (c) 2015 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +* TODO Naming: possible ideas +** Humming consensus? + +See [[https://tools.ietf.org/html/rfc7282][On Consensus and Humming in the IETF]], RFC 7282. + +** Tunesmith? + +A mix of orchestral conducting, music composition, humming? + +** Foggy consensus? + +CORFU-like consensus between mist-shrouded islands of network +partitions + +** Rough consensus + +This is my favorite, but it might be too close to handwavy/vagueness +of English language, even with a precise definition and proof +sketching? + +** Let the bikeshed continue! + +I agree with Chris: there may already be a definition that's close +enough to "rough consensus" to continue using that existing tag than +to invent a new one. TODO: more research required + +* What does "self-management" mean in this context? + +For the purposes of this document, chain replication self-management +is the ability for the N nodes in an N-length chain replication chain +to manage the state of the chain without requiring an external party +to participate. Chain state includes: + +1. Preserve data integrity of all data stored within the chain. Data + loss is not an option. +2. Stably preserve knowledge of chain membership (i.e. all nodes in + the chain, regardless of operational status). A systems + administrators is expected to make "permanent" decisions about + chain membership. +3. Use passive and/or active techniques to track operational + state/status, e.g., up, down, restarting, full data sync, partial + data sync, etc. +4. Choose the run-time replica ordering/state of the chain, based on + current member status and past operational history. All chain + state transitions must be done safely and without data loss or + corruption. +5. As a new node is added to the chain administratively or old node is + restarted, add the node to the chain safely and perform any data + synchronization/"repair" required to bring the node's data into + full synchronization with the other nodes. + +* Goals +** Better than state-of-the-art: Chain Replication self-management + +We hope/believe that this new self-management algorithem can improve +the current state-of-the-art by eliminating all external management +entities. Current state-of-the-art for management of chain +replication chains is discussed below, to provide historical context. + +*** "Leveraging Sharding in the Design of Scalable Replication Protocols" by Abu-Libdeh, van Renesse, and Vigfusson. + +Multiple chains are arranged in a ring (called a "band" in the paper). +The responsibility for managing the chain at position N is delegated +to chain N-1. As long as at least one chain is running, that is +sufficient to start/bootstrap the next chain, and so on until all +chains are running. (The paper then estimates mean-time-to-failure +(MTTF) and suggests a "band of bands" topology to handle very large +clusters while maintaining an MTTF that is as good or better than +other management techniques.) + +If the chain self-management method proposed for Machi does not +succeed, this paper's technique is our best fallback recommendation. + +*** An external management oracle, implemented by ZooKeeper + +This is not a recommendation for Machi: we wish to avoid using ZooKeeper. +However, many other open and closed source software products use +ZooKeeper for exactly this kind of data replica management problem. + +*** An external management oracle, implemented by Riak Ensemble + +This is a much more palatable choice than option #2 above. We also +wish to avoid an external dependency on something as big as Riak +Ensemble. However, if it comes between choosing Riak Ensemble or +choosing ZooKeeper, the choice feels quite clear: Riak Ensemble will +win, unless there is some critical feature missing from Riak +Ensemble. If such an unforseen missing feature is discovered, it +would probably be preferable to add the feature to Riak Ensemble +rather than to use ZooKeeper (and document it and provide product +support for it and so on...). + +** Support both eventually consistent & strongly consistent modes of operation + +Machi's first use case is for Riak CS, as an eventually consistent +store for CS's "block" storage. Today, Riak KV is used for "block" +storage. Riak KV is an AP-style key-value store; using Machi in an +AP-style mode would match CS's current behavior from points of view of +both code/execution and human administrator exectations. + +Later, we wish the option of using CP support to replace other data +store services that Riak KV provides today. (Scope and timing of such +replacement TBD.) + +We believe this algorithm allows a Machi cluster to fragment into +arbitrary islands of network partition, all the way down to 100% of +members running in complete network isolation from each other. +Furthermore, it provides enough agreement to allow +formerly-partitioned members to coordinate the reintegration & +reconciliation of their data when partitions are healed. + +** Preserve data integrity of Chain Replicated data + +While listed last in this section, preservation of data integrity is +paramount to any chain state management technique for Machi. + +** Anti-goal: minimize churn + +This algorithm's focus is data safety and not availability. If +participants have differing notions of time, e.g., running on +extremely fast or extremely slow hardware, then this algorithm will +"churn" in different states where the chain's data would be +effectively unavailable. + +In practice, however, any series of network partition changes that +case this algorithm to churn will cause other management techniques +(such as an external "oracle") similar problems. [Proof by handwaving +assertion.] See also: "time model" assumptions (below). + +* Assumptions +** Introduction to assumptions, why they differ from other consensus algorithms + +Given a long history of consensus algorithms (viewstamped replication, +Paxos, Raft, et al.), why bother with a slightly different set of +assumptions and a slightly different protocol? + +The answer lies in one of our explicit goals: to have an option of +running in an "eventually consistent" manner. We wish to be able to +make progress, i.e., remain available in the CAP sense, even if we are +partitioned down to a single isolated node. VR, Paxos, and Raft +alone are not sufficient to coordinate service availability at such +small scale. + +** The CORFU protocol is correct + +This work relies tremendously on the correctness of the CORFU +protocol, a cousin of the Paxos protocol. If the implementation of +this self-management protocol breaks an assumption or prerequisite of +CORFU, then we expect that the implementation will be flawed. + +** Communication model: Asyncronous message passing +*** Unreliable network: messages may be arbitrarily dropped and/or reordered +**** Network partitions may occur at any time +**** Network partitions may be asymmetric: msg A->B is ok but B->A fails +*** Messages may be corrupted in-transit +**** Assume that message MAC/checksums are sufficient to detect corruption +**** Receiver informs sender of message corruption +**** Sender may resend, if/when desired +*** System particpants may be buggy but not actively malicious/Byzantine +** Time model: per-node clocks, loosely synchronized (e.g. NTP) + +The protocol & algorithm presented here do not specify or require any +timestamps, physical or logical. Any mention of time inside of data +structures are for human/historic/diagnostic purposes only. + +Having said that, some notion of physical time is suggested for +purposes of efficiency. It's recommended that there be some "sleep +time" between iterations of the algorithm: there is no need to "busy +wait" by executing the algorithm as quickly as possible. See below, +"sleep intervals between executions". + +** Failure detector model: weak, fallible, boolean + +We assume that the failure detector that the algorithm uses is weak, +it's fallible, and it informs the algorithm in boolean status +updates/toggles as a node becomes available or not. + +If the failure detector is fallible and tells us a mistaken status +change, then the algorithm will "churn" the operational state of the +chain, e.g. by removing the failed node from the chain or adding a +(re)started node (that may not be alive) to the end of the chain. +Such extra churn is regrettable and will cause periods of delay as the +"rough consensus" (decribed below) decision is made. However, the +churn cannot (we assert/believe) cause data loss. + +** The "wedge state", as described by the Machi RFC & CORFU + +A chain member enters "wedge state" when it receives information that +a newer projection (i.e., run-time chain state reconfiguration) is +available. The new projection may be created by a system +administrator or calculated by the self-management algorithm. +Notification may arrive via the projection store API or via the file +I/O API. + +When in wedge state, the server/FLU will refuse all file write I/O API +requests until the self-management algorithm has determined that +"rough consensus" has been decided (see next bullet item). The server +may also refuse file read I/O API requests, depending on its CP/AP +operation mode. + +See the Machi RFC for more detail of the wedge state and also the +CORFU papers. + +** "Rough consensus": consensus built upon data that is *visible now* + +CS literature uses the word "consensus" in the context of the problem +description at +[[http://en.wikipedia.org/wiki/Consensus_(computer_science)#Problem_description]]. +This traditional definition differs from what is described in this +document. + +The phrase "rough consensus" will be used to describe +consensus derived only from data that is visible/known at the current +time. This implies that a network partition may be in effect and that +not all chain members are reachable. The algorithm will calculate +"rough consensus" despite not having input from all/majority/minority +of chain members. "Rough consensus" may proceed to make a +decision based on data from only a single participant, i.e., the local +node alone. + +When operating in AP mode, i.e., in eventual consistency mode, "rough +consensus" could mean that an chain of length N could split into N +independent chains of length 1. When a network partition heals, the +rough consensus is sufficient to manage the chain so that each +replica's data can be repaired/merged/reconciled safely. +(Other features of the Machi system are designed to assist such +repair safely.) + +When operating in CP mode, i.e., in strong consistency mode, "rough +consensus" would require additional supplements. For example, any +chain that didn't have a minimum length of the quorum majority size of +all members would be invalid and therefore would not move itself out +of wedged state. In very general terms, this requirement for a quorum +majority of surviving participants is also a requirement for Paxos, +Raft, and ZAB. + +(Aside: The Machi RFC also proposes using "witness" chain members to +make service more available, e.g. quorum majority of "real" plus +"witness" nodes *and* at least one member must be a "real" node. See +the Machi RFC for more details.) + +** Heavy reliance on a key-value store that maps write-once registers + +The projection store is implemented using "write-once registers" +inside a key-value store: for every key in the store, the value must +be either of: + +- The special 'unwritten' value +- An application-specific binary blob that is immutable thereafter + +* The projection store, built with write-once registers + +- NOTE to the reader: The notion of "public" vs. "private" projection + stores does not appear in the Machi RFC. + +Each participating chain node has its own "projection store", which is +a specialized key-value store. As a whole, a node's projection store +is implemented using two different key-value stores: + +- A publicly-writable KV store of write-once registers +- A privately-writable KV store of write-once registers + +Both stores may be read by any cluster member. + +The store's key is a positive integer; the integer represents the +epoch number of the projection. The store's value is an opaque +binary blob whose meaning is meaningful only to the store's clients. + +See the Machi RFC for more detail on projections and epoch numbers. + +** The publicly-writable half of the projection store + +The publicly-writable projection store is used to share information +during the first half of the self-management algorithm. Any chain +member may write a projection to this store. + +** The privately-writable half of the projection store + +The privately-writable projection store is used to store the "rough +consensus" result that has been calculated by the local node. Only +the local server/FLU may write values into this store. + +The private projection store serves multiple purposes, including: + +- remove/clear the local server from "wedge state" +- act as the store of record for chain state transitions +- communicate to remote nodes the past states and current operational + state of the local node + +* Modification of CORFU-style epoch numbering and "wedge state" triggers + +According to the CORFU research papers, if a server node N or client +node C believes that epoch E is the latest epoch, then any information +that N or C receives from any source that an epoch E+delta (where +delta > 0) exists will push N into the "wedge" state and C into a mode +of searching for the projection definition for the newest epoch. + +In the algorithm sketch below, it should become clear that it's +possible to have a race where two nodes may attempt to make proposals +for a single epoch number. In the simplest case, assume a chain of +nodes A & B. Assume that a symmetric network partition between A & B +happens, and assume we're operating in AP/eventually consistent mode. + +On A's network partitioned island, A can choose a UPI list of `[A]'. +Similarly B can choose a UPI list of `[B]'. Both might choose the +epoch for their proposal to be #42. Because each are separated by +network partition, neither can realize the conflict. However, when +the network partition heals, it can become obvious that there are +conflicting values for epoch #42 ... but if we use CORFU's protocol +design, which identifies the epoch identifier as an integer only, then +the integer 42 alone is not sufficient to discern the differences +between the two projections. + +The proposal modifies all use of CORFU's projection identifier +to use the identifier below instead. (A later section of this +document presents a detailed example.) + +#+BEGIN_SRC +{epoch #, hash of the entire projection (minus hash field itself)} +#+END_SRC + +* Sketch of the self-management algorithm +** Introduction +See also, the diagram (((Diagram1.eps))), a flowchart of the +algorithm. The code is structured as a state machine where function +executing for the flowchart's state is named by the approximate +location of the state within the flowchart. The flowchart has three +columns: + +1. Column A: Any reason to change? +2. Column B: Do I act? +3. Column C: How do I act? + +States in each column are numbered in increasing order, top-to-bottom. + +** Flowchart notation +- Author: a function that returns the author of a projection, i.e., + the node name of the server that proposed the projection. + +- Rank: assigns a numeric score to a projection. Rank is based on the + epoch number (higher wins), chain length (larger wins), number & + state of any repairing members of the chain (larger wins), and node + name of the author server (as a tie-breaking criteria). + +- E: the epoch number of a projection. + +- UPI: "Update Propagation Invariant". The UPI part of the projection + is the ordered list of chain members where the UPI is preserved, + i.e., all UPI list members have their data fully synchronized + (except for updates in-process at the current instant in time). + +- Repairing: the ordered list of nodes that are in "repair mode", + i.e., synchronizing their data with the UPI members of the chain. + +- Down: the list of chain members believed to be down, from the + perspective of the author. This list may be constructed from + information from the failure detector and/or by status of recent + attempts to read/write to other nodes' public projection store(s). + +- P_current: local node's projection that is actively used. By + definition, P_current is the latest projection (i.e. with largest + epoch #) in the local node's private projection store. + +- P_newprop: the new projection proposal that is calculated locally, + based on local failure detector info & other data (e.g., + success/failure status when reading from/writing to remote nodes' + projection stores). + +- P_latest: this is the highest-ranked projection with the largest + single epoch # that has been read from all available public + projection stores, including the local node's public store. + +- Unanimous: The P_latest projections are unanimous if they are + effectively identical. Minor differences such as creation time may + be ignored, but elements such as the UPI list must not be ignored. + NOTE: "unanimous" has nothing to do with the number of projections + compared, "unanimous" is *not* the same as a "quorum majority". + +- P_current -> P_latest transition safe?: A predicate function to + check the sanity & safety of the transition from the local node's + P_current to the P_newprop, which must be unanimous at state C100. + +- Stop state: one iteration of the self-management algorithm has + finished on the local node. The local node may execute a new + iteration at any time. + +** Column A: Any reason to change? +*** A10: Set retry counter to 0 +*** A20: Create a new proposed projection based on the current projection +*** A30: Read copies of the latest/largest epoch # from all nodes +*** A40: Decide if the local proposal P_newprop is "better" than P_latest +** Column B: Do I act? +*** B10: 1. Is the latest proposal unanimous for the largest epoch #? +*** B10: 2. Is the retry counter too big? +*** B10: 3. Is another node's proposal "ranked" equal or higher to mine? +** Column C: How to act? +*** C1xx: Save latest proposal to local private store, unwedge, stop. +*** C2xx: Ping author of latest to try again, then wait, then repeat alg. +*** C3xx: My new proposal appears best: write @ all public stores, repeat alg + +** Flowchart notes +*** Algorithm execution rates / sleep intervals between executions + +Due to the ranking algorithm's preference for author node names that +are small (lexicographically), nodes with smaller node names should +execute the algorithm more frequently than other nodes. The reason +for this is to try to avoid churn: a proposal by a "big" node may +propose a UPI list of L at epoch 10, and a few moments later a "small" +node may propose the same UPI list L at epoch 11. In this case, there +would be two chain state transitions: the epoch 11 projection would be +ranked higher than epoch 10's projeciton. If the "small" node +executed more frequently than the "big" node, then it's more likely +that epoch 10 would be written by the "small" node, which would then +cause the "big" node to stop at state A40 and avoid any +externally-visible action. + +*** Transition safety checking + +In state C100, the transition from P_current -> P_latest is checked +for safety and sanity. The conditions used for the check include: + +1. The Erlang data types of all record members are correct. +2. UPI, down, & repairing lists contain no duplicates and are in fact + mutually disjoint. +3. The author node is not down (as far as we can tell). +4. Any additions in P_latest in the UPI list must appear in the tail + of the UPI list and were formerly in P_current's repairing list. +5. No re-ordering of the UPI list members: P_latest's UPI list prefix + must be exactly equal to P_current's UPI prefix, and any P_latest's + UPI list suffix must in the same order as they appeared in + P_current's repairing list. + +The safety check may be performed pair-wise once or pair-wise across +the entire history sequence of a server/FLU's private projection +store. + +*** A simple example race between two participants noting a 3rd's failure + +Assume a chain of three nodes, A, B, and C. In a projection at epoch +E. For all nodes, the P_current projection at epoch E is: + +#+BEGIN_QUOTE +UPI=[A,B,C], Repairing=[], Down=[] +#+END_QUOTE + +Now assume that C crashes during epoch E. The failure detector +running locally at both A & B eventually notice C's death. The new +information triggers a new iteration of the self-management algorithm. +A calculates its P_newprop (call it P_newprop_a) and writes it to its +own public projection store. Meanwhile, B does the same and wins the +race to write P_newprop_b to its own public projection store. + +At this instant in time, the public projection stores of each node +looks something like this: + +|-------+--------------+--------------+--------------| +| Epoch | Node A | Node B | Node C | +|-------+--------------+--------------+--------------| +| E | UPI=[A,B,C] | UPI=[A,B,C] | UPI=[A,B,C] | +| | Repairing=[] | Repairing=[] | Repairing=[] | +| | Down=[] | Down=[] | Down=[] | +| | Author=A | Author=A | Author=A | +|-------+--------------+--------------+--------------| +| E+1 | UPI=[A,B] | UPI=[A,B] | C is dead, | +| | Repairing=[] | Repairing=[] | unwritten | +| | Down=[C] | Down=[C] | | +| | Author=A | Author=B | | +|-------+--------------+--------------+--------------| + +If we use the CORFU-style projection naming convention, where a +projection's name is exactly equal to the epoch number, then all +participants cannot tell the difference between the projection at +epoch E+1 authored by node A from the projection at epoch E+1 authored +by node B: the names are the same, i.e., E+1. + +Machi must extend the original CORFU protocols by changing the name of +the projection. In Machi's case, the projection is named by this +2-tuple: +#+BEGIN_SRC +{epoch #, hash of the entire projection (minus hash field itself)} +#+END_SRC + +This name is used in all relevant APIs where the name is required to +make a wedge state transition. In the case of the example & table +above, all of the UPI & Repairing & Down lists are equal. However, A +& B's unanimity is due to the symmetric nature of C's partition: C is +dead. In the case of an asymmetric partition of C, it is indeed +possible for A's version of epoch E+1's UPI list to be different from +B's UPI list in the same epoch E+1. + +*** A second example, building on the first example + +Building on the first example, let's assume that A & B have reconciled +their proposals for epoch E+2. Nodes A & B are running under a +unanimous proposal at E+2. + +|-------+--------------+--------------+--------------| +| E+2 | UPI=[A,B] | UPI=[A,B] | C is dead, | +| | Repairing=[] | Repairing=[] | unwritten | +| | Down=[C] | Down=[C] | | +| | Author=A | Author=A | | +|-------+--------------+--------------+--------------| + +Now assume that C restarts. It was dead for a little while, and its +code is slightly buggy. Node C decides to make a proposal without +first consulting its failure detector: let's assume that C believes +that only C is alive. Also, C knows that epoch E was the last epoch +valid before it crashed, so it decides that it will write its new +proposal at E+2. The result is a set of public projection stores that +look like this: + +|-----+--------------+--------------+--------------| +| E+2 | UPI=[A,B] | UPI=[A,B] | UPI=[C] | +| | Repairing=[] | Repairing=[] | Repairing=[] | +| | Down=[C] | Down=[C] | Down=[A,B] | +| | Author=A | Author=A | | +|-----+--------------+--------------+--------------| + +Now we're in a pickle where a client C could read the latest +projection from node C and get a different view of the world than if +it had read the latest projection from nodes A or B. + +If running in AP mode, this wouldn't be a big problem: a write to node +C only (or a write to nodes A & B only) would be reconciled +eventually. Also, eventually, one of the nodes would realize that C +was no longer partitioned and would make a new proposal at epoch E+3. + +If running in CP mode, then any client that attempted to use C's +version of the E+2 projection would fail: the UPI list does not +contain a quorum majority of nodes. (Other discussion of CP mode's +use of quorum majority for UPI members is out of scope of this +document. Also out of scope is the use of "witness servers" to +augment the quorum majority UPI scheme.) + +* The Simulator +** Overview +The function machi_chain_manager1_test:convergence_demo_test() +executes the following in a simulated network environment within a +single Erlang VM: + +#+BEGIN_QUOTE +Test the convergence behavior of the chain self-management algorithm +for Machi. + + 1. Set up 4 FLUs and chain manager pairs. + + 2. Create a number of different network partition scenarios, where + (simulated) partitions may be symmetric or asymmetric. (At the + Seattle 2015 meet-up, I called this the "shaking the snow globe" + phase, where asymmetric network partitions are simulated and are + calculated at random differently for each simulated node. During + this time, the simulated network is wildly unstable.) + + 3. Then halt changing the partitions and keep the simulated network + stable. The simulated may remain broken (i.e. at least one + asymmetric partition remains in effect), but at least it's + stable. + + 4. Run a number of iterations of the algorithm in parallel by poking + each of the manager processes on a random'ish basis to simulate + the passage of time. + + 5. Afterward, fetch the chain transition histories made by each FLU + and verify that no transition was ever unsafe. +#+END_QUOTE + + +** Behavior in symmetric network partitions + +The simulator has yet to find an error. This is both really cool and +really terrifying: is this *really* working? No, seriously, where are +the bugs? Good question. Both the algorithm and the simulator need +review and futher study. + +In fact, it'd be awesome if I could work with someone who has more +TLA+ experience than I do to work on a formal specification of the +self-management algorithm and verify its correctness. + +** Behavior in asymmetric network partitions + +The simulator's behavior during stable periods where at least one node +is the victim of an asymmetric network partition is ... weird, +wonderful, and something I don't completely understand yet. This is +another place where we need more eyes reviewing and trying to poke +holes in the algorithm. + +In cases where any node is a victim of an asymmetric network +partition, the algorithm oscillates in a very predictable way: each +node X makes the same P_newprop projection at epoch E that X made +during a previous recent epoch E-delta (where delta is small, usually +much less than 10). However, at least one node makes a proposal that +makes unanimous results impossible. When any epoch E is not +unanimous, the result is one or more new rounds of proposals. +However, because any node N's proposal doesn't change, the system +spirals into an infinite loop of never-fully-unanimous proposals. + +From the sole perspective of any single participant node, the pattern +of this infinite loop is easy to detect. When detected, the local +node moves to a slightly different mode of operation: it starts +suspecting that a "proposal flapping" series of events is happening. +(The name "flap" is taken from IP network routing, where a "flapping +route" is an oscillating state of churn within the routing fabric +where one or more routes change, usually in a rapid & very disruptive +manner.) + +If flapping is suspected, then the count of number of flap cycles is +counted. If the local node sees all participants (including itself) +flappign with the same relative proposed projection for 5 times in a +row, then the local node has firm evidence that there is an asymmetric +network partition somewhere in the system. The pattern of proposals +is analyzed, and the local node makes a decision: + +1. The local node is directly affected by the network partition. The + result: stop making new projection proposals until the failure + detector belives that a new status change has taken place. + +2. The local node is not directly affected by the network partition. + The result: continue participating in the system by continuing new + self-management algorithm iterations. + +After the asymmetric partition victims have "taken themselves out of +the game" temporarily, then the remaining participants rapidly +converge to rough consensus and then a visibly unanimous proposal. +For as long as the network remains partitioned but stable, any new +iteration of the self-management algorithm stops without +externally-visible effects. (I.e., it stops at the bottom of the +flowchart's Column A.) + From 72bf329e1c86d2d49fea0d0c318de1cf098250b5 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 14:57:24 +0900 Subject: [PATCH 03/70] Add fledgling log implementation based on CORFU papers --- prototype/corfurl/.gitignore | 4 + prototype/corfurl/Makefile | 23 + prototype/corfurl/include/corfurl.hrl | 26 ++ prototype/corfurl/src/corfurl.erl | 311 +++++++++++++ prototype/corfurl/src/corfurl_flu.erl | 471 ++++++++++++++++++++ prototype/corfurl/src/corfurl_sequencer.erl | 123 +++++ prototype/corfurl/src/corfurl_util.erl | 36 ++ 7 files changed, 994 insertions(+) create mode 100644 prototype/corfurl/.gitignore create mode 100644 prototype/corfurl/Makefile create mode 100644 prototype/corfurl/include/corfurl.hrl create mode 100644 prototype/corfurl/src/corfurl.erl create mode 100644 prototype/corfurl/src/corfurl_flu.erl create mode 100644 prototype/corfurl/src/corfurl_sequencer.erl create mode 100644 prototype/corfurl/src/corfurl_util.erl diff --git a/prototype/corfurl/.gitignore b/prototype/corfurl/.gitignore new file mode 100644 index 0000000..d712c69 --- /dev/null +++ b/prototype/corfurl/.gitignore @@ -0,0 +1,4 @@ +.eunit +deps +ebin/*.beam +ebin/*.app diff --git a/prototype/corfurl/Makefile b/prototype/corfurl/Makefile new file mode 100644 index 0000000..ef51767 --- /dev/null +++ b/prototype/corfurl/Makefile @@ -0,0 +1,23 @@ +REBAR_BIN := $(shell which rebar) +ifeq ($(REBAR_BIN),) +REBAR_BIN = ./rebar +endif + +.PHONY: rel deps package pkgclean + +all: deps compile + +compile: + $(REBAR_BIN) compile + +deps: + $(REBAR_BIN) get-deps + +clean: + $(REBAR_BIN) clean + +test: deps compile eunit + +eunit: + $(REBAR_BIN) -v skip_deps=true eunit + diff --git a/prototype/corfurl/include/corfurl.hrl b/prototype/corfurl/include/corfurl.hrl new file mode 100644 index 0000000..fa843e3 --- /dev/null +++ b/prototype/corfurl/include/corfurl.hrl @@ -0,0 +1,26 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% 1 byte @ offset 0: 0=unwritten, 1=written, 2=trimmed, 255=corrupt? TODO +%% 8 bytes @ offset 1: logical page number +%% P bytes @ offset 9: page data +%% 1 byte @ offset 9+P: 0=unwritten, 1=written +-define(PAGE_OVERHEAD, (1 + 8 + 1)). + diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl new file mode 100644 index 0000000..87d2bb0 --- /dev/null +++ b/prototype/corfurl/src/corfurl.erl @@ -0,0 +1,311 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl). + +-export([new_simple_projection/4, + new_range/3, + read_projection/2, + save_projection/2]). +-export([append_page/3]). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-endif. + +-type flu_name() :: atom(). +-type flu() :: pid() | flu_name(). +-type flu_chain() :: [flu()]. + +-record(range, { + pn_start :: non_neg_integer(), % start page number + pn_end :: non_neg_integer(), % end page number + chains :: [flu_chain()] + }). + +-record(proj, { % Projection + epoch :: non_neg_integer(), + r :: [#range{}] + }). + +%% append_page(Sequencer, P, Page) -> +%% append_page(Sequencer, P, 1, [Page]). + +%% append_page(Sequencer, P, NumPages, PageList) -> +%% FirstPN = corfurl_sequencer:get(Sequencer, NumPages), +%% [append_single_page(P, LPN, Page) || +%% {LPN, Page} <- lists:zip(lists:seq(FirstPN, FirstPN+NumPages-1), +%% PageList)]. + +append_page(Sequencer, P, Page) -> + append_page(Sequencer, P, Page, 1). + +append_page(Sequencer, P, Page, Retries) when Retries < 50 -> + case corfurl_sequencer:get(Sequencer, 1) of + LPN when is_integer(LPN) -> + case append_single_page(P, LPN, Page) of + ok -> + ok; + X when X == error_written; X == error_trimmed -> + io:format(user, "LPN ~p race lost: ~p\n", [LPN, X]), + append_page(Sequencer, P, Page); + Else -> + exit({todo, ?MODULE, line, ?LINE, Else}) + end; + _ -> + timer:sleep(Retries), % TODO naive + append_page(Sequencer, P, Page, Retries * 2) + end. + +append_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> + Chain = project_to_chain(LPN, P), + append_single_page_to_chain(Chain, Epoch, LPN, Page, 1). + +append_single_page_to_chain([], _Epoch, _LPN, _Page, _Nth) -> + ok; +append_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> + case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of + ok -> + append_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + error_badepoch -> + %% TODO: Interesting case: there may be cases where retrying with + %% a new epoch & that epoch's projection is just fine (and + %% we'll succeed) and cases where retrying will fail. + %% Figure out what those cases are, then for the + %% destined-to-fail case, try to clean up (via trim?)? + error_badepoch; + error_trimmed -> + %% Whoa, partner, you're movin' kinda fast for a trim. + %% This might've been due to us being too slow and someone + %% else junked us. + %% TODO We should go trim our previously successful writes? + error_trimmed; + error_written when Nth == 1 -> + %% The sequencer lied, or we didn't use the sequencer and + %% guessed and guessed poorly, or someone is accidentally + %% trying to take our page. Shouganai, these things happen. + error_written; + error_written when Nth > 1 -> + %% The likely cause is that another reader has noticed that + %% we haven't finished writing this page in this chain and + %% has repaired the remainder of the chain while we were + %% drinking coffee. Let's double-check. + case corfurl_flu:read(flu_pid(FLU), Epoch, LPN) of + {ok, AlreadyThere} when AlreadyThere =:= Page -> + %% Alright, well, let's go continue the repair/writing, + %% since we agree on the page's value. + append_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + error_badepoch -> + %% TODO: same TODO as the above error_badepoch case. + error_badepoch; + error_overwritten -> + error({impossible, ?MODULE, ?LINE, left_off_here}) + end; + Else -> + %% TODO: corner case +io:format(user, "WTF? Else = ~p\n", [Else]), + Else + end. + +flu_pid(X) when is_pid(X) -> + X; +flu_pid(X) when is_atom(X) -> + ets:lookup_element(flu_pid_tab, X, 1). + +%%%% %%%% %%%% projection utilities %%%% %%%% %%%% + +new_range(Start, End, ChainList) -> + %% TODO: sanity checking of ChainList, Start < End, yadda + #range{pn_start=Start, pn_end=End, chains=list_to_tuple(ChainList)}. + +new_simple_projection(Epoch, Start, End, ChainList) -> + #proj{epoch=Epoch, r=[new_range(Start, End, ChainList)]}. + +make_projection_path(Dir, Epoch) -> + lists:flatten(io_lib:format("~s/~12..0w.proj", [Dir, Epoch])). + +read_projection(Dir, Epoch) -> + case file:read_file(make_projection_path(Dir, Epoch)) of + {ok, Bin} -> + {ok, binary_to_term(Bin)}; % TODO if corrupted? + {error, enoent} -> + error_unwritten; + Else -> + Else % TODO API corner case + end. + +save_projection(Dir, #proj{epoch=Epoch} = P) -> + Path = make_projection_path(Dir, Epoch), + ok = filelib:ensure_dir(Dir ++ "/ignored"), + {_, B, C} = now(), + TmpPath = Path ++ lists:flatten(io_lib:format(".~w.~w.~w", [B, C, node()])), + %% TODO: don't be lazy, do a flush before link when training wheels come off + ok = file:write_file(TmpPath, term_to_binary(P)), + case file:make_link(TmpPath, Path) of + ok -> + file:delete(TmpPath), + ok; + {error, eexist} -> + error_overwritten; + Else -> + Else % TODO API corner case + end. + +project_to_chain(LPN, P) -> + %% TODO fixme + %% TODO something other than round-robin? + [#range{pn_start=Start, pn_end=End, chains=Chains}] = P#proj.r, + if Start =< LPN, LPN =< End -> + I = ((LPN - Start) rem tuple_size(Chains)) + 1, + element(I, Chains) + end. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +-ifdef(TEST). + +save_read_test() -> + Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", + Chain = [a,b], + P1 = new_simple_projection(1, 1, 1*100, [Chain]), + + try + filelib:ensure_dir(Dir ++ "/ignored"), + ok = save_projection(Dir, P1), + error_overwritten = save_projection(Dir, P1), + + {ok, P1} = read_projection(Dir, 1), + error_unwritten = read_projection(Dir, 2), + + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +setup_flu_basedir() -> + "/tmp/" ++ atom_to_list(?MODULE) ++ ".". + +setup_flu_dir(N) -> + setup_flu_basedir() ++ integer_to_list(N). + +setup_del_all(NumFLUs) -> + [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || + N <- lists:seq(1, NumFLUs)]. + +setup_basic_flus(NumFLUs, PageSize, NumPages) -> + setup_del_all(NumFLUs), + [begin + element(2, corfurl_flu:start_link(setup_flu_dir(X), + PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) + end || X <- lists:seq(1, NumFLUs)]. + +append_test() -> + NumFLUs = 4, + PageSize = 8, + NumPages = 10, + FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + try + P1 = new_simple_projection(1, 1, 1*100, [[F1, F2], [F3, F4]]), + [begin + Pg = lists:flatten(io_lib:format("~8..0w", [X])), + ok = append_page(Seq, P1, list_to_binary(Pg)) + end || X <- lists:seq(1, 5)], + + ok + after + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +forfun_append(0, _Seq, _P, _Page) -> + ok; +forfun_append(N, Seq, P, Page) -> + ok = append_page(Seq, P, Page), + forfun_append(N - 1, Seq, P, Page). + +-ifdef(TIMING_TEST). + +forfun_test_() -> + {timeout, 99999, fun() -> + [forfun(Procs) || Procs <- [10,100,1000,5000]] + end}. + +%%% My MBP, SSD +%%% The 1K and 5K procs shows full-mailbox-scan ickiness +%%% when getting replies from prim_file. :-( + +%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.016815 sec +%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.547976 sec +%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 13.706686 sec +%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 33.516312 sec + +%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.350147 sec +%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.429485 sec +%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.643233 sec +%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 15.686058 sec + +%%%% forfun: 10 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 13.479458 sec +%%%% forfun: 100 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 14.752565 sec +%%%% forfun: 1000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 25.012306 sec +%%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec + +forfun(NumProcs) -> + io:format(user, "\n", []), + NumFLUs = 4, + PageSize = 8, + %%PageSize = 4096, + NumPages = 200*1000, + PagesPerProc = NumPages div NumProcs, + FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + try + Chains = [[F1, F2], [F3, F4]], + %%Chains = [[F1], [F2], [F3], [F4]], + P = new_simple_projection(1, 1, NumPages*2, Chains), + Me = self(), + Start = now(), + Ws = [begin + Page = <>, + spawn_link(fun() -> + forfun_append(PagesPerProc, Seq, P, Page), + Me ! {done, self()} + end) + end || X <- lists:seq(1, NumProcs)], + [receive {done, W} -> ok end || W <- Ws], + End = now(), + io:format(user, "forfun: ~p procs writing ~p pages of ~p bytes/page to ~p chains of ~p total FLUs in ~p sec\n", + [NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]), + ok + after + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +-endif. % TIMING_TEST + +-endif. % TEST diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl new file mode 100644 index 0000000..6ee0d0b --- /dev/null +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -0,0 +1,471 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_flu). + +-behaviour(gen_server). + +-type flu_error() :: 'error_badepoch' | 'error_trimmed' | + 'error_overwritten' | 'error_unwritten'. +-export_type([flu_error/0]). + +%% API +-export([start_link/1, start_link/3, status/1, stop/1]). +-export([write/4, read/3, seal/2, trim/3, fill/3]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-export([get__mlp/1, get__min_epoch/1, get__trim_watermark/1]). +-endif. + +-include_lib("kernel/include/file.hrl"). + +-record(state, { + dir :: string(), + mem_fh :: term(), + min_epoch :: non_neg_integer(), + page_size :: non_neg_integer(), + max_mem :: non_neg_integer(), + max_logical_page :: 'unknown' | non_neg_integer(), + %% TODO: Trim watermark handling is *INCOMPLETE*. The + %% current code is broken but is occasionally correct, + %% like a broken analog watch is correct 2x per day. + trim_watermark :: non_neg_integer(), + trim_count :: non_neg_integer() + }). + +start_link(Dir) -> + start_link(Dir, 8, 64*1024*1024). + +start_link(Dir, PageSize, MaxMem) -> + gen_server:start_link(?MODULE, {Dir, PageSize, MaxMem}, []). + +status(Pid) -> + gen_server:call(Pid, status, infinity). + +stop(Pid) -> + gen_server:call(Pid, stop, infinity). + +write(Pid, Epoch, LogicalPN, PageBin) + when is_integer(LogicalPN), LogicalPN > 0, is_binary(PageBin) -> + gen_server:call(Pid, {write, Epoch, LogicalPN, PageBin}, infinity). + +read(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + gen_server:call(Pid, {read, Epoch, LogicalPN}, infinity). + +seal(Pid, Epoch) when is_integer(Epoch), Epoch > 0 -> + gen_server:call(Pid, {seal, Epoch}, infinity). + +trim(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + gen_server:call(Pid, {trim, Epoch, LogicalPN}, infinity). + +fill(Pid, Epoch, LogicalPN) + when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> + gen_server:call(Pid, {fill, Epoch, LogicalPN}, infinity). + +-ifdef(TEST). + +get__mlp(Pid) -> + gen_server:call(Pid, get__mlp, infinity). + +get__min_epoch(Pid) -> + gen_server:call(Pid, get__min_epoch, infinity). + +get__trim_watermark(Pid) -> + gen_server:call(Pid, get__trim_watermark, infinity). + +-endif. % TEST + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +init({Dir, ExpPageSize, ExpMaxMem}) -> + MemFile = memfile_path(Dir), + filelib:ensure_dir(MemFile), + {ok, FH} = file:open(MemFile, [read, write, raw, binary]), + + {_Version, MinEpoch, PageSize, MaxMem, TrimWatermark} = + try + Res = read_hard_state(Dir), + case Res of + {_V, _LE, PS, MM, TW} + when PS =:= ExpPageSize, MM =:= ExpMaxMem -> + Res + end + catch + X:Y -> + io:format("init: caught ~p ~p @ ~p\n", + [X, Y, erlang:get_stacktrace()]), + {no_version_number, 0, ExpPageSize, ExpMaxMem, 0} + end, + State = #state{dir=Dir, mem_fh=FH, min_epoch=MinEpoch, page_size=PageSize, + max_mem=MaxMem, max_logical_page=unknown, + trim_watermark=TrimWatermark, trim_count=0}, + self() ! finish_init, % TODO + {ok, State}. + +handle_call({write, ClientEpoch, _LogicalPN, _PageBin}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + {reply, error_badepoch, State}; +handle_call({write, _ClientEpoch, LogicalPN, PageBin}, _From, + #state{max_logical_page=MLPN} = State) -> + case check_write(LogicalPN, PageBin, State) of + {ok, Offset} -> + ok = write_page(Offset, LogicalPN, PageBin, State), + NewMLPN = erlang:max(LogicalPN, MLPN), + {reply, ok, State#state{max_logical_page=NewMLPN}}; + Else -> + {reply, Else, State} + end; + +handle_call({read, ClientEpoch, _LogicalPN}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + {reply, error_badepoch, State}; +handle_call({read, _ClientEpoch, LogicalPN}, _From, State) -> + {reply, read_page(LogicalPN, State), State}; + +handle_call({seal, ClientEpoch}, _From, #state{min_epoch=MinEpoch} = State) + when ClientEpoch =< MinEpoch -> + {reply, error_badepoch, State}; +handle_call({seal, ClientEpoch}, _From, #state{max_logical_page=MLPN}=State) -> + NewState = State#state{min_epoch=ClientEpoch}, + ok = write_hard_state(NewState), + {reply, {ok, MLPN}, NewState}; + +handle_call({trim, ClientEpoch, _LogicalPN}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + {reply, error_badepoch, State}; +handle_call({trim, _ClientEpoch, LogicalPN}, _From, State) -> + do_trim_or_fill(trim, LogicalPN, State); + +handle_call({fill, ClientEpoch, _LogicalPN}, _From, + #state{min_epoch=MinEpoch} = State) + when ClientEpoch < MinEpoch -> + {reply, error_badepoch, State}; +handle_call({fill, _ClientEpoch, LogicalPN}, _From, State) -> + do_trim_or_fill(fill, LogicalPN, State); + +handle_call(get__mlp, _From, State) -> + {reply, State#state.max_logical_page, State}; +handle_call(get__min_epoch, _From, State) -> + {reply, State#state.min_epoch, State}; +handle_call(get__trim_watermark, _From, State) -> + {reply, State#state.trim_watermark, State}; +handle_call(status, _From, State) -> + L = [{min_epoch, State#state.min_epoch}, + {page_size, State#state.page_size}, + {max_mem, State#state.max_mem}, + {max_logical_page, State#state.max_logical_page}, + {trim_watermark, State#state.trim_watermark}], + {reply, {ok, L}, State}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(Request, _From, State) -> + Reply = {whaaaaaaaaaaaaaaaaaa, Request}, + {reply, Reply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(finish_init, State) -> + MLP = find_max_logical_page(State), + State2 = State#state{max_logical_page=MLP}, + ok = write_hard_state(State2), + {noreply, State2}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + ok = write_hard_state(State), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +read_hard_state(Dir) -> + File = hard_state_path(Dir), + case file:read_file(File) of + {ok, Bin} -> + case binary_to_term(Bin) of + T when element(1, T) == v1 -> + T + end; + Else -> + Else + end. + +write_hard_state(#state{min_epoch=MinEpoch, page_size=PageSize, max_mem=MaxMem, + trim_watermark=TrimWatermark} = S) -> + NewPath = hard_state_path(S#state.dir), + TmpPath = NewPath ++ ".tmp", + {ok, FH} = file:open(TmpPath, [write, binary, raw]), + HS = {v1, MinEpoch, PageSize, MaxMem, TrimWatermark}, + ok = file:write(FH, term_to_binary(HS)), + %% ok = file:sync(FH), % TODO uncomment when the training wheels come off + ok = file:close(FH), + ok = file:rename(TmpPath, NewPath). + +memfile_path(Dir) -> + Dir ++ "/memfile". + +hard_state_path(Dir) -> + Dir ++ "/hard-state". + +calc_page_offset(PhysicalPN, #state{page_size=PageSize}) -> + TotalSize = ?PAGE_OVERHEAD + PageSize, + PhysicalPN * TotalSize. + +%% find_max_logical_page(): This is a kludge, based on our naive +%% implementation of not keeping the maximum logical page in hard +%% state. + +find_max_logical_page(S) -> + {ok, FI} = file:read_file_info(memfile_path(S#state.dir)), + find_max_logical_page(0, 0, FI#file_info.size, S). + +find_max_logical_page(MLP, PhysicalPN, FSize, + #state{mem_fh=FH, max_mem=MaxMem}=S) -> + Offset = calc_page_offset(PhysicalPN, S), + if Offset < MaxMem, Offset < FSize -> + case file:pread(FH, Offset, 9) of + {ok, <<1:8/big, LP:64/big>>} -> + find_max_logical_page(erlang:max(MLP, LP), PhysicalPN + 1, + FSize, S); + _ -> + find_max_logical_page(MLP, PhysicalPN + 1, FSize, S) + end; + true -> + MLP + end. + +check_write(LogicalPN, PageBin, + #state{max_mem=MaxMem, page_size=PageSize} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem, byte_size(PageBin) =:= PageSize -> + case check_is_written(Offset, LogicalPN, S) of + false -> + {ok, Offset}; + true -> + error_overwritten + end; + true -> + {bummer, ?MODULE, ?LINE, lpn, LogicalPN, offset, Offset, max_mem, MaxMem, page_size, PageSize} + end. + +check_is_written(Offset, _PhysicalPN, #state{mem_fh=FH}) -> + case file:pread(FH, Offset, 1) of + {ok, <<1:8>>} -> + true; + {ok, <<0:8>>} -> + false; + eof -> + %% We assume that Offset has been bounds-checked + false + end. + +write_page(Offset, LogicalPN, PageBin, #state{mem_fh=FH}) -> + IOList = [<<1:8>>, <>, PageBin, <<1:8>>], + ok = file:pwrite(FH, Offset, IOList). + +read_page(LogicalPN, #state{max_mem=MaxMem, mem_fh=FH, + page_size=PageSize} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem -> + case file:pread(FH, Offset, PageSize + ?PAGE_OVERHEAD) of + {ok, <<1:8, LogicalPN:64/big, Page:PageSize/binary, 1:8>>} -> + {ok, Page}; + {ok, <<1:8, _LogicalPN:64/big, _:PageSize/binary, 0:8>>} -> + io:format("BUMMER: ~s line ~w: incomplete write at ~p\n", + [?MODULE, ?LINE, LogicalPN]), + error_unwritten; + {ok, _} -> + error_unwritten; + eof -> + error_unwritten; + Else -> + io:format("BUMMER: ~s line ~w: ~p\n", + [?MODULE, ?LINE, Else]), + badarg % TODO: better idea + end; + true -> + badarg + end. + +do_trim_or_fill(Op, LogicalPN, + #state{trim_watermark=TrimWatermark, trim_count=TrimCount} = S) -> + case trim_page(Op, LogicalPN, S) of + ok -> + NewS = S#state{trim_watermark=erlang:max( + TrimWatermark, LogicalPN), + trim_count=TrimCount + 1}, + if TrimCount rem 1000 == 0 -> + ok = write_hard_state(NewS); + true -> + ok + end, + {reply, ok, NewS}; + Else -> + {reply, Else, S} + end. + +trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> + Offset = calc_page_offset(LogicalPN, S), + if Offset < MaxMem -> + Status = case file:pread(FH, Offset, 1) of + {ok, <<0:8>>} -> + error_unwritten; + {ok, <<1:8>>} -> + error_overwritten; + {ok, <<2:8>>} -> + error_trimmed; + eof -> + error_unwritten; + Else -> + io:format("BUMMER: ~s line ~w: ~p\n", + [?MODULE, ?LINE, Else]), + error_trimmed % TODO + end, + if Status == error_overwritten andalso Op == trim -> + ok = file:pwrite(FH, Offset, <<2:8>>), + ok; + Status == error_unwritten andalso Op == fill -> + ok = file:pwrite(FH, Offset, <<2:8>>), + ok; + true -> + Status + end; + true -> + badarg + end. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +-ifdef(TEST). + +startstop_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = start_link(Dir), + try + {ok, _} = status(P1), + ok = stop(P1), + {'EXIT', _} = (catch stop(P1)), + + {ok, P2} = start_link(Dir), + 0 = get__mlp(P2), + 0 = get__min_epoch(P2), + ok = stop(P2), + + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +basic_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = start_link(Dir), + try + Epoch1 = 1, + Epoch2 = 2, + LPN = 1, + Bin1 = <<42:64>>, + Bin2 = <<42042:64>>, + + error_unwritten = read(P1, Epoch1, LPN), + error_unwritten = trim(P1, Epoch1, LPN), + error_unwritten = trim(P1, Epoch1, LPN+77), + + ok = write(P1, Epoch1, LPN, Bin1), + error_overwritten = write(P1, Epoch1, LPN, Bin1), + error_overwritten = fill(P1, Epoch1, LPN), + LPN = get__mlp(P1), + 0 = get__min_epoch(P1), + 0 = get__trim_watermark(P1), + {ok, LPN} = seal(P1, Epoch1), + 1 = get__min_epoch(P1), + + error_overwritten = write(P1, Epoch2, LPN, Bin1), + ok = write(P1, Epoch2, LPN+1, Bin2), + Epoch1 = get__min_epoch(P1), + + {ok, Bin1} = read(P1, Epoch1, LPN), + {ok, Bin2} = read(P1, Epoch2, LPN+1), + error_unwritten = read(P1, Epoch2, LPN+2), + badarg = read(P1, Epoch2, 1 bsl 2982), + + error_badepoch = seal(P1, Epoch1), + {ok, _} = seal(P1, Epoch2), + error_badepoch = seal(P1, Epoch2), + + error_badepoch = read(P1, Epoch1, LPN), + error_badepoch = read(P1, Epoch1, LPN+1), + {ok, Bin1} = read(P1, Epoch2, LPN), + {ok, Bin2} = read(P1, Epoch2, LPN+1), + + error_badepoch = trim(P1, Epoch1, LPN+1), + ok = trim(P1, Epoch2, LPN+1), + error_trimmed = trim(P1, Epoch2, LPN+1), + %% Current watermark processing is broken. But we'll test what's + %% there now. + ExpectedWaterFixMe = LPN+1, + ExpectedWaterFixMe = get__trim_watermark(P1), + + ok = fill(P1, Epoch2, LPN+3), + error_trimmed = fill(P1, Epoch2, LPN+3), + error_trimmed = trim(P1, Epoch2, LPN+3), + + Epoch2 = get__min_epoch(P1), + ok = stop(P1), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +seal_persistence_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = start_link(Dir), + try + 0 = get__min_epoch(P1), + Epoch = 665, + {ok, LPN} = seal(P1, Epoch), + Epoch = get__min_epoch(P1), + ok = stop(P1), + + {ok, P2} = start_link(Dir), + Epoch = get__min_epoch(P2), + + ok = stop(P2), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +-endif. % TEST diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl new file mode 100644 index 0000000..92426d0 --- /dev/null +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -0,0 +1,123 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_sequencer). + +-behaviour(gen_server). + +-export([start_link/1, stop/1, get/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-define(SERVER, ?MODULE). + +start_link(FLUs) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, {FLUs}, []). + +stop(Pid) -> + gen_server:call(Pid, stop, infinity). + +get(Pid, NumPages) -> + gen_server:call(Pid, {get, NumPages}, infinity). + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +init({FLUs}) -> + MLP = get_max_logical_page(FLUs), + {ok, MLP + 1}. + +handle_call({get, NumPages}, _From, MLP) -> + {reply, MLP, MLP + NumPages}; +handle_call(stop, _From, MLP) -> + {stop, normal, ok, MLP}; +handle_call(_Request, _From, MLP) -> + Reply = whaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, + {reply, Reply, MLP}. + +handle_cast(_Msg, MLP) -> + {noreply, MLP}. + +handle_info(_Info, MLP) -> + {noreply, MLP}. + +terminate(_Reason, _MLP) -> + ok. + +code_change(_OldVsn, MLP, _Extra) -> + {ok, MLP}. + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +get_max_logical_page(FLUs) -> + lists:max([proplists:get_value(max_logical_page, Ps, 0) || + FLU <- FLUs, + {ok, Ps} <- [corfurl_flu:status(FLU)]]). + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +-ifdef(TEST). + +smoke_test() -> + BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".", + PageSize = 8, + NumPages = 500, + NumFLUs = 4, + MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end, + Del = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) || + X <- lists:seq(1, NumFLUs)] end, + + Del(), + FLUs = [begin + element(2, corfurl_flu:start_link(MyDir(X), + PageSize, NumPages*PageSize)) + end || X <- lists:seq(1, NumFLUs)], + FLUsNums = lists:zip(FLUs, lists:seq(1, NumFLUs)), + + try + [ok = corfurl_flu:write(FLU, 1, PageNum, <<42:(8*8)>>) || + {FLU, PageNum} <- FLUsNums], + MLP0 = NumFLUs, + NumFLUs = get_max_logical_page(FLUs), + + %% Excellent. Now let's start the sequencer and see if it gets + %% the same answer. If yes, then the first get will return MLP1, + %% yadda yadda. + MLP1 = MLP0 + 1, + MLP3 = MLP0 + 3, + MLP4 = MLP0 + 4, + {ok, Sequencer} = start_link(FLUs), + try + MLP1 = get(Sequencer, 2), + MLP3 = get(Sequencer, 1), + MLP4 = get(Sequencer, 1) + after + stop(Sequencer) + end + after + [ok = corfurl_flu:stop(FLU) || FLU <- FLUs], + Del() + end. + +-endif. % TEST diff --git a/prototype/corfurl/src/corfurl_util.erl b/prototype/corfurl/src/corfurl_util.erl new file mode 100644 index 0000000..c88da23 --- /dev/null +++ b/prototype/corfurl/src/corfurl_util.erl @@ -0,0 +1,36 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_util). + +-export([delete_dir/1]). + +delete_dir(Dir) -> + %% We don't recursively delete directories, the ok pattern match will fail. + [ok = file:delete(X) || X <- filelib:wildcard(Dir ++ "/*")], + case file:del_dir(Dir) of + ok -> + ok; + {error, enoent} -> + ok; + Else -> + Else + end. + From 05a71eebb0c93fba894543d32efacf5dc2091051 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 16:07:41 +0900 Subject: [PATCH 04/70] corfurl:read_page() done, no read-repair yet --- prototype/corfurl/src/corfurl.erl | 69 ++++++++++++++++++------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 87d2bb0..e03c470 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -24,7 +24,7 @@ new_range/3, read_projection/2, save_projection/2]). --export([append_page/3]). +-export([append_page/3, read_page/2]). -include("corfurl.hrl"). @@ -48,25 +48,16 @@ r :: [#range{}] }). -%% append_page(Sequencer, P, Page) -> -%% append_page(Sequencer, P, 1, [Page]). - -%% append_page(Sequencer, P, NumPages, PageList) -> -%% FirstPN = corfurl_sequencer:get(Sequencer, NumPages), -%% [append_single_page(P, LPN, Page) || -%% {LPN, Page} <- lists:zip(lists:seq(FirstPN, FirstPN+NumPages-1), -%% PageList)]. - append_page(Sequencer, P, Page) -> append_page(Sequencer, P, Page, 1). append_page(Sequencer, P, Page, Retries) when Retries < 50 -> case corfurl_sequencer:get(Sequencer, 1) of LPN when is_integer(LPN) -> - case append_single_page(P, LPN, Page) of + case write_single_page(P, LPN, Page) of ok -> - ok; - X when X == error_written; X == error_trimmed -> + {ok, LPN}; + X when X == error_overwritten; X == error_trimmed -> io:format(user, "LPN ~p race lost: ~p\n", [LPN, X]), append_page(Sequencer, P, Page); Else -> @@ -77,16 +68,16 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> append_page(Sequencer, P, Page, Retries * 2) end. -append_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> +write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> Chain = project_to_chain(LPN, P), - append_single_page_to_chain(Chain, Epoch, LPN, Page, 1). + write_single_page_to_chain(Chain, Epoch, LPN, Page, 1). -append_single_page_to_chain([], _Epoch, _LPN, _Page, _Nth) -> +write_single_page_to_chain([], _Epoch, _LPN, _Page, _Nth) -> ok; -append_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> +write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of ok -> - append_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); error_badepoch -> %% TODO: Interesting case: there may be cases where retrying with %% a new epoch & that epoch's projection is just fine (and @@ -100,12 +91,12 @@ append_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> %% else junked us. %% TODO We should go trim our previously successful writes? error_trimmed; - error_written when Nth == 1 -> + error_overwritten when Nth == 1 -> %% The sequencer lied, or we didn't use the sequencer and %% guessed and guessed poorly, or someone is accidentally %% trying to take our page. Shouganai, these things happen. - error_written; - error_written when Nth > 1 -> + error_overwritten; + error_overwritten when Nth > 1 -> %% The likely cause is that another reader has noticed that %% we haven't finished writing this page in this chain and %% has repaired the remainder of the chain while we were @@ -114,7 +105,7 @@ append_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> {ok, AlreadyThere} when AlreadyThere =:= Page -> %% Alright, well, let's go continue the repair/writing, %% since we agree on the page's value. - append_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); error_badepoch -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; @@ -123,10 +114,29 @@ append_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> end; Else -> %% TODO: corner case -io:format(user, "WTF? Else = ~p\n", [Else]), + io:format(user, "WTF? Else = ~p\n", [Else]), Else end. +read_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + Tail = lists:last(Chain), + case corfurl_flu:read(flu_pid(Tail), Epoch, LPN) of + {ok, _} = OK -> + OK; + error_badepoch -> + error_badepoch; + error_trimmed -> + %% TODO: A sanity/should-never-happen check would be to + %% see if everyone else in the chain are also trimmed. + error_trimmed; + error_overwritten -> + error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); + error_unwritten -> + %% TODO: Check head for possible read-repair + read_page_tail_unwritten_todo + end. + flu_pid(X) when is_pid(X) -> X; flu_pid(X) when is_atom(X) -> @@ -219,20 +229,21 @@ setup_basic_flus(NumFLUs, PageSize, NumPages) -> PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) end || X <- lists:seq(1, NumFLUs)]. -append_test() -> +smoke1_test() -> NumFLUs = 4, PageSize = 8, NumPages = 10, FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), {ok, Seq} = corfurl_sequencer:start_link(FLUs), + %% We know that the first LSN will be 1. + LPN_Pgs = [{X, list_to_binary( + lists:flatten(io_lib:format("~8..0w", [X])))} || + X <- lists:seq(1, 5)], try P1 = new_simple_projection(1, 1, 1*100, [[F1, F2], [F3, F4]]), - [begin - Pg = lists:flatten(io_lib:format("~8..0w", [X])), - ok = append_page(Seq, P1, list_to_binary(Pg)) - end || X <- lists:seq(1, 5)], - + [begin {ok, LPN} = append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + [begin {ok, Pg} = read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], ok after corfurl_sequencer:stop(Seq), From 945635f8377ec9fda1cc0a672edf551725956bd1 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 16:48:16 +0900 Subject: [PATCH 05/70] Basic scan_forward done --- prototype/corfurl/src/corfurl.erl | 41 +++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index e03c470..f85c985 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -24,7 +24,7 @@ new_range/3, read_projection/2, save_projection/2]). --export([append_page/3, read_page/2]). +-export([append_page/3, read_page/2, scan_forward/3]). -include("corfurl.hrl"). @@ -134,7 +134,32 @@ read_page(#proj{epoch=Epoch} = P, LPN) -> error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); error_unwritten -> %% TODO: Check head for possible read-repair - read_page_tail_unwritten_todo + error_unwritten + end. + +scan_forward(P, LPN, MaxPages) -> + scan_forward(P, LPN, MaxPages, ok, true, []). + +scan_forward(_P, LPN, 0, Status, MoreP, Acc) -> + {Status, LPN, MoreP, lists:reverse(Acc)}; +scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) -> + case read_page(P, LPN) of + {ok, Page} -> + Res = {LPN, Page}, + scan_forward(P, LPN + 1, MaxPages - 1, ok, true, [Res|Acc]); + error_badepoch -> + %% Halt, allow recursion to create our return value. + scan_forward(P, LPN, 0, error_badepoch, false, Acc); + error_trimmed -> + %% TODO: API question, do we add a 'trimmed' indicator + %% in the Acc? Or should the client assume that if + %% scan_forward() doesn't mention a page that + scan_forward(P, LPN + 1, MaxPages - 1, ok, true, Acc); + error_overwritten -> + error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); + error_unwritten -> + %% Halt, allow recursion to create our return value. + scan_forward(P, LPN, 0, ok, false, Acc) end. flu_pid(X) when is_pid(X) -> @@ -243,7 +268,19 @@ smoke1_test() -> try P1 = new_simple_projection(1, 1, 1*100, [[F1, F2], [F3, F4]]), [begin {ok, LPN} = append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + [begin {ok, Pg} = read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], + + [begin + LPNplus = LPN + 1, + {ok, LPNplus, true, [{LPN, Pg}]} = scan_forward(P1, LPN, 1) + end || {LPN, Pg} <- LPN_Pgs], + {ok, 6, false, []} = scan_forward(P1, 6, 1), + {ok, 6, false, []} = scan_forward(P1, 6, 10), + [{LPN1,Pg1}, {LPN2,Pg2}, {LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}] = LPN_Pgs, + {ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = scan_forward(P1, 2, 2), + {ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} = scan_forward(P1, 3, 10), + ok after corfurl_sequencer:stop(Seq), From c23aeabc20ebc2ccc29481ee847b89f1e004147f Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 17:31:41 +0900 Subject: [PATCH 06/70] Read-repair, not tested --- prototype/corfurl/src/corfurl.erl | 54 ++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index f85c985..ee7ed71 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -133,10 +133,62 @@ read_page(#proj{epoch=Epoch} = P, LPN) -> error_overwritten -> error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); error_unwritten -> - %% TODO: Check head for possible read-repair + %% TODO: During scan_forward(), this pestering of the upstream + %% nodes in the chain is possibly-excessive-work. + %% For now, we'll assume that we always want to repair. + case read_repair(Epoch, LPN, Tail, hd(Chain)) of + {ok, _} = OK2 -> + OK2; + Else -> + Else + end + end. + +read_repair(_Epoch, _LPN, RepairFLU, RepairFLU) -> + error_unwritten; +read_repair(Epoch, LPN, RepairFLU, Head) -> + case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of + {ok, Page} -> + case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of + ok -> + ok; + error_badepoch -> + error_badepoch; + error_trimmed -> + error_trimmed; + error_overwritten -> + case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of + {ok, Page2} when Page2 =:= Page -> + {ok, Page}; + {ok, Oops} -> + error({impossible, ?MODULE, ?LINE, LPN, head_said, Page, repairee_now_says, Oops}); + error_trimmed -> + %% Wow, we have lost at least 3 races in a row. + read_repair_trim(RepairFLU, LPN); + Else -> + Else + end; + error_unwritten -> + error({impossible, ?MODULE, ?LINE, written_then_unwritten}) + end; + error_badepoch -> + error_badepoch; + error_trimmed -> + read_repair_trim(RepairFLU, LPN); + error_overwritten -> + error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); + error_unwritten -> error_unwritten end. +read_repair_trim(RepairFLU, LPN) -> + case corfurl_flu:trim(flu_pid(RepairFLU), LPN) of + ok -> + error_trimmed; + Else -> + Else + end. + scan_forward(P, LPN, MaxPages) -> scan_forward(P, LPN, MaxPages, ok, true, []). From 6014b0584e7ce2eb7241a844371d2e1b9fba3ee8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 19:02:38 +0900 Subject: [PATCH 07/70] Fix read() response to a prior fill --- prototype/corfurl/src/corfurl_flu.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 6ee0d0b..01e3d06 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -307,6 +307,8 @@ read_page(LogicalPN, #state{max_mem=MaxMem, mem_fh=FH, io:format("BUMMER: ~s line ~w: incomplete write at ~p\n", [?MODULE, ?LINE, LogicalPN]), error_unwritten; + {ok, <<2:8>>} -> + error_trimmed; {ok, _} -> error_unwritten; eof -> @@ -439,6 +441,7 @@ basic_test() -> ExpectedWaterFixMe = get__trim_watermark(P1), ok = fill(P1, Epoch2, LPN+3), + error_trimmed = read(P1, Epoch2, LPN+3), error_trimmed = fill(P1, Epoch2, LPN+3), error_trimmed = trim(P1, Epoch2, LPN+3), From 3d2be7255f0590cfd3bd05ad5116fb1562429b9a Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 19:03:15 +0900 Subject: [PATCH 08/70] Basic smoke test for read repair --- prototype/corfurl/src/corfurl.erl | 139 +++++++++++++++++++----------- 1 file changed, 90 insertions(+), 49 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index ee7ed71..dc88b9a 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -109,13 +109,9 @@ write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> error_badepoch -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; - error_overwritten -> - error({impossible, ?MODULE, ?LINE, left_off_here}) - end; - Else -> - %% TODO: corner case - io:format(user, "WTF? Else = ~p\n", [Else]), - Else + Else -> + error({left_off_here, ?MODULE, ?LINE, Else}) + end end. read_page(#proj{epoch=Epoch} = P, LPN) -> @@ -130,55 +126,62 @@ read_page(#proj{epoch=Epoch} = P, LPN) -> %% TODO: A sanity/should-never-happen check would be to %% see if everyone else in the chain are also trimmed. error_trimmed; - error_overwritten -> - error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); error_unwritten -> %% TODO: During scan_forward(), this pestering of the upstream %% nodes in the chain is possibly-excessive-work. %% For now, we'll assume that we always want to repair. - case read_repair(Epoch, LPN, Tail, hd(Chain)) of - {ok, _} = OK2 -> - OK2; - Else -> - Else - end + read_repair_chain(Epoch, LPN, Chain) + %% Let it crash: error_overwritten end. -read_repair(_Epoch, _LPN, RepairFLU, RepairFLU) -> - error_unwritten; -read_repair(Epoch, LPN, RepairFLU, Head) -> +read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of {ok, Page} -> - case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of - ok -> - ok; - error_badepoch -> - error_badepoch; - error_trimmed -> - error_trimmed; - error_overwritten -> - case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of - {ok, Page2} when Page2 =:= Page -> - {ok, Page}; - {ok, Oops} -> - error({impossible, ?MODULE, ?LINE, LPN, head_said, Page, repairee_now_says, Oops}); - error_trimmed -> - %% Wow, we have lost at least 3 races in a row. - read_repair_trim(RepairFLU, LPN); - Else -> - Else - end; - error_unwritten -> - error({impossible, ?MODULE, ?LINE, written_then_unwritten}) - end; + read_repair_chain2(Rest, Epoch, LPN, Page, Chain); error_badepoch -> error_badepoch; error_trimmed -> - read_repair_trim(RepairFLU, LPN); - error_overwritten -> - error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); + %% TODO: robustify + [ok = case corfurl_flu:fill(flu_pid(X), Epoch, LPN) of + ok -> ok; + error_trimmed -> ok; + Else -> Else + end || X <- Rest], + error_trimmed; error_unwritten -> error_unwritten + %% Let it crash: error_overwritten + end. + +read_repair_chain2([] = _Repairees, _Epoch, _LPN, Page, _OriginalChain) -> + {ok, Page}; +read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) -> + case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of + ok -> + read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain); + error_badepoch -> + error_badepoch; + error_trimmed -> + error_trimmed; + error_overwritten -> + %% We're going to do an optional sanity check here. + %% TODO: make the sanity check configurable? + case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of + {ok, Page2} when Page2 =:= Page -> + %% TODO: is there a need to continue working upstream + %% to fix problems? + {ok, Page2}; + {ok, _Page2} -> + error({bummerbummer, ?MODULE, ?LINE, sanity_check_failure, + lpn, LPN, epoch, Epoch}); + error_badepoch -> + error_badepoch; + error_trimmed -> + %% Start repair at the beginning to handle this case + read_repair_chain(Epoch, LPN, OriginalChain) + %% Let it crash: error_overwritten, error_unwritten + end + %% Let it crash: error_unwritten end. read_repair_trim(RepairFLU, LPN) -> @@ -207,11 +210,15 @@ scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) -> %% in the Acc? Or should the client assume that if %% scan_forward() doesn't mention a page that scan_forward(P, LPN + 1, MaxPages - 1, ok, true, Acc); - error_overwritten -> - error({impossible, ?MODULE, ?LINE, overwritten_reply_to_read}); error_unwritten -> %% Halt, allow recursion to create our return value. + %% TODO: It's possible that we're stuck here because a client + %% crashed and that we see an unwritten page at LPN. + %% We ought to ask the sequencer always/sometime?? what + %% tail LPN is, and if there's a hole, start a timer to + %% allow us to fill the hole. scan_forward(P, LPN, 0, ok, false, Acc) + %% Let it crash: error_overwritten end. flu_pid(X) when is_pid(X) -> @@ -307,18 +314,19 @@ setup_basic_flus(NumFLUs, PageSize, NumPages) -> end || X <- lists:seq(1, NumFLUs)]. smoke1_test() -> - NumFLUs = 4, + NumFLUs = 6, PageSize = 8, NumPages = 10, - FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), + FLUs = [F1, F2, F3, F4, F5, F6] = + setup_basic_flus(NumFLUs, PageSize, NumPages), {ok, Seq} = corfurl_sequencer:start_link(FLUs), - %% We know that the first LSN will be 1. + %% We know that the first LPN will be 1. LPN_Pgs = [{X, list_to_binary( lists:flatten(io_lib:format("~8..0w", [X])))} || X <- lists:seq(1, 5)], try - P1 = new_simple_projection(1, 1, 1*100, [[F1, F2], [F3, F4]]), + P1 = new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), [begin {ok, LPN} = append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], [begin {ok, Pg} = read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], @@ -333,6 +341,39 @@ smoke1_test() -> {ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = scan_forward(P1, 2, 2), {ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} = scan_forward(P1, 3, 10), + %% Let's smoke read-repair: regular write failure + Epoch = P1#proj.epoch, + Pg6 = <<424242:(PageSize*8)>>, + + %% Simulate a failed write to the chain. + [F6a, F6b, F6c] = Chain6 = project_to_chain(6, P1), + NotHead6 = [F6b, F6c], + ok = write_single_page_to_chain([F6a], Epoch, 6, Pg6, 1), + + %% Does the chain look as expected? + {ok, Pg6} = corfurl_flu:read(flu_pid(F6a), Epoch, 6), + [error_unwritten = corfurl_flu:read(flu_pid(X), Epoch, 6) || + X <- NotHead6], + + %% Read repair should fix it. + {ok, Pg6} = read_page(P1, 6), + [{ok, Pg6} = corfurl_flu:read(flu_pid(X), Epoch, 6) || X <- Chain6], + + %% Let's smoke read-repair: failed fill + [F7a, F7b, F7c] = Chain7 = project_to_chain(7, P1), + NotHead7 = [F7b, F7c], + ok = corfurl_flu:fill(flu_pid(F7a), Epoch, 7), + + %% Does the chain look as expected? + error_trimmed = corfurl_flu:read(flu_pid(F7a), Epoch, 7), + [error_unwritten = corfurl_flu:read(flu_pid(X), Epoch, 7) || + X <- NotHead7], + + %% Read repair should fix it. + error_trimmed = read_page(P1, 7), + [error_trimmed = corfurl_flu:read(flu_pid(X), Epoch, 7) || X <- Chain7], + + ok after corfurl_sequencer:stop(Seq), From 3963ce44f084d24ce462c98603fc7f1a703a30cf Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 20:19:51 +0900 Subject: [PATCH 09/70] More sanity checking for fill() in smoke test --- prototype/corfurl/src/corfurl.erl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index dc88b9a..d50dbeb 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -372,7 +372,14 @@ smoke1_test() -> %% Read repair should fix it. error_trimmed = read_page(P1, 7), [error_trimmed = corfurl_flu:read(flu_pid(X), Epoch, 7) || X <- Chain7], + %% scan_forward shouldn't see it either + {ok, 8, false, [{6,Pg6}]} = scan_forward(P1, 6, 10), + [F8a|_] = Chain8 = project_to_chain(8, P1), + ok = corfurl_flu:fill(flu_pid(F8a), Epoch, 8), + %% No read before scan, scan_forward shouldn't see 8 either, + %% but the next seq should be 9 + {ok, 9, false, [{6,Pg6}]} = scan_forward(P1, 6, 10), ok after From feed231d5e78b4fc2f834bb033a37c03cc85ebc5 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sun, 16 Feb 2014 20:32:08 +0900 Subject: [PATCH 10/70] Move EUnit test code to test subdir --- prototype/corfurl/include/corfurl.hrl | 15 ++ prototype/corfurl/src/corfurl.erl | 207 ----------------------- prototype/corfurl/test/corfurl_test.erl | 215 ++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 207 deletions(-) create mode 100644 prototype/corfurl/test/corfurl_test.erl diff --git a/prototype/corfurl/include/corfurl.hrl b/prototype/corfurl/include/corfurl.hrl index fa843e3..e3b2b28 100644 --- a/prototype/corfurl/include/corfurl.hrl +++ b/prototype/corfurl/include/corfurl.hrl @@ -18,6 +18,21 @@ %% %% ------------------------------------------------------------------- +-type flu_name() :: atom(). +-type flu() :: pid() | flu_name(). +-type flu_chain() :: [flu()]. + +-record(range, { + pn_start :: non_neg_integer(), % start page number + pn_end :: non_neg_integer(), % end page number + chains :: [flu_chain()] + }). + +-record(proj, { % Projection + epoch :: non_neg_integer(), + r :: [#range{}] + }). + %% 1 byte @ offset 0: 0=unwritten, 1=written, 2=trimmed, 255=corrupt? TODO %% 8 bytes @ offset 1: logical page number %% P bytes @ offset 9: page data diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index d50dbeb..c242875 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -29,25 +29,9 @@ -include("corfurl.hrl"). -ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). -compile(export_all). -endif. --type flu_name() :: atom(). --type flu() :: pid() | flu_name(). --type flu_chain() :: [flu()]. - --record(range, { - pn_start :: non_neg_integer(), % start page number - pn_end :: non_neg_integer(), % end page number - chains :: [flu_chain()] - }). - --record(proj, { % Projection - epoch :: non_neg_integer(), - r :: [#range{}] - }). - append_page(Sequencer, P, Page) -> append_page(Sequencer, P, Page, 1). @@ -184,14 +168,6 @@ read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) -> %% Let it crash: error_unwritten end. -read_repair_trim(RepairFLU, LPN) -> - case corfurl_flu:trim(flu_pid(RepairFLU), LPN) of - ok -> - error_trimmed; - Else -> - Else - end. - scan_forward(P, LPN, MaxPages) -> scan_forward(P, LPN, MaxPages, ok, true, []). @@ -274,186 +250,3 @@ project_to_chain(LPN, P) -> element(I, Chains) end. -%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% - --ifdef(TEST). - -save_read_test() -> - Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", - Chain = [a,b], - P1 = new_simple_projection(1, 1, 1*100, [Chain]), - - try - filelib:ensure_dir(Dir ++ "/ignored"), - ok = save_projection(Dir, P1), - error_overwritten = save_projection(Dir, P1), - - {ok, P1} = read_projection(Dir, 1), - error_unwritten = read_projection(Dir, 2), - - ok - after - ok = corfurl_util:delete_dir(Dir) - end. - -setup_flu_basedir() -> - "/tmp/" ++ atom_to_list(?MODULE) ++ ".". - -setup_flu_dir(N) -> - setup_flu_basedir() ++ integer_to_list(N). - -setup_del_all(NumFLUs) -> - [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || - N <- lists:seq(1, NumFLUs)]. - -setup_basic_flus(NumFLUs, PageSize, NumPages) -> - setup_del_all(NumFLUs), - [begin - element(2, corfurl_flu:start_link(setup_flu_dir(X), - PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) - end || X <- lists:seq(1, NumFLUs)]. - -smoke1_test() -> - NumFLUs = 6, - PageSize = 8, - NumPages = 10, - FLUs = [F1, F2, F3, F4, F5, F6] = - setup_basic_flus(NumFLUs, PageSize, NumPages), - {ok, Seq} = corfurl_sequencer:start_link(FLUs), - - %% We know that the first LPN will be 1. - LPN_Pgs = [{X, list_to_binary( - lists:flatten(io_lib:format("~8..0w", [X])))} || - X <- lists:seq(1, 5)], - try - P1 = new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), - [begin {ok, LPN} = append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], - - [begin {ok, Pg} = read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], - - [begin - LPNplus = LPN + 1, - {ok, LPNplus, true, [{LPN, Pg}]} = scan_forward(P1, LPN, 1) - end || {LPN, Pg} <- LPN_Pgs], - {ok, 6, false, []} = scan_forward(P1, 6, 1), - {ok, 6, false, []} = scan_forward(P1, 6, 10), - [{LPN1,Pg1}, {LPN2,Pg2}, {LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}] = LPN_Pgs, - {ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = scan_forward(P1, 2, 2), - {ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} = scan_forward(P1, 3, 10), - - %% Let's smoke read-repair: regular write failure - Epoch = P1#proj.epoch, - Pg6 = <<424242:(PageSize*8)>>, - - %% Simulate a failed write to the chain. - [F6a, F6b, F6c] = Chain6 = project_to_chain(6, P1), - NotHead6 = [F6b, F6c], - ok = write_single_page_to_chain([F6a], Epoch, 6, Pg6, 1), - - %% Does the chain look as expected? - {ok, Pg6} = corfurl_flu:read(flu_pid(F6a), Epoch, 6), - [error_unwritten = corfurl_flu:read(flu_pid(X), Epoch, 6) || - X <- NotHead6], - - %% Read repair should fix it. - {ok, Pg6} = read_page(P1, 6), - [{ok, Pg6} = corfurl_flu:read(flu_pid(X), Epoch, 6) || X <- Chain6], - - %% Let's smoke read-repair: failed fill - [F7a, F7b, F7c] = Chain7 = project_to_chain(7, P1), - NotHead7 = [F7b, F7c], - ok = corfurl_flu:fill(flu_pid(F7a), Epoch, 7), - - %% Does the chain look as expected? - error_trimmed = corfurl_flu:read(flu_pid(F7a), Epoch, 7), - [error_unwritten = corfurl_flu:read(flu_pid(X), Epoch, 7) || - X <- NotHead7], - - %% Read repair should fix it. - error_trimmed = read_page(P1, 7), - [error_trimmed = corfurl_flu:read(flu_pid(X), Epoch, 7) || X <- Chain7], - %% scan_forward shouldn't see it either - {ok, 8, false, [{6,Pg6}]} = scan_forward(P1, 6, 10), - - [F8a|_] = Chain8 = project_to_chain(8, P1), - ok = corfurl_flu:fill(flu_pid(F8a), Epoch, 8), - %% No read before scan, scan_forward shouldn't see 8 either, - %% but the next seq should be 9 - {ok, 9, false, [{6,Pg6}]} = scan_forward(P1, 6, 10), - - ok - after - corfurl_sequencer:stop(Seq), - [corfurl_flu:stop(F) || F <- FLUs], - setup_del_all(NumFLUs) - end. - -forfun_append(0, _Seq, _P, _Page) -> - ok; -forfun_append(N, Seq, P, Page) -> - ok = append_page(Seq, P, Page), - forfun_append(N - 1, Seq, P, Page). - --ifdef(TIMING_TEST). - -forfun_test_() -> - {timeout, 99999, fun() -> - [forfun(Procs) || Procs <- [10,100,1000,5000]] - end}. - -%%% My MBP, SSD -%%% The 1K and 5K procs shows full-mailbox-scan ickiness -%%% when getting replies from prim_file. :-( - -%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.016815 sec -%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.547976 sec -%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 13.706686 sec -%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 33.516312 sec - -%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.350147 sec -%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.429485 sec -%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.643233 sec -%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 15.686058 sec - -%%%% forfun: 10 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 13.479458 sec -%%%% forfun: 100 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 14.752565 sec -%%%% forfun: 1000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 25.012306 sec -%%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec - -forfun(NumProcs) -> - io:format(user, "\n", []), - NumFLUs = 4, - PageSize = 8, - %%PageSize = 4096, - NumPages = 200*1000, - PagesPerProc = NumPages div NumProcs, - FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), - {ok, Seq} = corfurl_sequencer:start_link(FLUs), - - try - Chains = [[F1, F2], [F3, F4]], - %%Chains = [[F1], [F2], [F3], [F4]], - P = new_simple_projection(1, 1, NumPages*2, Chains), - Me = self(), - Start = now(), - Ws = [begin - Page = <>, - spawn_link(fun() -> - forfun_append(PagesPerProc, Seq, P, Page), - Me ! {done, self()} - end) - end || X <- lists:seq(1, NumProcs)], - [receive {done, W} -> ok end || W <- Ws], - End = now(), - io:format(user, "forfun: ~p procs writing ~p pages of ~p bytes/page to ~p chains of ~p total FLUs in ~p sec\n", - [NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]), - ok - after - corfurl_sequencer:stop(Seq), - [corfurl_flu:stop(F) || F <- FLUs], - setup_del_all(NumFLUs) - end. - --endif. % TIMING_TEST - --endif. % TEST diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl new file mode 100644 index 0000000..76af137 --- /dev/null +++ b/prototype/corfurl/test/corfurl_test.erl @@ -0,0 +1,215 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_test). + +-include("corfurl.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-endif. + +-define(M, corfurl). + +%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% + +-ifdef(TEST). + +save_read_test() -> + Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", + Chain = [a,b], + P1 = ?M:new_simple_projection(1, 1, 1*100, [Chain]), + + try + filelib:ensure_dir(Dir ++ "/ignored"), + ok = ?M:save_projection(Dir, P1), + error_overwritten = ?M:save_projection(Dir, P1), + + {ok, P1} = ?M:read_projection(Dir, 1), + error_unwritten = ?M:read_projection(Dir, 2), + + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +setup_flu_basedir() -> + "/tmp/" ++ atom_to_list(?MODULE) ++ ".". + +setup_flu_dir(N) -> + setup_flu_basedir() ++ integer_to_list(N). + +setup_del_all(NumFLUs) -> + [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || + N <- lists:seq(1, NumFLUs)]. + +setup_basic_flus(NumFLUs, PageSize, NumPages) -> + setup_del_all(NumFLUs), + [begin + element(2, corfurl_flu:start_link(setup_flu_dir(X), + PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) + end || X <- lists:seq(1, NumFLUs)]. + +smoke1_test() -> + NumFLUs = 6, + PageSize = 8, + NumPages = 10, + FLUs = [F1, F2, F3, F4, F5, F6] = + setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + %% We know that the first LPN will be 1. + LPN_Pgs = [{X, list_to_binary( + lists:flatten(io_lib:format("~8..0w", [X])))} || + X <- lists:seq(1, 5)], + try + P1 = ?M:new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), + [begin {ok, LPN} = ?M:append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + + [begin {ok, Pg} = ?M:read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], + + [begin + LPNplus = LPN + 1, + {ok, LPNplus, true, [{LPN, Pg}]} = ?M:scan_forward(P1, LPN, 1) + end || {LPN, Pg} <- LPN_Pgs], + {ok, 6, false, []} = ?M:scan_forward(P1, 6, 1), + {ok, 6, false, []} = ?M:scan_forward(P1, 6, 10), + [{LPN1,Pg1}, {LPN2,Pg2}, {LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}] = LPN_Pgs, + {ok, 4, true, [{LPN2,Pg2}, {LPN3,Pg3}]} = ?M:scan_forward(P1, 2, 2), + {ok, 6, false, [{LPN3,Pg3}, {LPN4,Pg4}, {LPN5,Pg5}]} = + ?M:scan_forward(P1, 3, 10), + + %% Let's smoke read-repair: regular write failure + Epoch = P1#proj.epoch, + Pg6 = <<424242:(PageSize*8)>>, + + %% Simulate a failed write to the chain. + [F6a, F6b, F6c] = Chain6 = ?M:project_to_chain(6, P1), + NotHead6 = [F6b, F6c], + ok = ?M:write_single_page_to_chain([F6a], Epoch, 6, Pg6, 1), + + %% Does the chain look as expected? + {ok, Pg6} = corfurl_flu:read(?M:flu_pid(F6a), Epoch, 6), + [error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) || + X <- NotHead6], + + %% Read repair should fix it. + {ok, Pg6} = ?M:read_page(P1, 6), + [{ok, Pg6} = corfurl_flu:read(?M:flu_pid(X), Epoch, 6) || X <- Chain6], + + %% Let's smoke read-repair: failed fill + [F7a, F7b, F7c] = Chain7 = ?M:project_to_chain(7, P1), + NotHead7 = [F7b, F7c], + ok = corfurl_flu:fill(?M:flu_pid(F7a), Epoch, 7), + + %% Does the chain look as expected? + error_trimmed = corfurl_flu:read(?M:flu_pid(F7a), Epoch, 7), + [error_unwritten = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) || + X <- NotHead7], + + %% Read repair should fix it. + error_trimmed = ?M:read_page(P1, 7), + [error_trimmed = corfurl_flu:read(?M:flu_pid(X), Epoch, 7) || X <- Chain7], + %% scan_forward shouldn't see it either + {ok, 8, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10), + + [F8a|_] = Chain8 = ?M:project_to_chain(8, P1), + ok = corfurl_flu:fill(?M:flu_pid(F8a), Epoch, 8), + %% No read before scan, scan_forward shouldn't see 8 either, + %% but the next seq should be 9 + {ok, 9, false, [{6,Pg6}]} = ?M:scan_forward(P1, 6, 10), + + ok + after + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +-ifdef(TIMING_TEST). + +forfun_test_() -> + {timeout, 99999, fun() -> + [forfun(Procs) || Procs <- [10,100,1000,5000]] + end}. + +forfun_append(0, _Seq, _P, _Page) -> + ok; +forfun_append(N, Seq, P, Page) -> + {ok, _} = ?M:append_page(Seq, P, Page), + forfun_append(N - 1, Seq, P, Page). + +%%% My MBP, SSD +%%% The 1K and 5K procs shows full-mailbox-scan ickiness +%%% when getting replies from prim_file. :-( + +%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.016815 sec +%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 10.547976 sec +%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 13.706686 sec +%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 2 chains of 4 total FLUs in 33.516312 sec + +%%% forfun: 10 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.350147 sec +%%% forfun: 100 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.429485 sec +%%% forfun: 1000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 5.643233 sec +%%% forfun: 5000 procs writing 200000 pages of 8 bytes/page to 4 chains of 4 total FLUs in 15.686058 sec + +%%%% forfun: 10 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 13.479458 sec +%%%% forfun: 100 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 14.752565 sec +%%%% forfun: 1000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 25.012306 sec +%%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec + +forfun(NumProcs) -> + io:format(user, "\n", []), + NumFLUs = 4, + PageSize = 8, + %%PageSize = 4096, + NumPages = 200*1000, + PagesPerProc = NumPages div NumProcs, + FLUs = [F1, F2, F3, F4] = setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + try + Chains = [[F1, F2], [F3, F4]], + %%Chains = [[F1], [F2], [F3], [F4]], + P = ?M:new_simple_projection(1, 1, NumPages*2, Chains), + Me = self(), + Start = now(), + Ws = [begin + Page = <>, + spawn_link(fun() -> + forfun_append(PagesPerProc, Seq, P, Page), + Me ! {done, self()} + end) + end || X <- lists:seq(1, NumProcs)], + [receive {done, W} -> ok end || W <- Ws], + End = now(), + io:format(user, "forfun: ~p procs writing ~p pages of ~p bytes/page to ~p chains of ~p total FLUs in ~p sec\n", + [NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]), + ok + after + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + +-endif. % TIMING_TEST + +-endif. % TEST From a294a0eff0d43d0f145c0c38cbfe6d1876bc4f3c Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 00:29:41 +0900 Subject: [PATCH 11/70] Skeleton of PULSE test created, first bug (race in sequencer init) is found, huzzah! --- prototype/corfurl/Makefile | 3 + prototype/corfurl/rebar.config.script | 54 ++++ prototype/corfurl/src/corfurl.erl | 3 + prototype/corfurl/src/corfurl_flu.erl | 118 +------- prototype/corfurl/src/corfurl_sequencer.erl | 9 +- prototype/corfurl/test/corfurl_flu_test.erl | 134 +++++++++ prototype/corfurl/test/corfurl_pulse.erl | 274 ++++++++++++++++++ prototype/corfurl/test/corfurl_test.erl | 41 +-- .../corfurl/test/pulse_util/event_logger.erl | 131 +++++++++ .../corfurl/test/pulse_util/handle_errors.erl | 153 ++++++++++ 10 files changed, 792 insertions(+), 128 deletions(-) create mode 100644 prototype/corfurl/rebar.config.script create mode 100644 prototype/corfurl/test/corfurl_flu_test.erl create mode 100644 prototype/corfurl/test/corfurl_pulse.erl create mode 100644 prototype/corfurl/test/pulse_util/event_logger.erl create mode 100644 prototype/corfurl/test/pulse_util/handle_errors.erl diff --git a/prototype/corfurl/Makefile b/prototype/corfurl/Makefile index ef51767..5a67094 100644 --- a/prototype/corfurl/Makefile +++ b/prototype/corfurl/Makefile @@ -21,3 +21,6 @@ test: deps compile eunit eunit: $(REBAR_BIN) -v skip_deps=true eunit +pulse: compile + env BITCASK_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile + env BITCASK_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit diff --git a/prototype/corfurl/rebar.config.script b/prototype/corfurl/rebar.config.script new file mode 100644 index 0000000..2155bb5 --- /dev/null +++ b/prototype/corfurl/rebar.config.script @@ -0,0 +1,54 @@ +PulseBuild = case os:getenv("BITCASK_PULSE") of + false -> + false; + _ -> + true + end, +case PulseBuild of + true -> + PulseOpts = + [{pulse_no_side_effect, + [{erlang,display,1} + ]}, + {pulse_side_effect, + [ {corfurl_sequencer, get, 0} + , {corfurl_flu, write, 4} + , {corfurl_flu, read, 3} + , {corfurl_flu, seal, 2} + , {corfurl_flu, trim, 3} + , {corfurl_flu, fill, 3} + + , {event_logger, event, '_'} + + , {prim_file, '_', '_'} + , {file, '_', '_'} + , {filelib, '_', '_'} + , {os, '_', '_'} ]}, + + {pulse_replace_module, + [ {gen_server, pulse_gen_server} + , {application, pulse_application} + , {supervisor, pulse_supervisor} ]} + ], + PulseCFlags = [{"CFLAGS", "$CFLAGS -DPULSE"}], + UpdConfig = case lists:keysearch(eunit_compile_opts, 1, CONFIG) of + {value, {eunit_compile_opts, Opts}} -> + lists:keyreplace(eunit_compile_opts, + 1, + CONFIG, + {eunit_compile_opts, Opts ++ PulseOpts}); + _ -> + [{eunit_compile_opts, PulseOpts} | CONFIG] + end, + case lists:keysearch(port_env, 1, UpdConfig) of + {value, {port_env, PortEnv}} -> + lists:keyreplace(port_env, + 1, + UpdConfig, + {port_env, PortEnv ++ PulseCFlags}); + _ -> + [{port_env, PulseCFlags} | UpdConfig] + end; + false -> + CONFIG +end. diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index c242875..bbf84c2 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -30,6 +30,9 @@ -ifdef(TEST). -compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. -endif. append_page(Sequencer, P, Page) -> diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 01e3d06..4c02531 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -36,8 +36,10 @@ -include("corfurl.hrl"). -ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). -export([get__mlp/1, get__min_epoch/1, get__trim_watermark/1]). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. -endif. -include_lib("kernel/include/file.hrl"). @@ -117,8 +119,14 @@ init({Dir, ExpPageSize, ExpMaxMem}) -> end catch X:Y -> - io:format("init: caught ~p ~p @ ~p\n", - [X, Y, erlang:get_stacktrace()]), + if X == error, + Y == {case_clause,{error,enoent}} -> + ok; + true -> + %% TODO: log-ify this + io:format("init: caught ~p ~p @ ~p\n", + [X, Y, erlang:get_stacktrace()]) + end, {no_version_number, 0, ExpPageSize, ExpMaxMem, 0} end, State = #state{dir=Dir, mem_fh=FH, min_epoch=MinEpoch, page_size=PageSize, @@ -368,107 +376,3 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> true -> badarg end. - -%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% - --ifdef(TEST). - -startstop_test() -> - Dir = "/tmp/flu." ++ os:getpid(), - {ok, P1} = start_link(Dir), - try - {ok, _} = status(P1), - ok = stop(P1), - {'EXIT', _} = (catch stop(P1)), - - {ok, P2} = start_link(Dir), - 0 = get__mlp(P2), - 0 = get__min_epoch(P2), - ok = stop(P2), - - ok - after - ok = corfurl_util:delete_dir(Dir) - end. - -basic_test() -> - Dir = "/tmp/flu." ++ os:getpid(), - {ok, P1} = start_link(Dir), - try - Epoch1 = 1, - Epoch2 = 2, - LPN = 1, - Bin1 = <<42:64>>, - Bin2 = <<42042:64>>, - - error_unwritten = read(P1, Epoch1, LPN), - error_unwritten = trim(P1, Epoch1, LPN), - error_unwritten = trim(P1, Epoch1, LPN+77), - - ok = write(P1, Epoch1, LPN, Bin1), - error_overwritten = write(P1, Epoch1, LPN, Bin1), - error_overwritten = fill(P1, Epoch1, LPN), - LPN = get__mlp(P1), - 0 = get__min_epoch(P1), - 0 = get__trim_watermark(P1), - {ok, LPN} = seal(P1, Epoch1), - 1 = get__min_epoch(P1), - - error_overwritten = write(P1, Epoch2, LPN, Bin1), - ok = write(P1, Epoch2, LPN+1, Bin2), - Epoch1 = get__min_epoch(P1), - - {ok, Bin1} = read(P1, Epoch1, LPN), - {ok, Bin2} = read(P1, Epoch2, LPN+1), - error_unwritten = read(P1, Epoch2, LPN+2), - badarg = read(P1, Epoch2, 1 bsl 2982), - - error_badepoch = seal(P1, Epoch1), - {ok, _} = seal(P1, Epoch2), - error_badepoch = seal(P1, Epoch2), - - error_badepoch = read(P1, Epoch1, LPN), - error_badepoch = read(P1, Epoch1, LPN+1), - {ok, Bin1} = read(P1, Epoch2, LPN), - {ok, Bin2} = read(P1, Epoch2, LPN+1), - - error_badepoch = trim(P1, Epoch1, LPN+1), - ok = trim(P1, Epoch2, LPN+1), - error_trimmed = trim(P1, Epoch2, LPN+1), - %% Current watermark processing is broken. But we'll test what's - %% there now. - ExpectedWaterFixMe = LPN+1, - ExpectedWaterFixMe = get__trim_watermark(P1), - - ok = fill(P1, Epoch2, LPN+3), - error_trimmed = read(P1, Epoch2, LPN+3), - error_trimmed = fill(P1, Epoch2, LPN+3), - error_trimmed = trim(P1, Epoch2, LPN+3), - - Epoch2 = get__min_epoch(P1), - ok = stop(P1), - ok - after - ok = corfurl_util:delete_dir(Dir) - end. - -seal_persistence_test() -> - Dir = "/tmp/flu." ++ os:getpid(), - {ok, P1} = start_link(Dir), - try - 0 = get__min_epoch(P1), - Epoch = 665, - {ok, LPN} = seal(P1, Epoch), - Epoch = get__min_epoch(P1), - ok = stop(P1), - - {ok, P2} = start_link(Dir), - Epoch = get__min_epoch(P2), - - ok = stop(P2), - ok - after - ok = corfurl_util:delete_dir(Dir) - end. - --endif. % TEST diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 92426d0..a4bee05 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -29,12 +29,16 @@ -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. -endif. -define(SERVER, ?MODULE). start_link(FLUs) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, {FLUs}, []). + %% gen_server:start_link({local, ?SERVER}, ?MODULE, {FLUs}, []). + gen_server:start_link(?MODULE, {FLUs}, []). stop(Pid) -> gen_server:call(Pid, stop, infinity). @@ -46,6 +50,7 @@ get(Pid, NumPages) -> init({FLUs}) -> MLP = get_max_logical_page(FLUs), + io:format(user, "~s:init: MLP = ~p\n", [?MODULE, MLP]), {ok, MLP + 1}. handle_call({get, NumPages}, _From, MLP) -> @@ -78,6 +83,7 @@ get_max_logical_page(FLUs) -> %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% -ifdef(TEST). +-ifndef(PULSE). smoke_test() -> BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".", @@ -120,4 +126,5 @@ smoke_test() -> Del() end. +-endif. % not PULSE -endif. % TEST diff --git a/prototype/corfurl/test/corfurl_flu_test.erl b/prototype/corfurl/test/corfurl_flu_test.erl new file mode 100644 index 0000000..21d0f15 --- /dev/null +++ b/prototype/corfurl/test/corfurl_flu_test.erl @@ -0,0 +1,134 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_flu_test). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-endif. + +-include("corfurl.hrl"). + +-define(M, corfurl_flu). + +-ifdef(TEST). +-ifndef(PULSE). + +startstop_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + {ok, _} = ?M:status(P1), + ok = ?M:stop(P1), + {'EXIT', _} = (catch ?M:stop(P1)), + + {ok, P2} = ?M:start_link(Dir), + 0 = ?M:get__mlp(P2), + 0 = ?M:get__min_epoch(P2), + ok = ?M:stop(P2), + + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +basic_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + Epoch1 = 1, + Epoch2 = 2, + LPN = 1, + Bin1 = <<42:64>>, + Bin2 = <<42042:64>>, + + error_unwritten = ?M:read(P1, Epoch1, LPN), + error_unwritten = ?M:trim(P1, Epoch1, LPN), + error_unwritten = ?M:trim(P1, Epoch1, LPN+77), + + ok = ?M:write(P1, Epoch1, LPN, Bin1), + error_overwritten = ?M:write(P1, Epoch1, LPN, Bin1), + error_overwritten = ?M:fill(P1, Epoch1, LPN), + LPN = ?M:get__mlp(P1), + 0 = ?M:get__min_epoch(P1), + 0 = ?M:get__trim_watermark(P1), + {ok, LPN} = ?M:seal(P1, Epoch1), + 1 = ?M:get__min_epoch(P1), + + error_overwritten = ?M:write(P1, Epoch2, LPN, Bin1), + ok = ?M:write(P1, Epoch2, LPN+1, Bin2), + Epoch1 = ?M:get__min_epoch(P1), + + {ok, Bin1} = ?M:read(P1, Epoch1, LPN), + {ok, Bin2} = ?M:read(P1, Epoch2, LPN+1), + error_unwritten = ?M:read(P1, Epoch2, LPN+2), + badarg = ?M:read(P1, Epoch2, 1 bsl 2982), + + error_badepoch = ?M:seal(P1, Epoch1), + {ok, _} = ?M:seal(P1, Epoch2), + error_badepoch = ?M:seal(P1, Epoch2), + + error_badepoch = ?M:read(P1, Epoch1, LPN), + error_badepoch = ?M:read(P1, Epoch1, LPN+1), + {ok, Bin1} = ?M:read(P1, Epoch2, LPN), + {ok, Bin2} = ?M:read(P1, Epoch2, LPN+1), + + error_badepoch = ?M:trim(P1, Epoch1, LPN+1), + ok = ?M:trim(P1, Epoch2, LPN+1), + error_trimmed = ?M:trim(P1, Epoch2, LPN+1), + %% Current watermark processing is broken. But we'll test what's + %% there now. + ExpectedWaterFixMe = LPN+1, + ExpectedWaterFixMe = ?M:get__trim_watermark(P1), + + ok = ?M:fill(P1, Epoch2, LPN+3), + error_trimmed = ?M:read(P1, Epoch2, LPN+3), + error_trimmed = ?M:fill(P1, Epoch2, LPN+3), + error_trimmed = ?M:trim(P1, Epoch2, LPN+3), + + Epoch2 = ?M:get__min_epoch(P1), + ok = ?M:stop(P1), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +seal_persistence_test() -> + Dir = "/tmp/flu." ++ os:getpid(), + {ok, P1} = ?M:start_link(Dir), + try + 0 = ?M:get__min_epoch(P1), + Epoch = 665, + {ok, LPN} = ?M:seal(P1, Epoch), + Epoch = ?M:get__min_epoch(P1), + ok = ?M:stop(P1), + + {ok, P2} = ?M:start_link(Dir), + Epoch = ?M:get__min_epoch(P2), + + ok = ?M:stop(P2), + ok + after + ok = corfurl_util:delete_dir(Dir) + end. + +-endif. % not PULSE +-endif. % TEST diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl new file mode 100644 index 0000000..7e8ba55 --- /dev/null +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -0,0 +1,274 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_pulse). + +-ifdef(TEST). +-ifdef(PULSE). + +-compile(export_all). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eqc/include/eqc_statem.hrl"). + +-include("corfurl.hrl"). + +-include_lib("eunit/include/eunit.hrl"). + +-compile({parse_transform, pulse_instrument}). +%% -compile({pulse_replace_module, +%% [{application, pulse_application}]}). + +-compile({pulse_skip,[{prop_pulse_test_,0},{really_delete_bitcask,0},{copy_bitcask_app,0}]}). +-compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). + +%% Used for output within EUnit... +-define(QC_FMT(Fmt, Args), + io:format(user, Fmt, Args)). + +%% And to force EUnit to output QuickCheck output... +-define(QC_OUT(P), + eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)). + +-record(run, { + seq, % Sequencer + proj, % Projection + flus % List of FLUs + }). + +-record(state, { + is_setup = false :: boolean(), + num_chains = 0 :: integer(), + chain_len = 0 :: integer(), + page_size = 0 :: integer(), + run :: #run{} + }). + +initial_state() -> + #state{}. + +gen_page(PageSize) -> + binary(PageSize). + +command(#state{run=Run} = S) -> + ?LET({NumChains, ChainLen, PageSize}, + {parameter(num_chains), parameter(chain_len), parameter(page_size)}, + frequency( + [{10, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize]}} + || not S#state.is_setup] ++ + [{10, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} + || S#state.is_setup] ++ + [])). + +%% Precondition, checked before a command is added to the command sequence. +precondition(S, {call, _, setup, _}) -> + not S#state.is_setup; +precondition(S, {call, _, _, _}) -> + S#state.is_setup. + +%% Next state transformation, S is the current state and V is the result of the +%% command. +next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize]}) -> + S#state{is_setup=true, + num_chains=NumChains, + chain_len=ChainLen, + page_size=PageSize, + run=Res}; +next_state(S, _, {call, _, append, _}) -> + S. + +eqeq(X, X) -> true; +eqeq(X, Y) -> {X, '/=', Y}. + +postcondition(_S, {call, _, setup, _}, #run{} = _V) -> + true; +postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) -> + true; +postcondition(_S, {call, _, append, _}, V) -> + eqeq(V, todoTODO_fixit). + +run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> + %% AfterTime = if LocalOrSlave == local -> 50000; + %% LocalOrSlave == slave -> 1000000 + %% end, + event_logger:start_link(), + pulse:start(), + error_logger:tty(false), + error_logger:add_report_handler(handle_errors), + event_logger:start_logging(), + X = + try + {H, S, Res, Trace} = pulse:run(fun() -> + %% application:start(my_test_app), + %% receive after AfterTime -> ok end, + {H, S, R} = run_parallel_commands(?MODULE, Cmds), + %% io:format(user, "Yooo: H = ~p\n", [H]), + %% io:format(user, "Yooo: S = ~p\n", [S]), + %% io:format(user, "Yooo: R = ~p\n", [R]), + %% receive after AfterTime -> ok end, + Trace = event_logger:get_events(), + %% receive after AfterTime -> ok end, + catch exit(pulse_application_controller, shutdown), + {H, S, R, Trace} + end, [{seed, Seed}, + {strategy, unfair}]), + Schedule = pulse:get_schedule(), + Errors = gen_event:call(error_logger, handle_errors, get_errors, 60*1000), + [clean_up_runtime(S) || S#state.run /= undefined], + {H, S, Res, Trace, Schedule, Errors} + catch + _:Err -> + {'EXIT', Err} + end, + X. + +prop_pulse() -> + prop_pulse(local). + +prop_pulse(LocalOrSlave) -> + ?FORALL({NumChains, ChainLen, PageSize}, + {choose(1, 3), choose(1, 3), choose(1, 16)}, + begin + P = ?FORALL({Cmds, Seed}, + {with_parameters([{num_chains, NumChains}, + {chain_len, ChainLen}, + {page_size, PageSize}], parallel_commands(?MODULE)), + pulse:seed()}, + begin + case run_commands_on_node(LocalOrSlave, Cmds, Seed) of + {'EXIT', Err} -> + equals({'EXIT', Err}, ok); + {_H, S, Res, Trace, Schedule, Errors} -> + CheckTrace = check_trace(Trace, Cmds, Seed), + ?WHENFAIL( + ?QC_FMT("\nState: ~p\n", [S]), + measure(schedule, length(Schedule), + conjunction( + [{simple_result, equals(Res, ok)}, + {errors, equals(Errors, [])}, + {events, CheckTrace} ]))) + end + end), + P + end). + +prop_pulse_test_() -> + Timeout = case os:getenv("PULSE_TIME") of + false -> 60; + Val -> list_to_integer(Val) + end, + ExtraTO = case os:getenv("PULSE_SHRINK_TIME") of + false -> 0; + Val2 -> list_to_integer(Val2) + end, + io:format(user, "prop_pulse_test time: ~p + ~p seconds\n", + [Timeout, ExtraTO]), + {timeout, (Timeout+ExtraTO) + 60, + fun() -> + ?assert(eqc:quickcheck(eqc:testing_time(Timeout,?QC_OUT(prop_pulse())))) + end}. + +check_trace(Trace, _Cmds, _Seed) -> + %% TODO: yeah + Results = [X || {_TS, {result, _Pid, X}} <- Trace], + lists:sort(Results) == lists:usort(Results). + +%% Presenting command data statistics in a nicer way +command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> + Fun. + +%% Convenience functions for running tests + +test() -> + test({20, sec}). + +test(N) when is_integer(N) -> + quickcheck(numtests(N, prop_pulse())); +test({Time, sec}) -> + quickcheck(eqc:testing_time(Time, prop_pulse())); +test({Time, min}) -> + test({Time * 60, sec}); +test({Time, h}) -> + test({Time * 60, min}). + +check() -> + check(current_counterexample()). + +verbose() -> + verbose(current_counterexample()). + +verbose(CE) -> + erlang:put(verbose, true), + Ok = check(CE), + erlang:put(verbose, false), + Ok. + +check(CE) -> + check(on_output(fun("OK" ++ _, []) -> ok; (Fmt, Args) -> io:format(Fmt, Args) end, + prop_pulse(true == erlang:get(verbose))), + CE). + +recheck() -> + recheck(prop_pulse()). + +zipwith(F, [X|Xs], [Y|Ys]) -> + [F(X, Y)|zipwith(F, Xs, Ys)]; +zipwith(_, _, _) -> []. + +clean_up_runtime(#state{run=R} = _S) -> + %% io:format(user, "clean_up_runtime: run = ~p\n", [R]), + catch corfurl_sequencer:stop(R#run.seq), + [catch corfurl_flu:stop(F) || F <- R#run.flus], + corfurl_test:setup_del_all(length(R#run.flus)). + +make_chains(ChainLen, FLUs) -> + make_chains(ChainLen, FLUs, [], []). + +make_chains(_ChainLen, [], SmallAcc, BigAcc) -> + lists:reverse([SmallAcc|BigAcc]); +make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> + if length(SmallAcc) == ChainLen -> + make_chains(ChainLen, T, [H], [SmallAcc|BigAcc]); + true -> + make_chains(ChainLen, T, [H|SmallAcc], BigAcc) + end. + +setup(NumChains, ChainLen, PageSize) -> + N = NumChains * ChainLen, + FLUs = corfurl_test:setup_basic_flus(N, PageSize, 50000), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + Chains = make_chains(ChainLen, FLUs), + %% io:format(user, "Cs = ~p\n", [Chains]), + Proj = corfurl:new_simple_projection(1, 1, 50000, Chains), + #run{seq=Seq, proj=Proj, flus=FLUs}. + +-define(LOG(Tag, MkCall), + event_logger:event({call, self(), Tag}), + __Result = MkCall, + event_logger:event({result, self(), __Result}), + __Result). + +append(#run{seq=Seq,proj=Proj}, Page) -> + ?LOG({append, Page}, + corfurl:append_page(Seq, Proj, Page)). + +-endif. % PULSE +-endif. % TEST + diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 76af137..4490131 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -23,15 +23,33 @@ -include("corfurl.hrl"). -ifdef(TEST). + -include_lib("eunit/include/eunit.hrl"). -compile(export_all). --endif. -define(M, corfurl). %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% --ifdef(TEST). + +setup_flu_basedir() -> + "/tmp/" ++ atom_to_list(?MODULE) ++ ".". + +setup_flu_dir(N) -> + setup_flu_basedir() ++ integer_to_list(N). + +setup_del_all(NumFLUs) -> + [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || + N <- lists:seq(1, NumFLUs)]. + +setup_basic_flus(NumFLUs, PageSize, NumPages) -> + setup_del_all(NumFLUs), + [begin + element(2, corfurl_flu:start_link(setup_flu_dir(X), + PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) + end || X <- lists:seq(1, NumFLUs)]. + +-ifndef(PULSE). save_read_test() -> Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", @@ -51,23 +69,6 @@ save_read_test() -> ok = corfurl_util:delete_dir(Dir) end. -setup_flu_basedir() -> - "/tmp/" ++ atom_to_list(?MODULE) ++ ".". - -setup_flu_dir(N) -> - setup_flu_basedir() ++ integer_to_list(N). - -setup_del_all(NumFLUs) -> - [ok = corfurl_util:delete_dir(setup_flu_dir(N)) || - N <- lists:seq(1, NumFLUs)]. - -setup_basic_flus(NumFLUs, PageSize, NumPages) -> - setup_del_all(NumFLUs), - [begin - element(2, corfurl_flu:start_link(setup_flu_dir(X), - PageSize, NumPages * (PageSize * ?PAGE_OVERHEAD))) - end || X <- lists:seq(1, NumFLUs)]. - smoke1_test() -> NumFLUs = 6, PageSize = 8, @@ -211,5 +212,5 @@ forfun(NumProcs) -> end. -endif. % TIMING_TEST - +-endif. % not PULSE -endif. % TEST diff --git a/prototype/corfurl/test/pulse_util/event_logger.erl b/prototype/corfurl/test/pulse_util/event_logger.erl new file mode 100644 index 0000000..54fa964 --- /dev/null +++ b/prototype/corfurl/test/pulse_util/event_logger.erl @@ -0,0 +1,131 @@ +%%% File : handle_errors.erl +%%% Author : Ulf Norell +%%% Description : +%%% Created : 26 Mar 2012 by Ulf Norell +-module(event_logger). + +-compile(export_all). + +-behaviour(gen_server). + +%% API +-export([start_link/0, event/1, get_events/0, start_logging/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). + +-record(state, { start_time, events = [] }). + +-record(event, { timestamp, data }). + + +%%==================================================================== +%% API +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link() -> {ok,Pid} | ignore | {error,Error} +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +start_logging() -> + gen_server:call(?MODULE, {start, timestamp()}). + +event(EventData) -> + gen_server:call(?MODULE, + #event{ timestamp = timestamp(), data = EventData }). + +async_event(EventData) -> + gen_server:cast(?MODULE, + #event{ timestamp = timestamp(), data = EventData }). + +get_events() -> + gen_server:call(?MODULE, get_events). + +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init(Args) -> {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%% Description: Initiates the server +%%-------------------------------------------------------------------- +init([]) -> + {ok, #state{}}. + +%%-------------------------------------------------------------------- +%% Function: %% handle_call(Request, From, State) -> +%% {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% Description: Handling call messages +%%-------------------------------------------------------------------- +handle_call(Event = #event{}, _From, State) -> + {reply, ok, add_event(Event, State)}; +handle_call({start, Now}, _From, S) -> + {reply, ok, S#state{ events = [], start_time = Now }}; +handle_call(get_events, _From, S) -> + {reply, lists:reverse([ {E#event.timestamp, E#event.data} || E <- S#state.events]), + S#state{ events = [] }}; +handle_call(Request, _From, State) -> + {reply, {error, {bad_call, Request}}, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% Description: Handling cast messages +%%-------------------------------------------------------------------- +handle_cast(Event = #event{}, State) -> + {noreply, add_event(Event, State)}; +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% Description: Handling all non call/cast messages +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate(Reason, State) -> void() +%% Description: This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any necessary +%% cleaning up. When it returns, the gen_server terminates with Reason. +%% The return value is ignored. +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState} +%% Description: Convert process state when code is changed +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +add_event(#event{timestamp = Now, data = Data}, State) -> + Event = #event{ timestamp = Now - State#state.start_time, data = Data }, + State#state{ events = [Event|State#state.events] }. + +timestamp() -> + {A, B, C} = erlang:now(), + 1000000 * (1000000 * A + B) + C. + diff --git a/prototype/corfurl/test/pulse_util/handle_errors.erl b/prototype/corfurl/test/pulse_util/handle_errors.erl new file mode 100644 index 0000000..798f379 --- /dev/null +++ b/prototype/corfurl/test/pulse_util/handle_errors.erl @@ -0,0 +1,153 @@ +%%%------------------------------------------------------------------- +%%% @author Hans Svensson <> +%%% @copyright (C) 2012, Hans Svensson +%%% @doc +%%% +%%% @end +%%% Created : 19 Mar 2012 by Hans Svensson <> +%%%------------------------------------------------------------------- +-module(handle_errors). + +-behaviour(gen_event). + +%% API +-export([start_link/0, add_handler/0]). + +%% gen_event callbacks +-export([init/1, handle_event/2, handle_call/2, + handle_info/2, terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). + +-record(state, { errors = [] }). + +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @doc +%% Creates an event manager +%% +%% @spec start_link() -> {ok, Pid} | {error, Error} +%% @end +%%-------------------------------------------------------------------- +start_link() -> + gen_event:start_link({local, ?SERVER}). + +%%-------------------------------------------------------------------- +%% @doc +%% Adds an event handler +%% +%% @spec add_handler() -> ok | {'EXIT', Reason} | term() +%% @end +%%-------------------------------------------------------------------- +add_handler() -> + gen_event:add_handler(?SERVER, ?MODULE, []). + +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever a new event handler is added to an event manager, +%% this function is called to initialize the event handler. +%% +%% @spec init(Args) -> {ok, State} +%% @end +%%-------------------------------------------------------------------- +init([]) -> + {ok, #state{}}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives an event sent using +%% gen_event:notify/2 or gen_event:sync_notify/2, this function is +%% called for each installed event handler to handle the event. +%% +%% @spec handle_event(Event, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_event({error, _, {_, "Hintfile '~s' has bad CRC" ++ _, _}}, State) -> + {ok, State}; +handle_event({error, _, {_, "** Generic server" ++ _, _}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, not_ready]}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to merge ~p: ~p\n", [_, {merge_locked, _, _}]}}, State) -> + {ok, State}; +handle_event({error, _, {_, "Failed to read lock data from ~s: ~p\n", [_, {invalid_data, <<>>}]}}, State) -> + {ok, State}; +handle_event({error, _, Event}, State) -> + {ok, State#state{ errors = [Event|State#state.errors] }}; +handle_event(_Event, State) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives a request sent using +%% gen_event:call/3,4, this function is called for the specified +%% event handler to handle the request. +%% +%% @spec handle_call(Request, State) -> +%% {ok, Reply, State} | +%% {swap_handler, Reply, Args1, State1, Mod2, Args2} | +%% {remove_handler, Reply} +%% @end +%%-------------------------------------------------------------------- +handle_call(get_errors, S) -> + {ok, S#state.errors, S#state{ errors = [] }}; +handle_call(_Request, State) -> + Reply = ok, + {ok, Reply, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% This function is called for each installed event handler when +%% an event manager receives any other message than an event or a +%% synchronous request (or a system message). +%% +%% @spec handle_info(Info, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event handler is deleted from an event manager, this +%% function is called. It should be the opposite of Module:init/1 and +%% do any necessary cleaning up. +%% +%% @spec terminate(Reason, State) -> void() +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Convert process state when code is changed +%% +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== From bcc6cf1e6a70462ba1528988bda3f98455563604 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 00:52:15 +0900 Subject: [PATCH 12/70] PULSE bugfix: race with finish_init message --- prototype/corfurl/src/corfurl_flu.erl | 3 +++ prototype/corfurl/src/corfurl_sequencer.erl | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 4c02531..65c3936 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -135,6 +135,9 @@ init({Dir, ExpPageSize, ExpMaxMem}) -> self() ! finish_init, % TODO {ok, State}. +handle_call(Call, From, #state{max_logical_page=unknown} = State) -> + {noreply, NewState} = handle_info(finish_init, State), + handle_call(Call, From, NewState); handle_call({write, ClientEpoch, _LogicalPN, _PageBin}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index a4bee05..2dafadd 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -50,7 +50,6 @@ get(Pid, NumPages) -> init({FLUs}) -> MLP = get_max_logical_page(FLUs), - io:format(user, "~s:init: MLP = ~p\n", [?MODULE, MLP]), {ok, MLP + 1}. handle_call({get, NumPages}, _From, MLP) -> From b430fa479c26cceb1408d0bb92da76637c4a1e5a Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 01:27:02 +0900 Subject: [PATCH 13/70] PULSE condition checking is only 98% embarassing --- prototype/corfurl/rebar.config.script | 12 +++++------ prototype/corfurl/test/corfurl_pulse.erl | 27 ++++++++++++++++++------ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/prototype/corfurl/rebar.config.script b/prototype/corfurl/rebar.config.script index 2155bb5..ae29cb2 100644 --- a/prototype/corfurl/rebar.config.script +++ b/prototype/corfurl/rebar.config.script @@ -11,12 +11,12 @@ case PulseBuild of [{erlang,display,1} ]}, {pulse_side_effect, - [ {corfurl_sequencer, get, 0} - , {corfurl_flu, write, 4} - , {corfurl_flu, read, 3} - , {corfurl_flu, seal, 2} - , {corfurl_flu, trim, 3} - , {corfurl_flu, fill, 3} + [ {corfurl_sequencer, get, '_'} + , {corfurl_flu, write, '_'} + , {corfurl_flu, read, '_'} + , {corfurl_flu, seal, '_'} + , {corfurl_flu, trim, '_'} + , {corfurl_flu, fill, '_'} , {event_logger, event, '_'} diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 7e8ba55..57f5e6e 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -185,10 +185,23 @@ prop_pulse_test_() -> ?assert(eqc:quickcheck(eqc:testing_time(Timeout,?QC_OUT(prop_pulse())))) end}. -check_trace(Trace, _Cmds, _Seed) -> - %% TODO: yeah +check_trace(Trace, Cmds, _Seed) -> + %% TODO: yeah!!!!!!!!!! + Results = [X || {_TS, {result, _Pid, X}} <- Trace], - lists:sort(Results) == lists:usort(Results). + {CmdsSeq, CmdsPars} = Cmds, + NaiveCmds = CmdsSeq ++ lists:flatten(CmdsPars), + NaiveCommands = [{Sym, Args} || {set,_,{call,_,Sym,Args}} <- NaiveCmds], + NaiveAppends = [X || {append, _} = X <- NaiveCommands], + + %% If you want to see PULSE causing crazy scheduling, then + %% use this commented conjunction() instead of the real one: + %% conjunction( + %% [{bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). + + conjunction( + [{hackkkkk_NumResults_match_NumAppends, equals(length(NaiveAppends), length(Results))}, + {no_duplicate_results, equals(lists:sort(Results), lists:usort(Results))}]). %% Presenting command data statistics in a nicer way command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> @@ -260,10 +273,10 @@ setup(NumChains, ChainLen, PageSize) -> #run{seq=Seq, proj=Proj, flus=FLUs}. -define(LOG(Tag, MkCall), - event_logger:event({call, self(), Tag}), - __Result = MkCall, - event_logger:event({result, self(), __Result}), - __Result). + event_logger:event({call, self(), Tag}), + LOG__Result = MkCall, + event_logger:event({result, self(), LOG__Result}), + LOG__Result). append(#run{seq=Seq,proj=Proj}, Page) -> ?LOG({append, Page}, From e0ec95e8f7a81c18b2e80cab6f475d076d347c82 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 01:54:16 +0900 Subject: [PATCH 14/70] Added small PULSE usage sketch in docs/corfurl.md --- prototype/corfurl/.gitignore | 2 + prototype/corfurl/docs/corfurl.md | 106 +++++++++++++++++++++++ prototype/corfurl/test/corfurl_pulse.erl | 5 +- 3 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 prototype/corfurl/docs/corfurl.md diff --git a/prototype/corfurl/.gitignore b/prototype/corfurl/.gitignore index d712c69..661af0e 100644 --- a/prototype/corfurl/.gitignore +++ b/prototype/corfurl/.gitignore @@ -1,4 +1,6 @@ .eunit +.eqc-info +current_counterexample.eqc deps ebin/*.beam ebin/*.app diff --git a/prototype/corfurl/docs/corfurl.md b/prototype/corfurl/docs/corfurl.md new file mode 100644 index 0000000..fd02134 --- /dev/null +++ b/prototype/corfurl/docs/corfurl.md @@ -0,0 +1,106 @@ + +## Fiddling with PULSE + +Do the following: + + make clean + make + make pulse + +... then watch the dots go across the screen for 60 seconds. If you +wish, you can press `Control-c` to interrupt the test. We're really +interested in the build artifacts. + + erl -pz .eunit deps/*/ebin + eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())). + +This will run the PULSE test for 5 seconds. Feel free to adjust for +as many seconds as you wish. + + Erlang R16B02-basho4 (erts-5.10.3) [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace] + + Eshell V5.10.3 (abort with ^G) + 1> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())). + Starting Quviq QuickCheck version 1.30.4 + (compiled at {{2014,2,7},{9,19,50}}) + Licence for Basho reserved until {{2014,2,17},{1,41,39}} + ...................................................................................... + OK, passed 86 tests + schedule: Count: 86 Min: 2 Max: 1974 Avg: 3.2e+2 Total: 27260 + true + 2> + +REPL interactive work can be done via: + +1. Edit code, e.g. `corfurl_pulse.erl`. +2. Run `env BITCASK_PULSE=1 ./rebar skip_deps=true -D PULSE eunit suites=SKIP` +to compile. +3. Reload any recompiled modules, e.g. `l(corfurl_pulse).` +4. Resume QuickCheck activities. + +## Seeing an PULSE scheduler interleaving failure in action + +1. Edit `corfurl_pulse:check_trace()` to uncomment the + use of `conjunction()` that mentions `bogus_order_check_do_not_use_me` + and comment out the real `conjunction()` call below it. +2. Recompile & reload. +3. Check. + +For example: + + 9> eqc:quickcheck(eqc:testing_time(5, corfurl_pulse:prop_pulse())). + .........Failed! After 9 tests. + +Sweet! The first tuple below are the first `?FORALL()` values, +and the 2nd is the list of commands, +`{SequentialCommands, ListofParallelCommandLists}`. The 3rd is the +seed used to perturb the PULSE scheduler. + +In this case, `SequentialCommands` has two calls (to `setup()` then +`append()`) and there are two parallel procs: one makes 1 call +call to `append()` and the other makes 2 calls to `append()`. + + {2,2,9} + {{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,9]}}], + [[{set,{var,3}, + {call,corfurl_pulse,append, + [{var,1},<<231,149,226,203,10,105,54,223,147>>]}}], + [{set,{var,2}, + {call,corfurl_pulse,append, + [{var,1},<<7,206,146,75,249,13,154,238,110>>]}}, + {set,{var,4}, + {call,corfurl_pulse,append, + [{var,1},<<224,121,129,78,207,23,79,216,36>>]}}]]}, + {27492,46961,4884}} + +Here are our results: + + simple_result: passed + errors: passed + events: failed + identity: passed + bogus_order_check_do_not_use_me: failed + [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}] + +Our (bogus!) order expectation was violated. Shrinking! + + simple_result: passed + errors: passed + events: failed + identity: passed + bogus_order_check_do_not_use_me: failed + [{ok,1},{ok,3},{ok,2}] /= [{ok,1},{ok,2},{ok,3}] + +Shrinking was able to remove two `append()` calls and to shrink the +size of the pages down from 9 bytes down to 1 byte. + + Shrinking........(8 times) + {1,1,1} + {{[{set,{var,1},{call,corfurl_pulse,setup,[1,1,1]}}], + [[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}], + [{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}}]]}, + {27492,46961,4884}} + events: failed + bogus_order_check_do_not_use_me: failed + [{ok,2},{ok,1}] /= [{ok,1},{ok,2}] + false diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 57f5e6e..97526b2 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -158,7 +158,7 @@ prop_pulse(LocalOrSlave) -> {_H, S, Res, Trace, Schedule, Errors} -> CheckTrace = check_trace(Trace, Cmds, Seed), ?WHENFAIL( - ?QC_FMT("\nState: ~p\n", [S]), + S = S, % ?QC_FMT("\nState: ~p\n", [S]), measure(schedule, length(Schedule), conjunction( [{simple_result, equals(Res, ok)}, @@ -197,7 +197,8 @@ check_trace(Trace, Cmds, _Seed) -> %% If you want to see PULSE causing crazy scheduling, then %% use this commented conjunction() instead of the real one: %% conjunction( - %% [{bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). + %% [{identity, equals(NaiveAppends, NaiveAppends)}, + %% {bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). conjunction( [{hackkkkk_NumResults_match_NumAppends, equals(length(NaiveAppends), length(Results))}, From 21a3fd6d07e58102707dfbf2c0291dc7abdf8d41 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 18:59:42 +0900 Subject: [PATCH 15/70] Use temporal logic for check_trace() --- prototype/corfurl/test/corfurl_pulse.erl | 80 ++++++++++++++++++------ 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 97526b2..91f323c 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -33,11 +33,9 @@ -include_lib("eunit/include/eunit.hrl"). -compile({parse_transform, pulse_instrument}). -%% -compile({pulse_replace_module, -%% [{application, pulse_application}]}). --compile({pulse_skip,[{prop_pulse_test_,0},{really_delete_bitcask,0},{copy_bitcask_app,0}]}). --compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). +-compile({pulse_skip,[{prop_pulse_test_,0},{clean_up_runtime,1}]}). +%% -compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). %% Used for output within EUnit... -define(QC_FMT(Fmt, Args), @@ -185,24 +183,70 @@ prop_pulse_test_() -> ?assert(eqc:quickcheck(eqc:testing_time(Timeout,?QC_OUT(prop_pulse())))) end}. -check_trace(Trace, Cmds, _Seed) -> - %% TODO: yeah!!!!!!!!!! - Results = [X || {_TS, {result, _Pid, X}} <- Trace], - {CmdsSeq, CmdsPars} = Cmds, - NaiveCmds = CmdsSeq ++ lists:flatten(CmdsPars), - NaiveCommands = [{Sym, Args} || {set,_,{call,_,Sym,Args}} <- NaiveCmds], - NaiveAppends = [X || {append, _} = X <- NaiveCommands], +%% If you want to see PULSE causing crazy scheduling, then +%% use this code instead of the usual stuff. +%% check_trace(Trace, Cmds, _Seed) -> +%% Results = [X || {_TS, {result, _Pid, X}} <- Trace], +%% {CmdsSeq, CmdsPars} = Cmds, +%% NaiveCmds = CmdsSeq ++ lists:flatten(CmdsPars), +%% NaiveCommands = [{Sym, Args} || {set,_,{call,_,Sym,Args}} <- NaiveCmds], +%% NaiveAppends = [X || {append, _} = X <- NaiveCommands], +%% conjunction( +%% [{identity, equals(NaiveAppends, NaiveAppends)}, +%% {bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). - %% If you want to see PULSE causing crazy scheduling, then - %% use this commented conjunction() instead of the real one: - %% conjunction( - %% [{identity, equals(NaiveAppends, NaiveAppends)}, - %% {bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). +%% Example Trace (raw event info, from the ?LOG macro) +%% +%% [{32014,{call,<0.467.0>,{append,<<"O">>}}}, +%% {32421,{call,<0.466.0>,{append,<<134>>}}}, +%% {44522,{result,<0.467.0>,{ok,1}}}, +%% {47651,{result,<0.466.0>,{ok,2}}}] + +check_trace(Trace, _Cmds, _Seed) -> + Events = eqc_temporal:from_timed_list(Trace), + %% Example Events, temporal style, 1 usec resolution, same as original trace + %% + %% [{0,32014,[]}, + %% {32014,32015,[{call,<0.467.0>,{append,<<"O">>}}]}, + %% {32015,32421,[]}, + %% {32421,32422,[{call,<0.466.0>,{append,<<134>>}}]}, + %% {32422,44522,[]}, + %% {44522,44523,[{result,<0.467.0>,{ok,...}}]}, + %% {44523,47651,[]}, + %% {47651,47652,[{result,<0.466.0>,{ok,...}}]}, + %% {47652,infinity,[]}] + + Calls = eqc_temporal:stateful( + fun({call, Pid, Call}) -> [{call, Pid, Call}] end, + fun({call, Pid, _Call}, {result, Pid, _}) -> [] end, + Events), + %% Example Calls (temporal map of when a call is in progress) + %% + %% [{0,32014,[]}, + %% {32014,32421,[{call,<0.467.0>,{append,<<"O">>}}]}, + %% {32421,44522, + %% [{call,<0.466.0>,{append,<<134>>}},{call,<0.467.0>,{append,<<"O">>}}]}, + %% {44522,47651,[{call,<0.466.0>,{append,<<134>>}}]}, + %% {47651,infinity,[]}] + + AppendResultFilter = fun({ok, LPN}) -> LPN; + (Else) -> Else end, + AppendResults = eqc_temporal:stateful( + fun({call, Pid, Call}) -> [{call, Pid, Call}] end, + fun({call, Pid, {append, _Pg}}, {result, Pid, Res}) -> + [AppendResultFilter(Res)] end, + Events), + + %% Desired properties + AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), + NoAppendLPNDups = lists:sort(AppendResults) == lists:usort(AppendResults), conjunction( - [{hackkkkk_NumResults_match_NumAppends, equals(length(NaiveAppends), length(Results))}, - {no_duplicate_results, equals(lists:sort(Results), lists:usort(Results))}]). + [ + {all_calls_finish, AllCallsFinish}, + {no_append_duplicates, NoAppendLPNDups} + ]). %% Presenting command data statistics in a nicer way command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> From 58ced8d14c0bf700ca392d1f418db8198fbd0be2 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 17 Feb 2014 22:04:51 +0900 Subject: [PATCH 16/70] Add PULSE control over sequencer handing out duplicate page numbers --- prototype/corfurl/src/corfurl_sequencer.erl | 30 ++++++++-- prototype/corfurl/test/corfurl_pulse.erl | 61 ++++++++++++++------- 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 2dafadd..8b41040 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -23,6 +23,10 @@ -behaviour(gen_server). -export([start_link/1, stop/1, get/2]). +-ifdef(TEST). +-export([start_link/2]). +-compile(export_all). +-endif. -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -37,8 +41,10 @@ -define(SERVER, ?MODULE). start_link(FLUs) -> - %% gen_server:start_link({local, ?SERVER}, ?MODULE, {FLUs}, []). - gen_server:start_link(?MODULE, {FLUs}, []). + start_link(FLUs, standard). + +start_link(FLUs, SeqType) -> + gen_server:start_link(?MODULE, {FLUs, SeqType}, []). stop(Pid) -> gen_server:call(Pid, stop, infinity). @@ -48,12 +54,26 @@ get(Pid, NumPages) -> %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% -init({FLUs}) -> +init({FLUs, TypeOrSeed}) -> MLP = get_max_logical_page(FLUs), - {ok, MLP + 1}. + if TypeOrSeed == standard -> + {ok, MLP + 1}; + true -> + {Seed, BadPercent, MaxDifference} = TypeOrSeed, + random:seed(Seed), + {ok, {MLP+1, BadPercent, MaxDifference}} + end. -handle_call({get, NumPages}, _From, MLP) -> +handle_call({get, NumPages}, _From, MLP) when is_integer(MLP) -> {reply, MLP, MLP + NumPages}; +handle_call({get, NumPages}, _From, {MLP, BadPercent, MaxDifference}) -> + Fudge = case random:uniform(100) of + N when N < BadPercent -> + random:uniform(MaxDifference * 2) - MaxDifference; + _ -> + 0 + end, + {reply, erlang:max(1, MLP + Fudge), {MLP + NumPages, BadPercent, MaxDifference}}; handle_call(stop, _From, MLP) -> {stop, normal, ok, MLP}; handle_call(_Request, _From, MLP) -> diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 91f323c..830186c 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -65,11 +65,22 @@ initial_state() -> gen_page(PageSize) -> binary(PageSize). +gen_seed() -> + noshrink({choose(1, 20000), choose(1, 20000), choose(1, 20000)}). + +gen_sequencer_percent() -> + frequency([{10, choose(1,100)}, + {5, choose(90,100)}]). + +gen_sequencer() -> + frequency([{100, standard}, + {50, {gen_seed(), gen_sequencer_percent(), choose(1, 2)}}]). + command(#state{run=Run} = S) -> ?LET({NumChains, ChainLen, PageSize}, {parameter(num_chains), parameter(chain_len), parameter(page_size)}, frequency( - [{10, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize]}} + [{10, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize, gen_sequencer()]}} || not S#state.is_setup] ++ [{10, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ @@ -83,7 +94,7 @@ precondition(S, {call, _, _, _}) -> %% Next state transformation, S is the current state and V is the result of the %% command. -next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize]}) -> +next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize, _SeqType]}) -> S#state{is_setup=true, num_chains=NumChains, chain_len=ChainLen, @@ -184,18 +195,6 @@ prop_pulse_test_() -> end}. -%% If you want to see PULSE causing crazy scheduling, then -%% use this code instead of the usual stuff. -%% check_trace(Trace, Cmds, _Seed) -> -%% Results = [X || {_TS, {result, _Pid, X}} <- Trace], -%% {CmdsSeq, CmdsPars} = Cmds, -%% NaiveCmds = CmdsSeq ++ lists:flatten(CmdsPars), -%% NaiveCommands = [{Sym, Args} || {set,_,{call,_,Sym,Args}} <- NaiveCmds], -%% NaiveAppends = [X || {append, _} = X <- NaiveCommands], -%% conjunction( -%% [{identity, equals(NaiveAppends, NaiveAppends)}, -%% {bogus_order_check_do_not_use_me, equals(Results, lists:usort(Results))}]). - %% Example Trace (raw event info, from the ?LOG macro) %% %% [{32014,{call,<0.467.0>,{append,<<"O">>}}}, @@ -237,16 +236,27 @@ check_trace(Trace, _Cmds, _Seed) -> fun({call, Pid, {append, _Pg}}, {result, Pid, Res}) -> [AppendResultFilter(Res)] end, Events), + {_, infinity, AppendLPNs} = lists:last(eqc_temporal:all_future(AppendResults)), %% Desired properties AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), - NoAppendLPNDups = lists:sort(AppendResults) == lists:usort(AppendResults), + NoAppendLPNDups = lists:sort(AppendLPNs) == lists:usort(AppendLPNs), + ?WHENFAIL(begin + ?QC_FMT("*AppendLPNs: ~p\n", [range_ify(AppendLPNs)]) + end, conjunction( [ {all_calls_finish, AllCallsFinish}, - {no_append_duplicates, NoAppendLPNDups} - ]). + {no_append_duplicates, NoAppendLPNDups}, + %% If you want to see PULSE causing crazy scheduling, then + %% change one of the "true orelse" -> "false orelse" below. + {bogus_no_gaps, + true orelse + (AppendLPNs == [] orelse length(range_ify(AppendLPNs)) == 1)}, + {bogus_exactly_1_to_N, + true orelse (AppendLPNs == lists:seq(1, length(AppendLPNs)))} + ])). %% Presenting command data statistics in a nicer way command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> @@ -308,15 +318,28 @@ make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> make_chains(ChainLen, T, [H|SmallAcc], BigAcc) end. -setup(NumChains, ChainLen, PageSize) -> +setup(NumChains, ChainLen, PageSize, SeqType) -> N = NumChains * ChainLen, FLUs = corfurl_test:setup_basic_flus(N, PageSize, 50000), - {ok, Seq} = corfurl_sequencer:start_link(FLUs), + {ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType), Chains = make_chains(ChainLen, FLUs), %% io:format(user, "Cs = ~p\n", [Chains]), Proj = corfurl:new_simple_projection(1, 1, 50000, Chains), #run{seq=Seq, proj=Proj, flus=FLUs}. +range_ify([]) -> + []; +range_ify(L) -> + [H|T] = lists:sort(L), + range_ify(H, H+1, T). + +range_ify(Beginning, Next, [Next|T]) -> + range_ify(Beginning, Next+1, T); +range_ify(Beginning, Next, [Else|T]) -> + [{Beginning, to, Next-1}|range_ify(Else, Else+1, T)]; +range_ify(Beginning, Next, []) -> + [{Beginning, to, Next-1}]. + -define(LOG(Tag, MkCall), event_logger:event({call, self(), Tag}), LOG__Result = MkCall, From 25bf64a03c13cf335e6d22ebd5aa12ca5e16b7f8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 18 Feb 2014 00:15:14 +0900 Subject: [PATCH 17/70] Just in case commit: WIP --- prototype/corfurl/test/corfurl_pulse.erl | 87 ++++++++++++++++++------ 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 830186c..7b27563 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -202,14 +202,24 @@ prop_pulse_test_() -> %% {44522,{result,<0.467.0>,{ok,1}}}, %% {47651,{result,<0.466.0>,{ok,2}}}] -check_trace(Trace, _Cmds, _Seed) -> +check_trace(Trace0, _Cmds, _Seed) -> + %% Let's treat this thing like a KV store. It is, mostly. + %% Key = LPN, Value = error_unwritten | {ok, Blob} | error_trimmed + %% + %% Problem: At {call, Pid, ...} time, we don't know what Key is! + %% We find out at {return, Pid, {ok, LSN}} time. + %% Also, the append might fail, so the model can ignore those + %% failures because they're not mutating any state that and + %% external viewer can see. + Trace = add_LPN_to_append_calls(Trace0), + Events = eqc_temporal:from_timed_list(Trace), %% Example Events, temporal style, 1 usec resolution, same as original trace %% %% [{0,32014,[]}, - %% {32014,32015,[{call,<0.467.0>,{append,<<"O">>}}]}, + %% {32014,32015,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]}, %% {32015,32421,[]}, - %% {32421,32422,[{call,<0.466.0>,{append,<<134>>}}]}, + %% {32421,32422,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]}, %% {32422,44522,[]}, %% {44522,44523,[{result,<0.467.0>,{ok,...}}]}, %% {44523,47651,[]}, @@ -223,27 +233,44 @@ check_trace(Trace, _Cmds, _Seed) -> %% Example Calls (temporal map of when a call is in progress) %% %% [{0,32014,[]}, - %% {32014,32421,[{call,<0.467.0>,{append,<<"O">>}}]}, + %% {32014,32421,[{call,<0.467.0>,{append,<<"O">>,will_be,1}}]}, %% {32421,44522, - %% [{call,<0.466.0>,{append,<<134>>}},{call,<0.467.0>,{append,<<"O">>}}]}, - %% {44522,47651,[{call,<0.466.0>,{append,<<134>>}}]}, + %% [{call,<0.466.0>,{append,<<134>>,will_be,2}},{call,<0.467.0>,{append,<<"O">>,will_be,1}}]}, + %% {44522,47651,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]}, %% {47651,infinity,[]}] - AppendResultFilter = fun({ok, LPN}) -> LPN; - (Else) -> Else end, - AppendResults = eqc_temporal:stateful( - fun({call, Pid, Call}) -> [{call, Pid, Call}] end, - fun({call, Pid, {append, _Pg}}, {result, Pid, Res}) -> - [AppendResultFilter(Res)] end, + %% Remember: Appends contains only successful append ops! + Appends = eqc_temporal:stateful( + fun({call, Pid, {append, Pg, will_be, LPN}}) -> + {status, LPN, Pid, Pg} + end, + fun({status, LPN, Pid, Pg}, {result, Pid, {ok, LPN}})-> + [{status, LPN, x, Pg}] + end, Events), - {_, infinity, AppendLPNs} = lists:last(eqc_temporal:all_future(AppendResults)), + if length(Appends) < 10 -> io:format("Trace ~p\n", [Trace]), io:format("Events ~p\n", [Events]), io:format("Appends ~p\n", [Appends]); true -> ok end, + %% The last item in the relation tells us what the last/infinite future + %% state of each LPN is. We'll use it to identify all successfully + %% written LPNs and other stuff. + {_, infinity, FinalStatus} = lists:last(eqc_temporal:all_future(Appends)), + + + InitialVals = eqc_temporal:elems(eqc_temporal:ret([{status, LPN, x, error_unwritten} || {status, LPN, _, _} <- FinalStatus])), + Vals = eqc_temporal:union(InitialVals, Appends), + + Values = eqc_temporal:stateful( + fun({status, _LPN, Pid, _Pg} = I) when is_pid(Pid) -> I end, + fun({status, LPN, Pid, Pg}, {status, LPN, x, Pg}) + when is_pid(Pid) -> [] end, + Vals), + if length(Appends) < 10 -> io:format(user, "Values ~P\n", [Values, 100]); true -> ok end, %% Desired properties AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), - NoAppendLPNDups = lists:sort(AppendLPNs) == lists:usort(AppendLPNs), + NoAppendLPNDups = true, %%% QQQ TODO!!!!!!!! lists:sort(AppendLPNs) == lists:usort(AppendLPNs), ?WHENFAIL(begin - ?QC_FMT("*AppendLPNs: ~p\n", [range_ify(AppendLPNs)]) + ?QC_FMT("*AppendLPNs: ~p\n", [todoTODO]) %%%%% [range_ify(AppendLPNs)]) end, conjunction( [ @@ -251,13 +278,33 @@ check_trace(Trace, _Cmds, _Seed) -> {no_append_duplicates, NoAppendLPNDups}, %% If you want to see PULSE causing crazy scheduling, then %% change one of the "true orelse" -> "false orelse" below. - {bogus_no_gaps, - true orelse - (AppendLPNs == [] orelse length(range_ify(AppendLPNs)) == 1)}, - {bogus_exactly_1_to_N, - true orelse (AppendLPNs == lists:seq(1, length(AppendLPNs)))} + %% {bogus_no_gaps, + %% true orelse + %% (AppendLPNs == [] orelse length(range_ify(AppendLPNs)) == 1)}, + %% {bogus_exactly_1_to_N, + %% true orelse (AppendLPNs == lists:seq(1, length(AppendLPNs)))}, + {true, true} ])). +add_LPN_to_append_calls([{TS, {call, Pid, {append, Page}}}|Rest]) -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + {ok, LPN} -> + {TS, {call, Pid, {append, Page, will_be, LPN}}}; + Else -> + {TS, {call, Pid, {append, Page, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([X|Rest]) -> + [X|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([]) -> + []. + +trace_lookahead_pid(Pid, [{_TS, {result, Pid, Res}}|_]) -> + Res; +trace_lookahead_pid(Pid, [_H|T]) -> + trace_lookahead_pid(Pid, T). + %% Presenting command data statistics in a nicer way command_data({set, _, {call, _, Fun, _}}, {_S, _V}) -> Fun. From 572d1803d0e8a0ad6345919e338f56eedec6a92d Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 18 Feb 2014 17:51:14 +0900 Subject: [PATCH 18/70] Add (mostly) temporal logic checking for exactly-once append_page(). Also, for peace of mind (I hope), I've added this -ifndef to introduce a bug that should cause the new exactly-once append_page() check to fail. This should make it easier to change the model and *TEST* the changes, to avoid breaking the model without ever knowing it. --- prototype/corfurl/test/corfurl_pulse.erl | 115 +++++++++++++++++++---- 1 file changed, 96 insertions(+), 19 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 7b27563..ba6b0e8 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -195,7 +195,7 @@ prop_pulse_test_() -> end}. -%% Example Trace (raw event info, from the ?LOG macro) +%% Example Trace0 (raw event info, from the ?LOG macro) %% %% [{32014,{call,<0.467.0>,{append,<<"O">>}}}, %% {32421,{call,<0.466.0>,{append,<<134>>}}}, @@ -239,43 +239,103 @@ check_trace(Trace0, _Cmds, _Seed) -> %% {44522,47651,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]}, %% {47651,infinity,[]}] - %% Remember: Appends contains only successful append ops! - Appends = eqc_temporal:stateful( + %% Remember: Mods contains only successful append ops! + %% ModsAllFuture is used for calculating which LPNs were written, + %% but Mods is used for everything else. The two stateful() calls + %% at identical except for the "Compare here" difference. + Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> - {status, LPN, Pid, Pg} + {lpn, LPN, Pg, Pid} end, - fun({status, LPN, Pid, Pg}, {result, Pid, {ok, LPN}})-> - [{status, LPN, x, Pg}] + fun({lpn, LPN, _Pg, Pid}, {result, Pid, {ok, LPN}})-> + [] % Compare here end, Events), - if length(Appends) < 10 -> io:format("Trace ~p\n", [Trace]), io:format("Events ~p\n", [Events]), io:format("Appends ~p\n", [Appends]); true -> ok end, + ModsAllFuture = eqc_temporal:stateful( + fun({call, Pid, {append, Pg, will_be, LPN}}) -> + {lpn, LPN, Pg, Pid} + end, + fun({lpn, LPN, Pg, Pid}, {result, Pid, {ok, LPN}})-> + %% Keep this into the infinite future + [{lpn, LPN, Pg}] % Compare here + end, + Events), + %%QQQ = -5, + %%if length(Trace) < QQQ -> io:format("Trace ~p\n", [Trace]), io:format("Events ~p\n", [Events]), io:format("Mods ~p\n", [Mods]); true -> ok end, + %% The last item in the relation tells us what the last/infinite future %% state of each LPN is. We'll use it to identify all successfully %% written LPNs and other stuff. - {_, infinity, FinalStatus} = lists:last(eqc_temporal:all_future(Appends)), + {_, infinity, FinalStatus} = lists:last(eqc_temporal:all_future(ModsAllFuture)), + %% StartMod contains {m_start, LPN, V} when a modification finished. + %% DoneMod contains {m_end, LPN, V} when a modification finished. - InitialVals = eqc_temporal:elems(eqc_temporal:ret([{status, LPN, x, error_unwritten} || {status, LPN, _, _} <- FinalStatus])), - Vals = eqc_temporal:union(InitialVals, Appends), + %% This is a clever trick: Mods contains the start & end timestamp + %% for each modification. Use shift() by 1 usec to move all timestamps + %% backward 1 usec, then subtract away the original time range to leave + %% a 1 usec relation in time, then map() to convert it to a {m_end,...}. + DoneMod = eqc_temporal:map( + fun({lpn, LPN, Pg, _Pid}) -> {m_end, LPN, Pg} end, + eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), + StartMod = eqc_temporal:map( + fun({lpn, LPN, Pg, _Pid}) -> {m_start, LPN, Pg} end, + eqc_temporal:subtract(Mods, eqc_temporal:shift(1, Mods))), + %% if length(Trace) < QQQ -> io:format(user, "StartMod ~P\n", [StartMod, 100]), io:format(user, "DoneMod ~P\n", [DoneMod, 100]); true -> ok end, + StartsDones = eqc_temporal:union(StartMod, DoneMod), + %%if length(Trace) < QQQ -> io:format(user, "StartsDones ~P\n", [StartsDones, 100]); true -> ok end, - Values = eqc_temporal:stateful( - fun({status, _LPN, Pid, _Pg} = I) when is_pid(Pid) -> I end, - fun({status, LPN, Pid, Pg}, {status, LPN, x, Pg}) - when is_pid(Pid) -> [] end, - Vals), - if length(Appends) < 10 -> io:format(user, "Values ~P\n", [Values, 100]); true -> ok end, + %% TODO: A brighter mind than mine might figure out how to do this + %% next step using only eqc_temporal. + %% + %% We create a new relation, ValuesR. This relation contains + %% {values, OD::orddict()} for each time interval in the relation. + %% The OD contains all possible values for a particular LPN at + %% that time in the relation. + %% The key for OD is LPN, the value is an unordered list of possible values. + + InitialDict = orddict:from_list([{LPN, [error_unwritten]} || + {lpn, LPN, _} <- FinalStatus]), + {_ValuesR, _} = + lists:mapfoldl( + fun({TS1, TS2, StEnds}, Dict1) -> + Dict2 = lists:foldl( + fun({m_start, LPN, Pg}, D) -> + orddict:append(LPN, Pg, D) + end, Dict1, [X || X={m_start,_,_} <- StEnds]), + Dict3 = lists:foldl( + fun({m_end, LPN, Pg}, D) -> + orddict:store(LPN, [Pg], D) + end, Dict2, [X || X={m_end,_,_} <- StEnds]), + {{TS1, TS2, [{values, Dict3}]}, Dict3} + end, InitialDict, StartsDones), + %%if length(Trace) < QQQ -> io:format(user, "ValuesR ~P\n", [ValuesR, 100]); true -> ok end, + + %% We want to find & fail any two clients that append the exact same page + %% data to the same LPN. Unfortunately, the eqc_temporal library will + %% merge two such facts together into a single fact. So this method + %% commented below isn't good enough. + %% M_Ends = eqc_temporal:at(infinity, eqc_temporal:any_past(DoneMod)), + %% AppendedLPNs = lists:sort([LPN || {m_end, LPN, _} <- M_Ends]), + %% {_Last, DuplicateLPNs} = lists:foldl(fun(X, {X, Dups}) -> {X, [X|Dups]}; + %% (X, {_, Dups}) -> {X, Dups} + %% end, {undefined, []}, AppendedLPNs), + AppendWillBes = [LPN || {_TS, {call, _, {append, _, will_be, LPN}}} <- Trace], + DuplicateLPNs = AppendWillBes -- lists:usort(AppendWillBes), %% Desired properties AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), - NoAppendLPNDups = true, %%% QQQ TODO!!!!!!!! lists:sort(AppendLPNs) == lists:usort(AppendLPNs), + NoAppendDuplicates = (DuplicateLPNs == []), ?WHENFAIL(begin - ?QC_FMT("*AppendLPNs: ~p\n", [todoTODO]) %%%%% [range_ify(AppendLPNs)]) + %% ?QC_FMT("*Events: ~p\n", [Events]), + ?QC_FMT("*Mods: ~p\n", [Mods]), + ?QC_FMT("*DuplicateLPNs: ~p\n", [DuplicateLPNs]) end, conjunction( [ {all_calls_finish, AllCallsFinish}, - {no_append_duplicates, NoAppendLPNDups}, + {no_append_duplicates, NoAppendDuplicates}, %% If you want to see PULSE causing crazy scheduling, then %% change one of the "true orelse" -> "false orelse" below. %% {bogus_no_gaps, @@ -393,9 +453,26 @@ range_ify(Beginning, Next, []) -> event_logger:event({result, self(), LOG__Result}), LOG__Result). +-ifndef(TEST_TRIP_no_append_duplicates). + append(#run{seq=Seq,proj=Proj}, Page) -> ?LOG({append, Page}, corfurl:append_page(Seq, Proj, Page)). +-else. % TEST_TRIP_no_append_duplicates + +%% If the appended LPN > 3, just lie and say that it was 3. + +append(#run{seq=Seq,proj=Proj}, Page) -> + ?LOG({append, Page}, + begin + case corfurl:append_page(Seq, Proj, Page) of + {ok, LPN} when LPN > 3 -> + {ok, 3}; + Else -> + Else + end + end). +-endif. % TEST_TRIP_no_append_duplicates -endif. % PULSE -endif. % TEST From c14e1facf487ff29d1b30f581604a8ac89f53fba Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 18 Feb 2014 18:15:49 +0900 Subject: [PATCH 19/70] Add read_approx() to the PULSE model, only 5% correctness checks done --- prototype/corfurl/test/corfurl_pulse.erl | 45 ++++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index ba6b0e8..cf6eecf 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -45,6 +45,8 @@ -define(QC_OUT(P), eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)). +-define(MAX_PAGES, 50000). + -record(run, { seq, % Sequencer proj, % Projection @@ -76,6 +78,10 @@ gen_sequencer() -> frequency([{100, standard}, {50, {gen_seed(), gen_sequencer_percent(), choose(1, 2)}}]). +gen_approx_page() -> + %% EQC can't know what pages are perhaps-written, so pick something big. + ?LET(I, largeint(), abs(I)). + command(#state{run=Run} = S) -> ?LET({NumChains, ChainLen, PageSize}, {parameter(num_chains), parameter(chain_len), parameter(page_size)}, @@ -84,6 +90,8 @@ command(#state{run=Run} = S) -> || not S#state.is_setup] ++ [{10, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ + [{10, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ [])). %% Precondition, checked before a command is added to the command sequence. @@ -101,6 +109,8 @@ next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize, _SeqType]}) page_size=PageSize, run=Res}; next_state(S, _, {call, _, append, _}) -> + S; +next_state(S, _, {call, _, read_approx, _}) -> S. eqeq(X, X) -> true; @@ -111,7 +121,14 @@ postcondition(_S, {call, _, setup, _}, #run{} = _V) -> postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) -> true; postcondition(_S, {call, _, append, _}, V) -> - eqeq(V, todoTODO_fixit). + eqeq(V, todoTODO_fixit); +postcondition(_S, {call, _, read_approx, _}, V) -> + case V of + {ok, Pg} when is_binary(Pg) -> true; + error_unwritten -> true; + error_trimmed -> true; + _ -> eqeq(V, todoTODO_fixit) + end. run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> %% AfterTime = if LocalOrSlave == local -> 50000; @@ -128,9 +145,9 @@ run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> %% application:start(my_test_app), %% receive after AfterTime -> ok end, {H, S, R} = run_parallel_commands(?MODULE, Cmds), - %% io:format(user, "Yooo: H = ~p\n", [H]), - %% io:format(user, "Yooo: S = ~p\n", [S]), - %% io:format(user, "Yooo: R = ~p\n", [R]), + %% io:format(user, "Yooo: H = ~p\n", [H]), + %% io:format(user, "Yooo: S = ~p\n", [S]), + %% io:format(user, "Yooo: R = ~p\n", [R]), %% receive after AfterTime -> ok end, Trace = event_logger:get_events(), %% receive after AfterTime -> ok end, @@ -270,11 +287,10 @@ check_trace(Trace0, _Cmds, _Seed) -> %% StartMod contains {m_start, LPN, V} when a modification finished. %% DoneMod contains {m_end, LPN, V} when a modification finished. - %% This is a clever trick: Mods contains the start & end timestamp %% for each modification. Use shift() by 1 usec to move all timestamps - %% backward 1 usec, then subtract away the original time range to leave - %% a 1 usec relation in time, then map() to convert it to a {m_end,...}. + %% forward/backward 1 usec, then subtract away the original time range to + %% leave a 1 usec relation in time. DoneMod = eqc_temporal:map( fun({lpn, LPN, Pg, _Pid}) -> {m_end, LPN, Pg} end, eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), @@ -427,11 +443,11 @@ make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> setup(NumChains, ChainLen, PageSize, SeqType) -> N = NumChains * ChainLen, - FLUs = corfurl_test:setup_basic_flus(N, PageSize, 50000), + FLUs = corfurl_test:setup_basic_flus(N, PageSize, ?MAX_PAGES), {ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType), Chains = make_chains(ChainLen, FLUs), %% io:format(user, "Cs = ~p\n", [Chains]), - Proj = corfurl:new_simple_projection(1, 1, 50000, Chains), + Proj = corfurl:new_simple_projection(1, 1, ?MAX_PAGES, Chains), #run{seq=Seq, proj=Proj, flus=FLUs}. range_ify([]) -> @@ -455,14 +471,14 @@ range_ify(Beginning, Next, []) -> -ifndef(TEST_TRIP_no_append_duplicates). -append(#run{seq=Seq,proj=Proj}, Page) -> +append(#run{seq=Seq, proj=Proj}, Page) -> ?LOG({append, Page}, corfurl:append_page(Seq, Proj, Page)). -else. % TEST_TRIP_no_append_duplicates %% If the appended LPN > 3, just lie and say that it was 3. -append(#run{seq=Seq,proj=Proj}, Page) -> +append(#run{seq=Seq, proj=Proj}, Page) -> ?LOG({append, Page}, begin case corfurl:append_page(Seq, Proj, Page) of @@ -474,6 +490,13 @@ append(#run{seq=Seq,proj=Proj}, Page) -> end). -endif. % TEST_TRIP_no_append_duplicates +read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> + Max = corfurl_sequencer:get(Seq, 0), + %% The sequencer may be lying to us, shouganai. + LPN = (SeedInt rem Max) + 1, + ?LOG({read, LPN}, + corfurl:read_page(Proj, LPN)). + -endif. % PULSE -endif. % TEST From a7aff2f14160307c6dbbe5a0fdcf1b1346097726 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 18 Feb 2014 20:08:20 +0900 Subject: [PATCH 20/70] Dumbdumbdumb don't interfere with event_logger:event() duh! --- prototype/corfurl/rebar.config.script | 2 -- 1 file changed, 2 deletions(-) diff --git a/prototype/corfurl/rebar.config.script b/prototype/corfurl/rebar.config.script index ae29cb2..79df2a3 100644 --- a/prototype/corfurl/rebar.config.script +++ b/prototype/corfurl/rebar.config.script @@ -18,8 +18,6 @@ case PulseBuild of , {corfurl_flu, trim, '_'} , {corfurl_flu, fill, '_'} - , {event_logger, event, '_'} - , {prim_file, '_', '_'} , {file, '_', '_'} , {filelib, '_', '_'} From e9851767fcc2414b7ee0206393651f6e44bdd261 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 18 Feb 2014 20:13:53 +0900 Subject: [PATCH 21/70] Add read_page() temporal check --- prototype/corfurl/test/corfurl_pulse.erl | 77 ++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index cf6eecf..17dcb5b 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -277,6 +277,7 @@ check_trace(Trace0, _Cmds, _Seed) -> [{lpn, LPN, Pg}] % Compare here end, Events), + %%QQQ = -5, %%if length(Trace) < QQQ -> io:format("Trace ~p\n", [Trace]), io:format("Events ~p\n", [Events]), io:format("Mods ~p\n", [Mods]); true -> ok end, @@ -312,7 +313,7 @@ check_trace(Trace0, _Cmds, _Seed) -> InitialDict = orddict:from_list([{LPN, [error_unwritten]} || {lpn, LPN, _} <- FinalStatus]), - {_ValuesR, _} = + {ValuesR, _} = lists:mapfoldl( fun({TS1, TS2, StEnds}, Dict1) -> Dict2 = lists:foldl( @@ -339,19 +340,65 @@ check_trace(Trace0, _Cmds, _Seed) -> AppendWillBes = [LPN || {_TS, {call, _, {append, _, will_be, LPN}}} <- Trace], DuplicateLPNs = AppendWillBes -- lists:usort(AppendWillBes), + Reads = eqc_temporal:stateful( + fun({call, Pid, {read, LPN}}) -> + {read, Pid, LPN} + end, + fun({read, Pid, LPN}, {result, Pid, {ok, Pg}}) -> + [{read_finished, LPN, Pg}]; + ({read, Pid, LPN}, {result, Pid, Else}) -> + [{read_finished, LPN, Else}] + end, + Events), + DoneRead = eqc_temporal:map( + fun({read_finished, LPN, Pg}) -> {read_end, LPN, Pg} end, + eqc_temporal:subtract(eqc_temporal:shift(-1, Reads), Reads)), + StartRead = eqc_temporal:map( + fun({read, Pid, LPN}) -> {read_start, LPN, Pid} end, + eqc_temporal:subtract(Reads, eqc_temporal:shift(1, Reads))), + %%io:format("Reads = ~P\n", [Reads, 30]), + %%io:format("DoneRead = ~P\n", [DoneRead, 30]), + %%io:format("UU ~p\n", [eqc_temporal:union(DoneRead, ValuesR)]), + BadReadR = eqc_temporal:stateful( + fun({read_end, _, _} = I) -> I end, + fun({read_end, LPN, Pg}, {values, Dict}) -> + {ok, PossibleVals} = orddict:find(LPN, Dict), + case lists:member(Pg, PossibleVals) of + true -> + []; + false -> + [{bad, read, LPN, got, Pg, + possible, PossibleVals}] + end + end, eqc_temporal:union(DoneRead, ValuesR)), + %%io:format("BadReadR = ~P\n", [BadReadR, 20]), + BadFilter = fun(bad) -> true; + (Bad) when is_tuple(Bad), element(1, Bad) == bad -> true; + (_) -> false end, + %%io:format("BadReadR = ~P\n", [BadReadR, 40]), + BadReads = [{TS1, TS2, lists:filter(BadFilter, Facts)} || + {TS1, TS2, Facts} <- BadReadR, + Fact <- Facts, BadFilter(Fact)], + %% Desired properties AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), NoAppendDuplicates = (DuplicateLPNs == []), + NoBadReads = (BadReads == []), ?WHENFAIL(begin %% ?QC_FMT("*Events: ~p\n", [Events]), + ?QC_FMT("*DuplicateLPNs: ~p\n", [DuplicateLPNs]), ?QC_FMT("*Mods: ~p\n", [Mods]), - ?QC_FMT("*DuplicateLPNs: ~p\n", [DuplicateLPNs]) + ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), + ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, + StartRead, StartMod])]), + ?QC_FMT("*BadReads: ~p\n", [BadReads]) end, conjunction( [ {all_calls_finish, AllCallsFinish}, {no_append_duplicates, NoAppendDuplicates}, + {no_bad_reads, NoBadReads}, %% If you want to see PULSE causing crazy scheduling, then %% change one of the "true orelse" -> "false orelse" below. %% {bogus_no_gaps, @@ -479,23 +526,45 @@ append(#run{seq=Seq, proj=Proj}, Page) -> %% If the appended LPN > 3, just lie and say that it was 3. append(#run{seq=Seq, proj=Proj}, Page) -> + MaxLPN = 3, ?LOG({append, Page}, begin case corfurl:append_page(Seq, Proj, Page) of - {ok, LPN} when LPN > 3 -> - {ok, 3}; + {ok, LPN} when LPN > MaxLPN -> + Bad = {ok, MaxLPN}, + io:format("BAD: append: ~p -> ~p\n", [Page, Bad]), + Bad; Else -> Else end end). -endif. % TEST_TRIP_no_append_duplicates +-ifndef(TEST_TRIP_bad_read). + read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> Max = corfurl_sequencer:get(Seq, 0), %% The sequencer may be lying to us, shouganai. LPN = (SeedInt rem Max) + 1, ?LOG({read, LPN}, corfurl:read_page(Proj, LPN)). +-else. % TEST_TRIP_bad_read + +read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> + Fake = <<"FAKE!">>, + Max = corfurl_sequencer:get(Seq, 0), + LPN = (SeedInt rem Max) + 1, + ?LOG({read, LPN}, + if LPN > 4 -> + io:format("read_approx: ~p -> ~p\n", [LPN, Fake]), + {ok, Fake}; + true -> + Res = corfurl:read_page(Proj, LPN), + %% io:format("read_approx: ~p -> ~P\n", [LPN, Res, 6]), + Res + end). + +-endif. % TEST_TRIP_bad_read -endif. % PULSE -endif. % TEST From fb6b1cdc3ce3248dec879c41cbddc33bc7ea52ca Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 15:39:40 +0900 Subject: [PATCH 22/70] Fix read_page() model problem: no more false positives! --- prototype/corfurl/src/corfurl.erl | 23 ++++- prototype/corfurl/test/corfurl_pulse.erl | 125 +++++++++++------------ 2 files changed, 79 insertions(+), 69 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index bbf84c2..de0048d 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -45,7 +45,7 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> ok -> {ok, LPN}; X when X == error_overwritten; X == error_trimmed -> - io:format(user, "LPN ~p race lost: ~p\n", [LPN, X]), + report_lost_race(LPN, X), append_page(Sequencer, P, Page); Else -> exit({todo, ?MODULE, line, ?LINE, Else}) @@ -253,3 +253,24 @@ project_to_chain(LPN, P) -> element(I, Chains) end. +-ifdef(TEST). +-ifdef(PULSE). +report_lost_race(_LPN, _Reason) -> + %% It's interesting (sometime?) to know if a page was overwritten + %% because the sequencer was configured by QuickCheck to hand out + %% duplicate LPNs. If this gets too annoying, this can be a no-op + %% function. + io:format(user, "o", []). +-else. % PULSE +report_lost_race(LPN, Reason) -> + io:format(user, "LPN ~p race lost: ~p\n", [LPN, Reason]). +-endif. % PULSE +-else. % TEST + +report_lost_race(LPN, Reason) -> + %% Perhaps it's an interesting event, but the rest of the system + %% should react correctly whenever this happens, so it shouldn't + %% ever cause an external consistency problem. + error_logger:debug_msg("LPN ~p race lost: ~p\n", [LPN, Reason]). + +-endif. % TEST diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 17dcb5b..3bd570b 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -80,7 +80,7 @@ gen_sequencer() -> gen_approx_page() -> %% EQC can't know what pages are perhaps-written, so pick something big. - ?LET(I, largeint(), abs(I)). + noshrink(?LET(I, largeint(), abs(I))). command(#state{run=Run} = S) -> ?LET({NumChains, ChainLen, PageSize}, @@ -124,7 +124,7 @@ postcondition(_S, {call, _, append, _}, V) -> eqeq(V, todoTODO_fixit); postcondition(_S, {call, _, read_approx, _}, V) -> case V of - {ok, Pg} when is_binary(Pg) -> true; + Pg when is_binary(Pg) -> true; error_unwritten -> true; error_trimmed -> true; _ -> eqeq(V, todoTODO_fixit) @@ -244,7 +244,7 @@ check_trace(Trace0, _Cmds, _Seed) -> %% {47652,infinity,[]}] Calls = eqc_temporal:stateful( - fun({call, Pid, Call}) -> [{call, Pid, Call}] end, + fun({call, _Pid, _Call} = I) -> [I] end, fun({call, Pid, _Call}, {result, Pid, _}) -> [] end, Events), %% Example Calls (temporal map of when a call is in progress) @@ -256,35 +256,25 @@ check_trace(Trace0, _Cmds, _Seed) -> %% {44522,47651,[{call,<0.466.0>,{append,<<134>>,will_be,2}}]}, %% {47651,infinity,[]}] - %% Remember: Mods contains only successful append ops! - %% ModsAllFuture is used for calculating which LPNs were written, - %% but Mods is used for everything else. The two stateful() calls - %% at identical except for the "Compare here" difference. + AllLPNsR = eqc_temporal:stateful( + fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN; + ({call, _Pid, {read, LPN}}) -> LPN + end, + fun(x) -> [] end, + Calls), + %% The last item in the relation tells us what the final facts are in the + %% relation. In this case, it's all LPNs ever mentioned in the test run. + {_, infinity, AllLPNs} = lists:last(eqc_temporal:all_future(AllLPNsR)), + + %% Remember: Mods contains only successful ops that modify an LPN Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> - {lpn, LPN, Pg, Pid} + {mod_lpn, LPN, Pg, Pid} end, - fun({lpn, LPN, _Pg, Pid}, {result, Pid, {ok, LPN}})-> + fun({mod_lpn, LPN, _Pg, Pid}, {result, Pid, {ok, LPN}})-> [] % Compare here end, Events), - ModsAllFuture = eqc_temporal:stateful( - fun({call, Pid, {append, Pg, will_be, LPN}}) -> - {lpn, LPN, Pg, Pid} - end, - fun({lpn, LPN, Pg, Pid}, {result, Pid, {ok, LPN}})-> - %% Keep this into the infinite future - [{lpn, LPN, Pg}] % Compare here - end, - Events), - - %%QQQ = -5, - %%if length(Trace) < QQQ -> io:format("Trace ~p\n", [Trace]), io:format("Events ~p\n", [Events]), io:format("Mods ~p\n", [Mods]); true -> ok end, - - %% The last item in the relation tells us what the last/infinite future - %% state of each LPN is. We'll use it to identify all successfully - %% written LPNs and other stuff. - {_, infinity, FinalStatus} = lists:last(eqc_temporal:all_future(ModsAllFuture)), %% StartMod contains {m_start, LPN, V} when a modification finished. %% DoneMod contains {m_end, LPN, V} when a modification finished. @@ -293,14 +283,12 @@ check_trace(Trace0, _Cmds, _Seed) -> %% forward/backward 1 usec, then subtract away the original time range to %% leave a 1 usec relation in time. DoneMod = eqc_temporal:map( - fun({lpn, LPN, Pg, _Pid}) -> {m_end, LPN, Pg} end, + fun({mod_lpn, LPN, Pg, _Pid}) -> {m_end, LPN, Pg} end, eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), StartMod = eqc_temporal:map( - fun({lpn, LPN, Pg, _Pid}) -> {m_start, LPN, Pg} end, + fun({mod_lpn, LPN, Pg, _Pid}) -> {m_start, LPN, Pg} end, eqc_temporal:subtract(Mods, eqc_temporal:shift(1, Mods))), - %% if length(Trace) < QQQ -> io:format(user, "StartMod ~P\n", [StartMod, 100]), io:format(user, "DoneMod ~P\n", [DoneMod, 100]); true -> ok end, StartsDones = eqc_temporal:union(StartMod, DoneMod), - %%if length(Trace) < QQQ -> io:format(user, "StartsDones ~P\n", [StartsDones, 100]); true -> ok end, %% TODO: A brighter mind than mine might figure out how to do this %% next step using only eqc_temporal. @@ -312,7 +300,7 @@ check_trace(Trace0, _Cmds, _Seed) -> %% The key for OD is LPN, the value is an unordered list of possible values. InitialDict = orddict:from_list([{LPN, [error_unwritten]} || - {lpn, LPN, _} <- FinalStatus]), + LPN <- AllLPNs]), {ValuesR, _} = lists:mapfoldl( fun({TS1, TS2, StEnds}, Dict1) -> @@ -326,7 +314,6 @@ check_trace(Trace0, _Cmds, _Seed) -> end, Dict2, [X || X={m_end,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} end, InitialDict, StartsDones), - %%if length(Trace) < QQQ -> io:format(user, "ValuesR ~P\n", [ValuesR, 100]); true -> ok end, %% We want to find & fail any two clients that append the exact same page %% data to the same LPN. Unfortunately, the eqc_temporal library will @@ -340,44 +327,41 @@ check_trace(Trace0, _Cmds, _Seed) -> AppendWillBes = [LPN || {_TS, {call, _, {append, _, will_be, LPN}}} <- Trace], DuplicateLPNs = AppendWillBes -- lists:usort(AppendWillBes), + %% Checking reads is a tricky thing. My first attempt created a temporal + %% relation for the 1usec window when the read call was complete, then + %% union with the ValuesR relation to see what values were valid at that + %% particular instant. That approach fails sometimes! + %% + %% The reason is honest race conditions with a mutation: the model doesn't + %% know exactly when the data was written, so a valid value may have been + %% added/removed from the ValuesR relation that aren't there for the + %% 1usec window that intersects with ValuesR. + %% + %% Instead, we need to merge together all possible values from ValuesR + %% that appear at any time during the read op's lifetime. + Reads = eqc_temporal:stateful( fun({call, Pid, {read, LPN}}) -> - {read, Pid, LPN} + {read, Pid, LPN, []} end, - fun({read, Pid, LPN}, {result, Pid, {ok, Pg}}) -> - [{read_finished, LPN, Pg}]; - ({read, Pid, LPN}, {result, Pid, Else}) -> - [{read_finished, LPN, Else}] - end, - Events), - DoneRead = eqc_temporal:map( - fun({read_finished, LPN, Pg}) -> {read_end, LPN, Pg} end, - eqc_temporal:subtract(eqc_temporal:shift(-1, Reads), Reads)), - StartRead = eqc_temporal:map( - fun({read, Pid, LPN}) -> {read_start, LPN, Pid} end, - eqc_temporal:subtract(Reads, eqc_temporal:shift(1, Reads))), - %%io:format("Reads = ~P\n", [Reads, 30]), - %%io:format("DoneRead = ~P\n", [DoneRead, 30]), - %%io:format("UU ~p\n", [eqc_temporal:union(DoneRead, ValuesR)]), - BadReadR = eqc_temporal:stateful( - fun({read_end, _, _} = I) -> I end, - fun({read_end, LPN, Pg}, {values, Dict}) -> - {ok, PossibleVals} = orddict:find(LPN, Dict), - case lists:member(Pg, PossibleVals) of - true -> - []; - false -> - [{bad, read, LPN, got, Pg, - possible, PossibleVals}] - end - end, eqc_temporal:union(DoneRead, ValuesR)), - %%io:format("BadReadR = ~P\n", [BadReadR, 20]), + fun({read, Pid, LPN, V1s}, {values, Values}) -> + {ok, V2s} = orddict:find(LPN, Values), + NewVs = lists:umerge(lists:sort(V1s), + lists:sort(V2s)), + false = NewVs == V1s, + {read, Pid, LPN, NewVs}; + ({read, Pid, LPN, Vs}, {result, Pid, Pg}) -> + case lists:member(Pg, Vs) of + true -> []; + false -> [{bad, read, LPN, Pid, got, Pg, + possible, Vs}] + end + end, eqc_temporal:union(Events, ValuesR)), BadFilter = fun(bad) -> true; (Bad) when is_tuple(Bad), element(1, Bad) == bad -> true; (_) -> false end, - %%io:format("BadReadR = ~P\n", [BadReadR, 40]), BadReads = [{TS1, TS2, lists:filter(BadFilter, Facts)} || - {TS1, TS2, Facts} <- BadReadR, + {TS1, TS2, Facts} <- Reads, Fact <- Facts, BadFilter(Fact)], %% Desired properties @@ -390,8 +374,8 @@ check_trace(Trace0, _Cmds, _Seed) -> ?QC_FMT("*DuplicateLPNs: ~p\n", [DuplicateLPNs]), ?QC_FMT("*Mods: ~p\n", [Mods]), ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), - ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, - StartRead, StartMod])]), + %% ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, + %% StartRead, StartMod])]), ?QC_FMT("*BadReads: ~p\n", [BadReads]) end, conjunction( @@ -540,6 +524,11 @@ append(#run{seq=Seq, proj=Proj}, Page) -> end). -endif. % TEST_TRIP_no_append_duplicates +read_result_mangle({ok, Page}) -> + Page; +read_result_mangle(Else) -> + Else. + -ifndef(TEST_TRIP_bad_read). read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> @@ -547,7 +536,7 @@ read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> %% The sequencer may be lying to us, shouganai. LPN = (SeedInt rem Max) + 1, ?LOG({read, LPN}, - corfurl:read_page(Proj, LPN)). + read_result_mangle(corfurl:read_page(Proj, LPN))). -else. % TEST_TRIP_bad_read read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> @@ -557,9 +546,9 @@ read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> ?LOG({read, LPN}, if LPN > 4 -> io:format("read_approx: ~p -> ~p\n", [LPN, Fake]), - {ok, Fake}; + read_result_mangle(Fake); true -> - Res = corfurl:read_page(Proj, LPN), + Res = read_result_mangle(corfurl:read_page(Proj, LPN)), %% io:format("read_approx: ~p -> ~P\n", [LPN, Res, 6]), Res end). From c80921de25017f1cf7cf852842a636e59d7c2fa1 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 16:33:45 +0900 Subject: [PATCH 23/70] Add scan_forward() command, no result checking yet --- prototype/corfurl/test/corfurl_pulse.erl | 70 +++++++++++++++++-- .../corfurl/test/pulse_util/event_logger.erl | 8 ++- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 3bd570b..f6999ad 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -82,6 +82,9 @@ gen_approx_page() -> %% EQC can't know what pages are perhaps-written, so pick something big. noshrink(?LET(I, largeint(), abs(I))). +gen_scan_forward_start() -> + oneof([1, gen_approx_page()]). + command(#state{run=Run} = S) -> ?LET({NumChains, ChainLen, PageSize}, {parameter(num_chains), parameter(chain_len), parameter(page_size)}, @@ -90,7 +93,9 @@ command(#state{run=Run} = S) -> || not S#state.is_setup] ++ [{10, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ - [{10, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + [{3, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{5, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} || S#state.is_setup] ++ [])). @@ -111,6 +116,8 @@ next_state(S, Res, {call, _, setup, [NumChains, ChainLen, PageSize, _SeqType]}) next_state(S, _, {call, _, append, _}) -> S; next_state(S, _, {call, _, read_approx, _}) -> + S; +next_state(S, _, {call, _, scan_forward, _}) -> S. eqeq(X, X) -> true; @@ -123,13 +130,25 @@ postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) -> postcondition(_S, {call, _, append, _}, V) -> eqeq(V, todoTODO_fixit); postcondition(_S, {call, _, read_approx, _}, V) -> + valid_read_result(V); +postcondition(_S, {call, _, scan_forward, _}, V) -> case V of - Pg when is_binary(Pg) -> true; - error_unwritten -> true; - error_trimmed -> true; - _ -> eqeq(V, todoTODO_fixit) + {ok, LastLSN, MoreP, Pages} -> + true = is_integer(LastLSN), + true = LastLSN > 0, + true = (MoreP == true orelse MoreP == false), + [] = lists:usort([X || {_LPN, Pg} <- Pages, + X <- [valid_read_result(Pg)], X /= true]), + true; + _ -> + eqeq(V, {todoTODO_fixit,?LINE}) end. +valid_read_result(Pg) when is_binary(Pg) -> true; +valid_read_result(error_unwritten) -> true; +valid_read_result(error_trimmed) -> true; +valid_read_result(V) -> eqeq(V, {todoTODO_fixit,?LINE}). + run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> %% AfterTime = if LocalOrSlave == local -> 50000; %% LocalOrSlave == slave -> 1000000 @@ -494,10 +513,22 @@ range_ify(Beginning, Next, [Else|T]) -> range_ify(Beginning, Next, []) -> [{Beginning, to, Next-1}]. +log_make_call(Tag) -> + log_make_call(self(), Tag). + +log_make_call(Pid, Tag) -> + {call, Pid, Tag}. + +log_make_result(Result) -> + log_make_result(self(), Result). + +log_make_result(Pid, Result) -> + {result, Pid, Result}. + -define(LOG(Tag, MkCall), - event_logger:event({call, self(), Tag}), + event_logger:event(log_make_call(Tag)), LOG__Result = MkCall, - event_logger:event({result, self(), LOG__Result}), + event_logger:event(log_make_result(LOG__Result)), LOG__Result). -ifndef(TEST_TRIP_no_append_duplicates). @@ -555,6 +586,31 @@ read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> -endif. % TEST_TRIP_bad_read +scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> + Max = corfurl_sequencer:get(Seq, 0), + StartLPN = if SeedInt == 1 -> 1; + true -> (SeedInt rem Max) + 1 + end, + ?LOG({scan_forward, StartLPN, NumPages}, + begin + TS1 = event_logger:timestamp(), + case corfurl:scan_forward(Proj, StartLPN, NumPages) of + {ok, EndLPN, MoreP, Pages} -> + PageIs = lists:zip(Pages, lists:seq(1, length(Pages))), + TS2 = event_logger:timestamp(), + [begin + PidI = {self(), I}, + event_logger:event(log_make_call(PidI, {read, LPN}), + TS1), + Pm = read_result_mangle(P), + event_logger:event(log_make_result(PidI, Pm), TS2) + end || {{LPN, P}, I} <- PageIs], + Ps = [{LPN, read_result_mangle(P)} || + {LPN, P} <- Pages], + {ok, EndLPN, MoreP, Ps} + end + end). + -endif. % PULSE -endif. % TEST diff --git a/prototype/corfurl/test/pulse_util/event_logger.erl b/prototype/corfurl/test/pulse_util/event_logger.erl index 54fa964..063ed70 100644 --- a/prototype/corfurl/test/pulse_util/event_logger.erl +++ b/prototype/corfurl/test/pulse_util/event_logger.erl @@ -9,7 +9,8 @@ -behaviour(gen_server). %% API --export([start_link/0, event/1, get_events/0, start_logging/0]). +-export([start_link/0, event/1, event/2, get_events/0, start_logging/0]). +-export([timestamp/0]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -36,8 +37,11 @@ start_logging() -> gen_server:call(?MODULE, {start, timestamp()}). event(EventData) -> + event(EventData, timestamp()). + +event(EventData, Timestamp) -> gen_server:call(?MODULE, - #event{ timestamp = timestamp(), data = EventData }). + #event{ timestamp = Timestamp, data = EventData }). async_event(EventData) -> gen_server:cast(?MODULE, From 78019b402fe80add9adf3b80e02e3466e3649c4d Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 17:26:39 +0900 Subject: [PATCH 24/70] Refactor the PULSE model testing error 'trip' code --- prototype/corfurl/test/corfurl_pulse.erl | 91 ++++++++++++++---------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index f6999ad..9fa3631 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -61,6 +61,19 @@ run :: #run{} }). +%% Model testing things: +%% Define true to fake bad behavior that model **must** notice & fail! + +-ifndef(TRIP_no_append_duplicates). +-define(TRIP_no_append_duplicates, false). +-endif. +-ifndef(TRIP_bad_read). +-define(TRIP_bad_read, false). +-endif. +-ifndef(TRIP_bad_scan_forward). +-define(TRIP_bad_scan_forward, false). +-endif. + initial_state() -> #state{}. @@ -367,6 +380,9 @@ check_trace(Trace0, _Cmds, _Seed) -> {ok, V2s} = orddict:find(LPN, Values), NewVs = lists:umerge(lists:sort(V1s), lists:sort(V2s)), + %% Throw an exception (which is equivalent to a no-op) + %% if there are no differences: if we make multiples + %% of the exact same thing, stateful() will get confused. false = NewVs == V1s, {read, Pid, LPN, NewVs}; ({read, Pid, LPN, Vs}, {result, Pid, Pg}) -> @@ -531,66 +547,37 @@ log_make_result(Pid, Result) -> event_logger:event(log_make_result(LOG__Result)), LOG__Result). --ifndef(TEST_TRIP_no_append_duplicates). - append(#run{seq=Seq, proj=Proj}, Page) -> - ?LOG({append, Page}, - corfurl:append_page(Seq, Proj, Page)). --else. % TEST_TRIP_no_append_duplicates - -%% If the appended LPN > 3, just lie and say that it was 3. - -append(#run{seq=Seq, proj=Proj}, Page) -> - MaxLPN = 3, ?LOG({append, Page}, begin - case corfurl:append_page(Seq, Proj, Page) of - {ok, LPN} when LPN > MaxLPN -> - Bad = {ok, MaxLPN}, - io:format("BAD: append: ~p -> ~p\n", [Page, Bad]), - Bad; - Else -> - Else - end + Res = corfurl:append_page(Seq, Proj, Page), + perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) end). --endif. % TEST_TRIP_no_append_duplicates read_result_mangle({ok, Page}) -> Page; read_result_mangle(Else) -> Else. --ifndef(TEST_TRIP_bad_read). - read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> Max = corfurl_sequencer:get(Seq, 0), %% The sequencer may be lying to us, shouganai. LPN = (SeedInt rem Max) + 1, ?LOG({read, LPN}, - read_result_mangle(corfurl:read_page(Proj, LPN))). --else. % TEST_TRIP_bad_read - -read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> - Fake = <<"FAKE!">>, - Max = corfurl_sequencer:get(Seq, 0), - LPN = (SeedInt rem Max) + 1, - ?LOG({read, LPN}, - if LPN > 4 -> - io:format("read_approx: ~p -> ~p\n", [LPN, Fake]), - read_result_mangle(Fake); - true -> - Res = read_result_mangle(corfurl:read_page(Proj, LPN)), - %% io:format("read_approx: ~p -> ~P\n", [LPN, Res, 6]), - Res + begin + Res = read_result_mangle(corfurl:read_page(Proj, LPN)), + perhaps_trip_read_approx(?TRIP_bad_read, Res, LPN) end). --endif. % TEST_TRIP_bad_read - scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> Max = corfurl_sequencer:get(Seq, 0), StartLPN = if SeedInt == 1 -> 1; true -> (SeedInt rem Max) + 1 end, + %% Our job is complicated by the ?LOG() macro, which isn't good enough + %% for our purpose: we must lie about the starting timestamp, to make + %% it appear as if each LPN result that scan_forward() gives us came + %% instead from a single-page read_page() call. ?LOG({scan_forward, StartLPN, NumPages}, begin TS1 = event_logger:timestamp(), @@ -602,7 +589,9 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> PidI = {self(), I}, event_logger:event(log_make_call(PidI, {read, LPN}), TS1), - Pm = read_result_mangle(P), + Pm = perhaps_trip_scan_forward( + ?TRIP_bad_scan_forward, read_result_mangle(P), + EndLPN), event_logger:event(log_make_result(PidI, Pm), TS2) end || {{LPN, P}, I} <- PageIs], Ps = [{LPN, read_result_mangle(P)} || @@ -611,6 +600,30 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end end). +perhaps_trip_append_page(false, Res, _Page) -> + Res; +perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 -> + io:format(user, "TRIP: append_page\n", []), + {ok, 3}; +perhaps_trip_append_page(true, Else, _Page) -> + Else. + +perhaps_trip_read_approx(false, Res, _LPN) -> + Res; +perhaps_trip_read_approx(true, _Res, 3 = LPN) -> + io:format(user, "TRIP: read_approx LPN ~p", [LPN]), + <<"FAKE!">>; +perhaps_trip_read_approx(true, Res, _LPN) -> + Res. + +perhaps_trip_scan_forward(false, Res, _EndLPN) -> + Res; +perhaps_trip_scan_forward(true, _Res, 20) -> + io:format(user, "TRIP: scan_forward\n", []), + <<"magic number bingo, you are a winner">>; +perhaps_trip_scan_forward(true, Res, _EndLPN) -> + Res. + -endif. % PULSE -endif. % TEST From 7dba8beae9fa65a4374dc81203baa1c819ec9065 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 21:18:09 +0900 Subject: [PATCH 25/70] Refactor PULSE test for easier checking, prior to adding fill & trim. --- prototype/corfurl/test/corfurl_pulse.erl | 124 +++++++++++++++-------- 1 file changed, 83 insertions(+), 41 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 9fa3631..51a41c3 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -298,28 +298,40 @@ check_trace(Trace0, _Cmds, _Seed) -> %% relation. In this case, it's all LPNs ever mentioned in the test run. {_, infinity, AllLPNs} = lists:last(eqc_temporal:all_future(AllLPNsR)), - %% Remember: Mods contains only successful ops that modify an LPN + %% Use the following atoms to denote transitions ("Ttn") by an LPN: + %% w_0 = not written yet, error_unwritten + %% w_1 = written successfully, {ok, binary::()} + %% w_t = trimmed, error_trimmed + Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> - {mod_lpn, LPN, Pg, Pid} + {mod_working, w_1, LPN, Pg, Pid}; + ({call, Pid, {fill, LPN}}) -> + {mod_working, w_t, LPN, unused, Pid}; + ({call, Pid, {trim, LPN}}) -> + {mod_working, w_t, LPN, unused, Pid} end, - fun({mod_lpn, LPN, _Pg, Pid}, {result, Pid, {ok, LPN}})-> - [] % Compare here + fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res})-> + [] end, Events), - %% StartMod contains {m_start, LPN, V} when a modification finished. - %% DoneMod contains {m_end, LPN, V} when a modification finished. + %% StartMod contains {mod_start, Ttn, LPN, V} when a modification finished. + %% DoneMod contains {mod_end, Ttn, LPN, V} when a modification finished. %% This is a clever trick: Mods contains the start & end timestamp %% for each modification. Use shift() by 1 usec to move all timestamps %% forward/backward 1 usec, then subtract away the original time range to %% leave a 1 usec relation in time. - DoneMod = eqc_temporal:map( - fun({mod_lpn, LPN, Pg, _Pid}) -> {m_end, LPN, Pg} end, - eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), StartMod = eqc_temporal:map( - fun({mod_lpn, LPN, Pg, _Pid}) -> {m_start, LPN, Pg} end, + fun({mod_working, Ttn, LPN, Pg, _Pid}) -> + {mod_start, Ttn, LPN, Pg} + end, eqc_temporal:subtract(Mods, eqc_temporal:shift(1, Mods))), + DoneMod = eqc_temporal:map( + fun({mod_working, Ttn, LPN, Pg, _Pid}) -> + {mod_end, Ttn, LPN, Pg} + end, + eqc_temporal:subtract(eqc_temporal:shift(1, Mods), Mods)), StartsDones = eqc_temporal:union(StartMod, DoneMod), %% TODO: A brighter mind than mine might figure out how to do this @@ -331,33 +343,52 @@ check_trace(Trace0, _Cmds, _Seed) -> %% that time in the relation. %% The key for OD is LPN, the value is an unordered list of possible values. - InitialDict = orddict:from_list([{LPN, [error_unwritten]} || + InitialValDict = orddict:from_list([{LPN, [error_unwritten]} || LPN <- AllLPNs]), {ValuesR, _} = lists:mapfoldl( fun({TS1, TS2, StEnds}, Dict1) -> Dict2 = lists:foldl( - fun({m_start, LPN, Pg}, D) -> + fun({mod_start, _Ttn, LPN, Pg}, D) -> orddict:append(LPN, Pg, D) - end, Dict1, [X || X={m_start,_,_} <- StEnds]), + end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( - fun({m_end, LPN, Pg}, D) -> + fun({mod_end, _Ttn, LPN, Pg}, D) -> orddict:store(LPN, [Pg], D) - end, Dict2, [X || X={m_end,_,_} <- StEnds]), + end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} - end, InitialDict, StartsDones), + end, InitialValDict, StartsDones), - %% We want to find & fail any two clients that append the exact same page - %% data to the same LPN. Unfortunately, the eqc_temporal library will - %% merge two such facts together into a single fact. So this method - %% commented below isn't good enough. - %% M_Ends = eqc_temporal:at(infinity, eqc_temporal:any_past(DoneMod)), - %% AppendedLPNs = lists:sort([LPN || {m_end, LPN, _} <- M_Ends]), - %% {_Last, DuplicateLPNs} = lists:foldl(fun(X, {X, Dups}) -> {X, [X|Dups]}; - %% (X, {_, Dups}) -> {X, Dups} - %% end, {undefined, []}, AppendedLPNs), - AppendWillBes = [LPN || {_TS, {call, _, {append, _, will_be, LPN}}} <- Trace], - DuplicateLPNs = AppendWillBes -- lists:usort(AppendWillBes), + InitialTtnDict = orddict:from_list([{LPN, [w_0]} || LPN <- AllLPNs]), + {TransitionsR, _} = + lists:mapfoldl( + fun({TS1, TS2, StEnds}, Dict1) -> + Dict2 = lists:foldl( + fun({mod_end, Ttn, LPN, _Pg}, D) -> + %% orddict does not discard duplicates + orddict:append(LPN, Ttn, D); + (_, D) -> + D + end, Dict1, [X || X={mod_end,_,_,_} <- StEnds]), + {{TS1, TS2, [{transitions, Dict2}]}, Dict2} + end, InitialTtnDict, StartsDones), + + %% Property: For all LPNs, the transition list for K must be one of the + %% following four (4) acceptable transition orderings. + {_, _, [{transitions, FinalTtns}]} = lists:last( + eqc_temporal:all_future(TransitionsR)), + InvalidTransitions = orddict:fold( + fun(_LPN, [w_0], Acc) -> + Acc; + (_LPN, [w_0,w_1], Acc) -> + Acc; + (_LPN, [w_0,w_t], Acc) -> + Acc; + (_LPN, [w_0,w_1,w_t], Acc) -> + Acc; + (LPN, BadTtns, Acc) -> + [{LPN, BadTtns}|Acc] + end, [], FinalTtns), %% Checking reads is a tricky thing. My first attempt created a temporal %% relation for the 1usec window when the read call was complete, then @@ -395,29 +426,26 @@ check_trace(Trace0, _Cmds, _Seed) -> BadFilter = fun(bad) -> true; (Bad) when is_tuple(Bad), element(1, Bad) == bad -> true; (_) -> false end, - BadReads = [{TS1, TS2, lists:filter(BadFilter, Facts)} || - {TS1, TS2, Facts} <- Reads, - Fact <- Facts, BadFilter(Fact)], - - %% Desired properties - AllCallsFinish = eqc_temporal:is_false(eqc_temporal:all_future(Calls)), - NoAppendDuplicates = (DuplicateLPNs == []), - NoBadReads = (BadReads == []), + BadReads = filter_relation_facts(BadFilter, Reads), ?WHENFAIL(begin %% ?QC_FMT("*Events: ~p\n", [Events]), - ?QC_FMT("*DuplicateLPNs: ~p\n", [DuplicateLPNs]), ?QC_FMT("*Mods: ~p\n", [Mods]), - ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), + ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), + ?QC_FMT("*Reads: ~p\n", [Reads]), + %% ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), %% ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, %% StartRead, StartMod])]), ?QC_FMT("*BadReads: ~p\n", [BadReads]) end, conjunction( [ - {all_calls_finish, AllCallsFinish}, - {no_append_duplicates, NoAppendDuplicates}, - {no_bad_reads, NoBadReads}, + {all_calls_finish, + eqc_temporal:is_false(eqc_temporal:all_future(Calls))}, + {no_invalidTransitions, + InvalidTransitions == []}, + {no_bad_reads, + eqc_temporal:is_false(eqc_temporal:all_future(BadReads))}, %% If you want to see PULSE causing crazy scheduling, then %% change one of the "true orelse" -> "false orelse" below. %% {bogus_no_gaps, @@ -437,6 +465,16 @@ add_LPN_to_append_calls([{TS, {call, Pid, {append, Page}}}|Rest]) -> {TS, {call, Pid, {append, Page, will_fail, Else}}} end, [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([{TS, {call, Pid, {OpName, LPN}}}|Rest]) + when OpName == fill; OpName == trim -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + ok -> + {TS, {call, Pid, {OpName, LPN, will_be, ok}}}; + Else -> + {TS, {call, Pid, {OpName, LPN, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; add_LPN_to_append_calls([X|Rest]) -> [X|add_LPN_to_append_calls(Rest)]; add_LPN_to_append_calls([]) -> @@ -529,6 +567,10 @@ range_ify(Beginning, Next, [Else|T]) -> range_ify(Beginning, Next, []) -> [{Beginning, to, Next-1}]. +filter_relation_facts(FilterFun, R) -> + [{TS1, TS2, lists:filter(FilterFun, Facts)} || {TS1, TS2, Facts} <- R]. + %% {TS1, TS2, Facts} <- Reads, Fact <- Facts, BadFilter(Fact)], + log_make_call(Tag) -> log_make_call(self(), Tag). @@ -586,7 +628,7 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> PageIs = lists:zip(Pages, lists:seq(1, length(Pages))), TS2 = event_logger:timestamp(), [begin - PidI = {self(), I}, + PidI = {self(), s_f, I}, event_logger:event(log_make_call(PidI, {read, LPN}), TS1), Pm = perhaps_trip_scan_forward( From 86d4583aeffa484e080c31bff98efcaa134e641e Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 23:14:33 +0900 Subject: [PATCH 26/70] Add fill support to the PULSE model --- prototype/corfurl/src/corfurl.erl | 18 +++++- prototype/corfurl/test/corfurl_pulse.erl | 72 +++++++++++++++++++----- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index de0048d..1eb047b 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -24,7 +24,8 @@ new_range/3, read_projection/2, save_projection/2]). --export([append_page/3, read_page/2, scan_forward/3]). +-export([append_page/3, read_page/2, scan_forward/3, + fill_page/2]). -include("corfurl.hrl"). @@ -200,6 +201,21 @@ scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) -> %% Let it crash: error_overwritten end. +fill_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + fill_page2(Chain, Epoch, LPN). + +fill_page2([], _Epoch, _LPN) -> + ok; +fill_page2([H|T], Epoch, LPN) -> + case corfurl_flu:fill(flu_pid(H), Epoch, LPN) of + ok -> + fill_page2(T, Epoch, LPN); + Else -> + %% TODO: worth doing anything here, if we're in the middle of chain? + Else + end. + flu_pid(X) when is_pid(X) -> X; flu_pid(X) when is_atom(X) -> diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 51a41c3..33b2f1a 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -73,6 +73,9 @@ -ifndef(TRIP_bad_scan_forward). -define(TRIP_bad_scan_forward, false). -endif. +-ifndef(TRIP_bad_fill). +-define(TRIP_bad_fill, false). +-endif. initial_state() -> #state{}. @@ -110,6 +113,8 @@ command(#state{run=Run} = S) -> || S#state.is_setup] ++ [{5, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} || S#state.is_setup] ++ + [{4, {call, ?MODULE, fill, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ [])). %% Precondition, checked before a command is added to the command sequence. @@ -131,6 +136,8 @@ next_state(S, _, {call, _, append, _}) -> next_state(S, _, {call, _, read_approx, _}) -> S; next_state(S, _, {call, _, scan_forward, _}) -> + S; +next_state(S, _, {call, _, fill, _}) -> S. eqeq(X, X) -> true; @@ -155,6 +162,13 @@ postcondition(_S, {call, _, scan_forward, _}, V) -> true; _ -> eqeq(V, {todoTODO_fixit,?LINE}) + end; +postcondition(_S, {call, _, fill, _}, V) -> + case V of + ok -> true; + error_trimmed -> true; + error_overwritten -> true; + _ -> eqeq(V, {fill_error, V}) end. valid_read_result(Pg) when is_binary(Pg) -> true; @@ -162,10 +176,10 @@ valid_read_result(error_unwritten) -> true; valid_read_result(error_trimmed) -> true; valid_read_result(V) -> eqeq(V, {todoTODO_fixit,?LINE}). -run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> - %% AfterTime = if LocalOrSlave == local -> 50000; - %% LocalOrSlave == slave -> 1000000 - %% end, +run_commands_on_node(LocalOrSlave, Cmds, Seed) -> + AfterTime = if LocalOrSlave == local -> 50000; + LocalOrSlave == slave -> 1000000 + end, event_logger:start_link(), pulse:start(), error_logger:tty(false), @@ -180,7 +194,7 @@ run_commands_on_node(_LocalOrSlave, Cmds, Seed) -> %% io:format(user, "Yooo: H = ~p\n", [H]), %% io:format(user, "Yooo: S = ~p\n", [S]), %% io:format(user, "Yooo: R = ~p\n", [R]), - %% receive after AfterTime -> ok end, + receive after AfterTime -> ok end, Trace = event_logger:get_events(), %% receive after AfterTime -> ok end, catch exit(pulse_application_controller, shutdown), @@ -290,10 +304,13 @@ check_trace(Trace0, _Cmds, _Seed) -> AllLPNsR = eqc_temporal:stateful( fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN; - ({call, _Pid, {read, LPN}}) -> LPN + ({call, _Pid, {read, LPN}}) -> LPN; + ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN end, fun(x) -> [] end, Calls), + %%io:format("Calls ~p\n", [Calls]), + %%io:format("AllLPNsR ~p\n", [AllLPNsR]), %% The last item in the relation tells us what the final facts are in the %% relation. In this case, it's all LPNs ever mentioned in the test run. {_, infinity, AllLPNs} = lists:last(eqc_temporal:all_future(AllLPNsR)), @@ -306,15 +323,17 @@ check_trace(Trace0, _Cmds, _Seed) -> Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> {mod_working, w_1, LPN, Pg, Pid}; - ({call, Pid, {fill, LPN}}) -> - {mod_working, w_t, LPN, unused, Pid}; - ({call, Pid, {trim, LPN}}) -> - {mod_working, w_t, LPN, unused, Pid} + ({call, Pid, {fill, LPN, will_be, ok}}) -> + {mod_working, w_t, LPN, fill, Pid}; + ({call, Pid, {trim, LPN, will_be, ok}}) -> + {mod_working, w_t, LPN, trim, Pid} end, fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res})-> [] end, Events), + %%ModsX = filter_relation_facts(fun(T) when element(4,T) == fill; element(4,T) == trim -> true; (_) -> false end, Mods), + %%io:format("Modsx ~p\n", [ModsX]), %% StartMod contains {mod_start, Ttn, LPN, V} when a modification finished. %% DoneMod contains {mod_end, Ttn, LPN, V} when a modification finished. @@ -349,12 +368,16 @@ check_trace(Trace0, _Cmds, _Seed) -> lists:mapfoldl( fun({TS1, TS2, StEnds}, Dict1) -> Dict2 = lists:foldl( - fun({mod_start, _Ttn, LPN, Pg}, D) -> - orddict:append(LPN, Pg, D) + fun({mod_start, w_1, LPN, Pg}, D) -> + orddict:append(LPN, Pg, D); + ({mod_start, w_t, LPN, _Pg}, D) -> + orddict:append(LPN, [aaaa_error_trimmed], D) end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( - fun({mod_end, _Ttn, LPN, Pg}, D) -> - orddict:store(LPN, [Pg], D) + fun({mod_end, w_1, LPN, Pg}, D) -> + orddict:store(LPN, [Pg], D); + ({mod_end, w_t, LPN, _Pg}, D) -> + orddict:store(LPN, [error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} end, InitialValDict, StartsDones), @@ -432,7 +455,8 @@ check_trace(Trace0, _Cmds, _Seed) -> %% ?QC_FMT("*Events: ~p\n", [Events]), ?QC_FMT("*Mods: ~p\n", [Mods]), ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), - ?QC_FMT("*Reads: ~p\n", [Reads]), + %?QC_FMT("*Reads: ~p\n", [Reads]), + ?QC_FMT("*Reads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), %% ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), %% ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, %% StartRead, StartMod])]), @@ -642,6 +666,16 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end end). +fill(#run{seq=Seq, proj=Proj}, SeedInt) -> + Max = corfurl_sequencer:get(Seq, 0), + %% The sequencer may be lying to us, shouganai. + LPN = (SeedInt rem Max) + 3, + ?LOG({fill, LPN}, + begin + Res = corfurl:fill_page(Proj, LPN), + perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) + end). + perhaps_trip_append_page(false, Res, _Page) -> Res; perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 -> @@ -666,6 +700,14 @@ perhaps_trip_scan_forward(true, _Res, 20) -> perhaps_trip_scan_forward(true, Res, _EndLPN) -> Res. +perhaps_trip_fill_page(false, Res, _EndLPN) -> + Res; +perhaps_trip_fill_page(true, _Res, 20) -> + io:format(user, "TRIP: fill_page\n", []), + error_overwritten; +perhaps_trip_fill_page(true, Res, _EndLPN) -> + Res. + -endif. % PULSE -endif. % TEST From db6fa3d89587dfaca2ade58270c29bdbf209af7e Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 23:15:01 +0900 Subject: [PATCH 27/70] Fix two bugs found by PULSE in corfurl_flu.erl, yay! --- prototype/corfurl/src/corfurl_flu.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 65c3936..1f0dcf6 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -294,10 +294,12 @@ check_write(LogicalPN, PageBin, check_is_written(Offset, _PhysicalPN, #state{mem_fh=FH}) -> case file:pread(FH, Offset, 1) of - {ok, <<1:8>>} -> - true; {ok, <<0:8>>} -> false; + {ok, <<1:8>>} -> % written + true; + {ok, <<2:8>>} -> % trimmed + true; eof -> %% We assume that Offset has been bounds-checked false @@ -318,7 +320,7 @@ read_page(LogicalPN, #state{max_mem=MaxMem, mem_fh=FH, io:format("BUMMER: ~s line ~w: incomplete write at ~p\n", [?MODULE, ?LINE, LogicalPN]), error_unwritten; - {ok, <<2:8>>} -> + {ok, <<2:8, _/binary>>} -> error_trimmed; {ok, _} -> error_unwritten; From 8a5677118268103edab81281b5d16eb4f3dd9f36 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 23:43:55 +0900 Subject: [PATCH 28/70] Add better condition for perhaps_trip_fill_page() --- prototype/corfurl/test/corfurl_pulse.erl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 33b2f1a..23c99dc 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -74,7 +74,7 @@ -define(TRIP_bad_scan_forward, false). -endif. -ifndef(TRIP_bad_fill). --define(TRIP_bad_fill, false). +-define(TRIP_bad_fill, true). -endif. initial_state() -> @@ -607,6 +607,13 @@ log_make_result(Result) -> log_make_result(Pid, Result) -> {result, Pid, Result}. +pick_an_LPN(Seq, SeedInt) -> + Max = corfurl_sequencer:get(Seq, 0), + %% The sequencer may be lying to us, shouganai. + if SeedInt > Max -> (SeedInt rem Max) + 1; + true -> SeedInt + end. + -define(LOG(Tag, MkCall), event_logger:event(log_make_call(Tag)), LOG__Result = MkCall, @@ -626,9 +633,7 @@ read_result_mangle(Else) -> Else. read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> - Max = corfurl_sequencer:get(Seq, 0), - %% The sequencer may be lying to us, shouganai. - LPN = (SeedInt rem Max) + 1, + LPN = pick_an_LPN(Seq, SeedInt), ?LOG({read, LPN}, begin Res = read_result_mangle(corfurl:read_page(Proj, LPN)), @@ -636,9 +641,8 @@ read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> end). scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> - Max = corfurl_sequencer:get(Seq, 0), StartLPN = if SeedInt == 1 -> 1; - true -> (SeedInt rem Max) + 1 + true -> pick_an_LPN(Seq, SeedInt) end, %% Our job is complicated by the ?LOG() macro, which isn't good enough %% for our purpose: we must lie about the starting timestamp, to make @@ -667,9 +671,7 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end). fill(#run{seq=Seq, proj=Proj}, SeedInt) -> - Max = corfurl_sequencer:get(Seq, 0), - %% The sequencer may be lying to us, shouganai. - LPN = (SeedInt rem Max) + 3, + LPN = pick_an_LPN(Seq, SeedInt) + 2, ?LOG({fill, LPN}, begin Res = corfurl:fill_page(Proj, LPN), @@ -702,9 +704,9 @@ perhaps_trip_scan_forward(true, Res, _EndLPN) -> perhaps_trip_fill_page(false, Res, _EndLPN) -> Res; -perhaps_trip_fill_page(true, _Res, 20) -> +perhaps_trip_fill_page(true, _Res, LPN) when 10 =< LPN, LPN =< 20 -> io:format(user, "TRIP: fill_page\n", []), - error_overwritten; + ok; % can trigger both invalid ttn and bad read perhaps_trip_fill_page(true, Res, _EndLPN) -> Res. From 7a46709c131c554654c6cfa3c47b30f9dddc2741 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 19 Feb 2014 23:54:59 +0900 Subject: [PATCH 29/70] Change transition type names to make better invalid transition detection --- prototype/corfurl/test/corfurl_pulse.erl | 59 +++++++++++------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 23c99dc..0426beb 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -74,7 +74,7 @@ -define(TRIP_bad_scan_forward, false). -endif. -ifndef(TRIP_bad_fill). --define(TRIP_bad_fill, true). +-define(TRIP_bad_fill, false). -endif. initial_state() -> @@ -318,15 +318,16 @@ check_trace(Trace0, _Cmds, _Seed) -> %% Use the following atoms to denote transitions ("Ttn") by an LPN: %% w_0 = not written yet, error_unwritten %% w_1 = written successfully, {ok, binary::()} - %% w_t = trimmed, error_trimmed + %% w_ft = fill trimmed, error_trimmed + %% w_tt = trim trimmed, error_trimmed Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> {mod_working, w_1, LPN, Pg, Pid}; ({call, Pid, {fill, LPN, will_be, ok}}) -> - {mod_working, w_t, LPN, fill, Pid}; + {mod_working, w_ft, LPN, fill, Pid}; ({call, Pid, {trim, LPN, will_be, ok}}) -> - {mod_working, w_t, LPN, trim, Pid} + {mod_working, w_tt, LPN, trim, Pid} end, fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res})-> [] @@ -370,13 +371,15 @@ check_trace(Trace0, _Cmds, _Seed) -> Dict2 = lists:foldl( fun({mod_start, w_1, LPN, Pg}, D) -> orddict:append(LPN, Pg, D); - ({mod_start, w_t, LPN, _Pg}, D) -> - orddict:append(LPN, [aaaa_error_trimmed], D) + ({mod_start, WType, LPN, _Pg}, D) + when WType == w_ft; WType == w_tt -> + orddict:append(LPN, [error_trimmed], D) end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( fun({mod_end, w_1, LPN, Pg}, D) -> orddict:store(LPN, [Pg], D); - ({mod_end, w_t, LPN, _Pg}, D) -> + ({mod_end, WType, LPN, _Pg}, D) + when WType == w_ft; WType == w_tt -> orddict:store(LPN, [error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} @@ -396,23 +399,6 @@ check_trace(Trace0, _Cmds, _Seed) -> {{TS1, TS2, [{transitions, Dict2}]}, Dict2} end, InitialTtnDict, StartsDones), - %% Property: For all LPNs, the transition list for K must be one of the - %% following four (4) acceptable transition orderings. - {_, _, [{transitions, FinalTtns}]} = lists:last( - eqc_temporal:all_future(TransitionsR)), - InvalidTransitions = orddict:fold( - fun(_LPN, [w_0], Acc) -> - Acc; - (_LPN, [w_0,w_1], Acc) -> - Acc; - (_LPN, [w_0,w_t], Acc) -> - Acc; - (_LPN, [w_0,w_1,w_t], Acc) -> - Acc; - (LPN, BadTtns, Acc) -> - [{LPN, BadTtns}|Acc] - end, [], FinalTtns), - %% Checking reads is a tricky thing. My first attempt created a temporal %% relation for the 1usec window when the read call was complete, then %% union with the ValuesR relation to see what values were valid at that @@ -451,15 +437,26 @@ check_trace(Trace0, _Cmds, _Seed) -> (_) -> false end, BadReads = filter_relation_facts(BadFilter, Reads), + %% Property: For all LPNs, the transition list for K must be one of the + %% following four (4) acceptable transition orderings. + {_, _, [{transitions, FinalTtns}]} = lists:last( + eqc_temporal:all_future(TransitionsR)), + InvalidTransitions = orddict:fold( + fun(_LPN, [w_0], Acc) -> + Acc; + (_LPN, [w_0,w_1], Acc) -> + Acc; + (_LPN, [w_0,w_ft], Acc) -> + Acc; + (_LPN, [w_0,w_1,w_tt], Acc) -> + Acc; + (LPN, BadTtns, Acc) -> + [{LPN, BadTtns}|Acc] + end, [], FinalTtns), + ?WHENFAIL(begin - %% ?QC_FMT("*Events: ~p\n", [Events]), - ?QC_FMT("*Mods: ~p\n", [Mods]), + ?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), - %?QC_FMT("*Reads: ~p\n", [Reads]), - ?QC_FMT("*Reads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), - %% ?QC_FMT("*readsUmods: ~p\n", [eqc_temporal:union(Reads, Mods)]), - %% ?QC_FMT("*DreadUDmod: ~p\n", [eqc_temporal:unions([DoneRead, DoneMod, - %% StartRead, StartMod])]), ?QC_FMT("*BadReads: ~p\n", [BadReads]) end, conjunction( From b3ed9ef51ccfff6e0bef42f5f7fe22e37d3bd1d2 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 00:12:20 +0900 Subject: [PATCH 30/70] Add fill checking to PULSE model, minimal API coverage is complete --- prototype/corfurl/src/corfurl.erl | 17 ++++++---- prototype/corfurl/test/corfurl_pulse.erl | 40 +++++++++++++++++++----- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 1eb047b..c92a1ac 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -25,7 +25,7 @@ read_projection/2, save_projection/2]). -export([append_page/3, read_page/2, scan_forward/3, - fill_page/2]). + fill_page/2, trim_page/2]). -include("corfurl.hrl"). @@ -203,16 +203,21 @@ scan_forward(P, LPN, MaxPages, _Status, _MoreP, Acc) -> fill_page(#proj{epoch=Epoch} = P, LPN) -> Chain = project_to_chain(LPN, P), - fill_page2(Chain, Epoch, LPN). + fill_or_trim_page(Chain, Epoch, LPN, fill). -fill_page2([], _Epoch, _LPN) -> +trim_page(#proj{epoch=Epoch} = P, LPN) -> + Chain = project_to_chain(LPN, P), + fill_or_trim_page(Chain, Epoch, LPN, trim). + +fill_or_trim_page([], _Epoch, _LPN, _Func) -> ok; -fill_page2([H|T], Epoch, LPN) -> - case corfurl_flu:fill(flu_pid(H), Epoch, LPN) of +fill_or_trim_page([H|T], Epoch, LPN, Func) -> + case corfurl_flu:Func(flu_pid(H), Epoch, LPN) of ok -> - fill_page2(T, Epoch, LPN); + fill_or_trim_page(T, Epoch, LPN, Func); Else -> %% TODO: worth doing anything here, if we're in the middle of chain? + %% TODO: is that ^^ anything different for fill vs. trim? Else end. diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 0426beb..0b4bb27 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -65,7 +65,7 @@ %% Define true to fake bad behavior that model **must** notice & fail! -ifndef(TRIP_no_append_duplicates). --define(TRIP_no_append_duplicates, false). +-define(TRIP_no_append_duplicates, true). -endif. -ifndef(TRIP_bad_read). -define(TRIP_bad_read, false). @@ -76,6 +76,9 @@ -ifndef(TRIP_bad_fill). -define(TRIP_bad_fill, false). -endif. +-ifndef(TRIP_bad_trim). +-define(TRIP_bad_trim, false). +-endif. initial_state() -> #state{}. @@ -115,6 +118,8 @@ command(#state{run=Run} = S) -> || S#state.is_setup] ++ [{4, {call, ?MODULE, fill, [Run, gen_approx_page()]}} || S#state.is_setup] ++ + [{4, {call, ?MODULE, trim, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ [])). %% Precondition, checked before a command is added to the command sequence. @@ -138,6 +143,8 @@ next_state(S, _, {call, _, read_approx, _}) -> next_state(S, _, {call, _, scan_forward, _}) -> S; next_state(S, _, {call, _, fill, _}) -> + S; +next_state(S, _, {call, _, trim, _}) -> S. eqeq(X, X) -> true; @@ -163,12 +170,14 @@ postcondition(_S, {call, _, scan_forward, _}, V) -> _ -> eqeq(V, {todoTODO_fixit,?LINE}) end; -postcondition(_S, {call, _, fill, _}, V) -> +postcondition(_S, {call, _, FillTrim, _}, V) + when FillTrim == fill; FillTrim == trim -> case V of ok -> true; error_trimmed -> true; + error_unwritten -> true; error_overwritten -> true; - _ -> eqeq(V, {fill_error, V}) + _ -> eqeq(V, {error, FillTrim, V}) end. valid_read_result(Pg) when is_binary(Pg) -> true; @@ -305,7 +314,8 @@ check_trace(Trace0, _Cmds, _Seed) -> AllLPNsR = eqc_temporal:stateful( fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN; ({call, _Pid, {read, LPN}}) -> LPN; - ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN + ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN; + ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN end, fun(x) -> [] end, Calls), @@ -333,8 +343,6 @@ check_trace(Trace0, _Cmds, _Seed) -> [] end, Events), - %%ModsX = filter_relation_facts(fun(T) when element(4,T) == fill; element(4,T) == trim -> true; (_) -> false end, Mods), - %%io:format("Modsx ~p\n", [ModsX]), %% StartMod contains {mod_start, Ttn, LPN, V} when a modification finished. %% DoneMod contains {mod_end, Ttn, LPN, V} when a modification finished. @@ -675,6 +683,14 @@ fill(#run{seq=Seq, proj=Proj}, SeedInt) -> perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) end). +trim(#run{seq=Seq, proj=Proj}, SeedInt) -> + LPN = pick_an_LPN(Seq, SeedInt) + 2, + ?LOG({trim, LPN}, + begin + Res = corfurl:trim_page(Proj, LPN), + perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN) + end). + perhaps_trip_append_page(false, Res, _Page) -> Res; perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 -> @@ -693,7 +709,7 @@ perhaps_trip_read_approx(true, Res, _LPN) -> perhaps_trip_scan_forward(false, Res, _EndLPN) -> Res; -perhaps_trip_scan_forward(true, _Res, 20) -> +perhaps_trip_scan_forward(true, _Res, 10) -> io:format(user, "TRIP: scan_forward\n", []), <<"magic number bingo, you are a winner">>; perhaps_trip_scan_forward(true, Res, _EndLPN) -> @@ -701,12 +717,20 @@ perhaps_trip_scan_forward(true, Res, _EndLPN) -> perhaps_trip_fill_page(false, Res, _EndLPN) -> Res; -perhaps_trip_fill_page(true, _Res, LPN) when 10 =< LPN, LPN =< 20 -> +perhaps_trip_fill_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 -> io:format(user, "TRIP: fill_page\n", []), ok; % can trigger both invalid ttn and bad read perhaps_trip_fill_page(true, Res, _EndLPN) -> Res. +perhaps_trip_trim_page(false, Res, _EndLPN) -> + Res; +perhaps_trip_trim_page(true, _Res, LPN) when 3 =< LPN, LPN =< 5 -> + io:format(user, "TRIP: trim_page\n", []), + ok; +perhaps_trip_trim_page(true, Res, _EndLPN) -> + Res. + -endif. % PULSE -endif. % TEST From f5c4474669b8a03755d7df87e9e4dbc36cd63c33 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 02:24:06 +0900 Subject: [PATCH 31/70] Derp, turn off TRIP_no_append_duplicates --- prototype/corfurl/test/corfurl_pulse.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 0b4bb27..a5a2c0e 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -65,7 +65,7 @@ %% Define true to fake bad behavior that model **must** notice & fail! -ifndef(TRIP_no_append_duplicates). --define(TRIP_no_append_duplicates, true). +-define(TRIP_no_append_duplicates, false). -endif. -ifndef(TRIP_bad_read). -define(TRIP_bad_read, false). From e7918762123774d8a54f548ca9801a588ff9cd85 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 19:08:33 +0900 Subject: [PATCH 32/70] Fix silly model error when calculating values --- prototype/corfurl/test/corfurl_pulse.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index a5a2c0e..d4a9f3d 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -381,7 +381,7 @@ check_trace(Trace0, _Cmds, _Seed) -> orddict:append(LPN, Pg, D); ({mod_start, WType, LPN, _Pg}, D) when WType == w_ft; WType == w_tt -> - orddict:append(LPN, [error_trimmed], D) + orddict:append(LPN, error_trimmed, D) end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( fun({mod_end, w_1, LPN, Pg}, D) -> From d2562588f28b28825fa01e694800ad40d83dc04d Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 19:16:39 +0900 Subject: [PATCH 33/70] Move the lists:reverse() in make_chains() to preserve input's order in the output --- prototype/corfurl/test/corfurl_pulse.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index d4a9f3d..5dd2a2f 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -566,7 +566,7 @@ make_chains(ChainLen, FLUs) -> make_chains(ChainLen, FLUs, [], []). make_chains(_ChainLen, [], SmallAcc, BigAcc) -> - lists:reverse([SmallAcc|BigAcc]); + [lists:reverse(SmallAcc)|BigAcc]; make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> if length(SmallAcc) == ChainLen -> make_chains(ChainLen, T, [H], [SmallAcc|BigAcc]); From 5edee3a2cf9af35a9f1585a62306318aa1005c62 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 19:17:17 +0900 Subject: [PATCH 34/70] Don't bother adding 2 when picking an LPN for fill & trim --- prototype/corfurl/test/corfurl_pulse.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 5dd2a2f..33b0308 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -676,7 +676,7 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end). fill(#run{seq=Seq, proj=Proj}, SeedInt) -> - LPN = pick_an_LPN(Seq, SeedInt) + 2, + LPN = pick_an_LPN(Seq, SeedInt), ?LOG({fill, LPN}, begin Res = corfurl:fill_page(Proj, LPN), @@ -684,7 +684,7 @@ fill(#run{seq=Seq, proj=Proj}, SeedInt) -> end). trim(#run{seq=Seq, proj=Proj}, SeedInt) -> - LPN = pick_an_LPN(Seq, SeedInt) + 2, + LPN = pick_an_LPN(Seq, SeedInt), ?LOG({trim, LPN}, begin Res = corfurl:trim_page(Proj, LPN), From 431827f65eebccb40b9c45fb0b5c0e86552107c1 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 19:19:16 +0900 Subject: [PATCH 35/70] Allow racing trim/fill and read-repair to simply "win". This exposes a bug in the PULSE model, now that we can have multiple successful fill/trim for the same LPN. --- prototype/corfurl/src/corfurl.erl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index c92a1ac..3ec0418 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -213,7 +213,22 @@ fill_or_trim_page([], _Epoch, _LPN, _Func) -> ok; fill_or_trim_page([H|T], Epoch, LPN, Func) -> case corfurl_flu:Func(flu_pid(H), Epoch, LPN) of - ok -> + Res when Res == ok; Res == error_trimmed -> + %% Detecting a race here between fills and trims is too crazy, + %% and I don't believe that it *matters*. The ickiest one + %% is a race between Proc A = trim and Proc B = read, + %% chain length of 2 or more: + %% Proc A: trim head -> ok + %% Proc B: read tail -> error_unwritten + %% Proc B: read head -> error_trimmed + %% Proc B: trim tail -> ok + %% Proc A: trim tail -> ?? + %% + %% The result that we want that both A & B & any later + %% readers agree that the LPN is trimmed. If the chain is + %% >2, then the procs can win some/all/none of the races + %% to fix up the chain, that's no problem. But don't tell + %% the caller that there was an error during those races. fill_or_trim_page(T, Epoch, LPN, Func); Else -> %% TODO: worth doing anything here, if we're in the middle of chain? From fd32bcb30814c005c5decb964c5ee18939bf0cc7 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 21:06:10 +0900 Subject: [PATCH 36/70] Fix PULSE model to accomodate API change from previous commit. Now 1+ trim & fill transitions are collapsed to a single 'w_t+' atom. The atom name is a bit odd; think about regexps and it hopefully makes sense. --- prototype/corfurl/test/corfurl_pulse.erl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 33b0308..f835b67 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -449,18 +449,19 @@ check_trace(Trace0, _Cmds, _Seed) -> %% following four (4) acceptable transition orderings. {_, _, [{transitions, FinalTtns}]} = lists:last( eqc_temporal:all_future(TransitionsR)), + FinaTtns_filtered = filter_transition_trimfill_suffixes(FinalTtns), InvalidTransitions = orddict:fold( fun(_LPN, [w_0], Acc) -> Acc; (_LPN, [w_0,w_1], Acc) -> Acc; - (_LPN, [w_0,w_ft], Acc) -> + (_LPN, [w_0,'w_t+'], Acc) -> Acc; - (_LPN, [w_0,w_1,w_tt], Acc) -> + (_LPN, [w_0,w_1,'w_t+'], Acc) -> Acc; (LPN, BadTtns, Acc) -> [{LPN, BadTtns}|Acc] - end, [], FinalTtns), + end, [], FinaTtns_filtered), ?WHENFAIL(begin ?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), @@ -600,6 +601,23 @@ filter_relation_facts(FilterFun, R) -> [{TS1, TS2, lists:filter(FilterFun, Facts)} || {TS1, TS2, Facts} <- R]. %% {TS1, TS2, Facts} <- Reads, Fact <- Facts, BadFilter(Fact)], +filter_transition_trimfill_suffixes(Ttns) -> + [{X, filter_1_transition_list(L)} || {X, L} <- Ttns]. + +filter_1_transition_list([]) -> + []; +filter_1_transition_list(Old) -> + New = lists:reverse(lists:dropwhile(fun(w_tt) -> true; + (w_ft) -> true; + (_) -> false + end, lists:reverse(Old))), + Suffix = case lists:last(Old) of + w_ft -> ['w_t+']; + w_tt -> ['w_t+']; + _ -> [] + end, + New ++ Suffix. + log_make_call(Tag) -> log_make_call(self(), Tag). From 370c57b78a6868609fee765b4d2285059a674dd5 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 21:38:02 +0900 Subject: [PATCH 37/70] Bug: corfurl:read_repair_chain() should use trim when it encounters error_trimmed --- prototype/corfurl/src/corfurl.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 3ec0418..9df2b6d 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -130,7 +130,7 @@ read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> error_badepoch; error_trimmed -> %% TODO: robustify - [ok = case corfurl_flu:fill(flu_pid(X), Epoch, LPN) of + [ok = case corfurl_flu:trim(flu_pid(X), Epoch, LPN) of ok -> ok; error_trimmed -> ok; Else -> Else From e40394a3a75d16ebec2c9517303b05e95d05516c Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 22:05:05 +0900 Subject: [PATCH 38/70] Bugfix: yet another race in read_repair, sweet --- prototype/corfurl/src/corfurl.erl | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 9df2b6d..9b36656 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -98,7 +98,7 @@ write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; Else -> - error({left_off_here, ?MODULE, ?LINE, Else}) + giant_error({left_off_here, ?MODULE, ?LINE, Else}) end end. @@ -122,6 +122,13 @@ read_page(#proj{epoch=Epoch} = P, LPN) -> %% Let it crash: error_overwritten end. +ok_or_trim(ok) -> + ok; +ok_or_trim(error_trimmed) -> + ok; +ok_or_trim(Else) -> + Else. + read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of {ok, Page} -> @@ -130,10 +137,11 @@ read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> error_badepoch; error_trimmed -> %% TODO: robustify - [ok = case corfurl_flu:trim(flu_pid(X), Epoch, LPN) of - ok -> ok; - error_trimmed -> ok; - Else -> Else + [ok = case ok_or_trim(corfurl_flu:trim(flu_pid(X), Epoch, LPN)) of + ok -> ok; + error_unwritten -> ok_or_trim(corfurl_flu:fill( + flu_pid(X), Epoch, LPN)); + Else -> Else end || X <- Rest], error_trimmed; error_unwritten -> @@ -160,8 +168,8 @@ read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) -> %% to fix problems? {ok, Page2}; {ok, _Page2} -> - error({bummerbummer, ?MODULE, ?LINE, sanity_check_failure, - lpn, LPN, epoch, Epoch}); + giant_error({bummerbummer, ?MODULE, ?LINE, + sanity_check_failure, lpn, LPN, epoch, Epoch}); error_badepoch -> error_badepoch; error_trimmed -> @@ -241,6 +249,10 @@ flu_pid(X) when is_pid(X) -> flu_pid(X) when is_atom(X) -> ets:lookup_element(flu_pid_tab, X, 1). +giant_error(Err) -> + io:format(user, "GIANT ERROR: ~p\n", [Err]), + exit(Err). + %%%% %%%% %%%% projection utilities %%%% %%%% %%%% new_range(Start, End, ChainList) -> From 8ec5f04903d660daed2213008931223cbecd9639 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 22:06:34 +0900 Subject: [PATCH 39/70] Bug: PULSE found a way to reach a 'left_off_here' corner case, sweet --- prototype/corfurl/src/corfurl.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 9b36656..b92f8f4 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -98,6 +98,7 @@ write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; Else -> + %% Guess what?? PULSE can drive us to this case, excellent! giant_error({left_off_here, ?MODULE, ?LINE, Else}) end end. From 88d44722bef4fdecf85954b22c10c1056a88ecd8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Feb 2014 23:09:57 +0900 Subject: [PATCH 40/70] Fix PULSE model bug of adding multiple same values to orddict --- prototype/corfurl/test/corfurl_pulse.erl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index f835b67..7f05484 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -380,14 +380,20 @@ check_trace(Trace0, _Cmds, _Seed) -> fun({mod_start, w_1, LPN, Pg}, D) -> orddict:append(LPN, Pg, D); ({mod_start, WType, LPN, _Pg}, D) - when WType == w_ft; WType == w_tt -> - orddict:append(LPN, error_trimmed, D) + when WType == w_ft; WType == w_tt -> + case lists:member(error_trimmed, + orddict:fetch(LPN, D)) of + true -> + D; + false -> + orddict:append(LPN, error_trimmed,D) + end end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( fun({mod_end, w_1, LPN, Pg}, D) -> orddict:store(LPN, [Pg], D); ({mod_end, WType, LPN, _Pg}, D) - when WType == w_ft; WType == w_tt -> + when WType == w_ft; WType == w_tt -> orddict:store(LPN, [error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} From 5420e9ca1f35f00e2e5331c1ca6de86b40bc8422 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Fri, 21 Feb 2014 15:28:57 +0900 Subject: [PATCH 41/70] Bugfix for read repair: if trimmed, try fill first then trim --- prototype/corfurl/src/corfurl.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index b92f8f4..ab0b127 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -138,11 +138,11 @@ read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> error_badepoch; error_trimmed -> %% TODO: robustify - [ok = case ok_or_trim(corfurl_flu:trim(flu_pid(X), Epoch, LPN)) of - ok -> ok; - error_unwritten -> ok_or_trim(corfurl_flu:fill( + [ok = case ok_or_trim(corfurl_flu:fill(flu_pid(X), Epoch, LPN)) of + ok -> ok; + error_overwritten -> ok_or_trim(corfurl_flu:trim( flu_pid(X), Epoch, LPN)); - Else -> Else + Else -> Else end || X <- Rest], error_trimmed; error_unwritten -> From a7dd78d8f1652ca9c5c7764878b9d45bd16d20bf Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Fri, 21 Feb 2014 18:01:43 +0900 Subject: [PATCH 42/70] Switch to Lamport clocks for PULSE verifying --- prototype/corfurl/src/corfurl_flu.erl | 77 ++++++++++++------- prototype/corfurl/src/corfurl_sequencer.erl | 18 +++-- prototype/corfurl/test/corfurl_pulse.erl | 37 +++++++-- .../corfurl/test/pulse_util/event_logger.erl | 6 +- .../corfurl/test/pulse_util/lamport_clock.erl | 67 ++++++++++++++++ 5 files changed, 160 insertions(+), 45 deletions(-) create mode 100644 prototype/corfurl/test/pulse_util/lamport_clock.erl diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 1f0dcf6..ba370da 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -37,6 +37,7 @@ -ifdef(TEST). -export([get__mlp/1, get__min_epoch/1, get__trim_watermark/1]). +-compile(export_all). -ifdef(PULSE). -compile({parse_transform, pulse_instrument}). -endif. @@ -72,22 +73,28 @@ stop(Pid) -> write(Pid, Epoch, LogicalPN, PageBin) when is_integer(LogicalPN), LogicalPN > 0, is_binary(PageBin) -> - gen_server:call(Pid, {write, Epoch, LogicalPN, PageBin}, infinity). + g_call(Pid, {write, Epoch, LogicalPN, PageBin}, infinity). read(Pid, Epoch, LogicalPN) when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> - gen_server:call(Pid, {read, Epoch, LogicalPN}, infinity). + g_call(Pid, {read, Epoch, LogicalPN}, infinity). seal(Pid, Epoch) when is_integer(Epoch), Epoch > 0 -> - gen_server:call(Pid, {seal, Epoch}, infinity). + g_call(Pid, {seal, Epoch}, infinity). trim(Pid, Epoch, LogicalPN) when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> - gen_server:call(Pid, {trim, Epoch, LogicalPN}, infinity). + g_call(Pid, {trim, Epoch, LogicalPN}, infinity). fill(Pid, Epoch, LogicalPN) when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> - gen_server:call(Pid, {fill, Epoch, LogicalPN}, infinity). + g_call(Pid, {fill, Epoch, LogicalPN}, infinity). + +g_call(Pid, Arg, Timeout) -> + LC1 = lamport_clock:get(), + {Res, LC2} = gen_server:call(Pid, {Arg, LC1}, Timeout), + lamport_clock:update(LC2), + Res. -ifdef(TEST). @@ -105,6 +112,8 @@ get__trim_watermark(Pid) -> %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% init({Dir, ExpPageSize, ExpMaxMem}) -> + lamport_clock:init(), + MemFile = memfile_path(Dir), filelib:ensure_dir(MemFile), {ok, FH} = file:open(MemFile, [read, write, raw, binary]), @@ -138,49 +147,61 @@ init({Dir, ExpPageSize, ExpMaxMem}) -> handle_call(Call, From, #state{max_logical_page=unknown} = State) -> {noreply, NewState} = handle_info(finish_init, State), handle_call(Call, From, NewState); -handle_call({write, ClientEpoch, _LogicalPN, _PageBin}, _From, +handle_call({{write, ClientEpoch, _LogicalPN, _PageBin}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - {reply, error_badepoch, State}; -handle_call({write, _ClientEpoch, LogicalPN, PageBin}, _From, + LC2 = lamport_clock:update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, #state{max_logical_page=MLPN} = State) -> + LC2 = lamport_clock:update(LC1), case check_write(LogicalPN, PageBin, State) of {ok, Offset} -> ok = write_page(Offset, LogicalPN, PageBin, State), NewMLPN = erlang:max(LogicalPN, MLPN), - {reply, ok, State#state{max_logical_page=NewMLPN}}; + {reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}}; Else -> - {reply, Else, State} + {reply, {Else, LC2}, State} end; -handle_call({read, ClientEpoch, _LogicalPN}, _From, +handle_call({{read, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - {reply, error_badepoch, State}; -handle_call({read, _ClientEpoch, LogicalPN}, _From, State) -> - {reply, read_page(LogicalPN, State), State}; + LC2 = lamport_clock:update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lamport_clock:update(LC1), + {reply, {read_page(LogicalPN, State), LC2}, State}; -handle_call({seal, ClientEpoch}, _From, #state{min_epoch=MinEpoch} = State) +handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch =< MinEpoch -> - {reply, error_badepoch, State}; -handle_call({seal, ClientEpoch}, _From, #state{max_logical_page=MLPN}=State) -> + LC2 = lamport_clock:update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=State) -> + LC2 = lamport_clock:update(LC1), NewState = State#state{min_epoch=ClientEpoch}, ok = write_hard_state(NewState), - {reply, {ok, MLPN}, NewState}; + {reply, {{ok, MLPN}, LC2}, NewState}; -handle_call({trim, ClientEpoch, _LogicalPN}, _From, +handle_call({{trim, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - {reply, error_badepoch, State}; -handle_call({trim, _ClientEpoch, LogicalPN}, _From, State) -> - do_trim_or_fill(trim, LogicalPN, State); + LC2 = lamport_clock:update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lamport_clock:update(LC1), + {Reply, NewState} = do_trim_or_fill(trim, LogicalPN, State), + {reply, {Reply, LC2}, NewState}; -handle_call({fill, ClientEpoch, _LogicalPN}, _From, +handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - {reply, error_badepoch, State}; -handle_call({fill, _ClientEpoch, LogicalPN}, _From, State) -> - do_trim_or_fill(fill, LogicalPN, State); + LC2 = lamport_clock:update(LC1), + {reply, {error_badepoch, LC2}, State}; +handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> + LC2 = lamport_clock:update(LC1), + {Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State), + {reply, {Reply, LC2}, NewState}; handle_call(get__mlp, _From, State) -> {reply, State#state.max_logical_page, State}; @@ -347,9 +368,9 @@ do_trim_or_fill(Op, LogicalPN, true -> ok end, - {reply, ok, NewS}; + {ok, NewS}; Else -> - {reply, Else, S} + {Else, S} end. trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 8b41040..4f14e66 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -50,11 +50,15 @@ stop(Pid) -> gen_server:call(Pid, stop, infinity). get(Pid, NumPages) -> - gen_server:call(Pid, {get, NumPages}, infinity). + {LPN, LC} = gen_server:call(Pid, {get, NumPages, lamport_clock:get()}, + infinity), + lamport_clock:update(LC), + LPN. %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% init({FLUs, TypeOrSeed}) -> + lamport_clock:init(), MLP = get_max_logical_page(FLUs), if TypeOrSeed == standard -> {ok, MLP + 1}; @@ -64,16 +68,19 @@ init({FLUs, TypeOrSeed}) -> {ok, {MLP+1, BadPercent, MaxDifference}} end. -handle_call({get, NumPages}, _From, MLP) when is_integer(MLP) -> - {reply, MLP, MLP + NumPages}; -handle_call({get, NumPages}, _From, {MLP, BadPercent, MaxDifference}) -> +handle_call({get, NumPages, LC}, _From, MLP) when is_integer(MLP) -> + NewLC = lamport_clock:update(LC), + {reply, {MLP, NewLC}, MLP + NumPages}; +handle_call({get, NumPages, LC}, _From, {MLP, BadPercent, MaxDifference}) -> + NewLC = lamport_clock:update(LC), Fudge = case random:uniform(100) of N when N < BadPercent -> random:uniform(MaxDifference * 2) - MaxDifference; _ -> 0 end, - {reply, erlang:max(1, MLP + Fudge), {MLP + NumPages, BadPercent, MaxDifference}}; + {reply, {erlang:max(1, MLP + Fudge), NewLC}, + {MLP + NumPages, BadPercent, MaxDifference}}; handle_call(stop, _From, MLP) -> {stop, normal, ok, MLP}; handle_call(_Request, _From, MLP) -> @@ -87,6 +94,7 @@ handle_info(_Info, MLP) -> {noreply, MLP}. terminate(_Reason, _MLP) -> + %% io:format(user, "C=~w,", [lamport_clock:get()]), ok. code_change(_OldVsn, MLP, _Extra) -> diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 7f05484..01ce1cc 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -46,6 +46,8 @@ eqc:on_output(fun(Str, Args) -> ?QC_FMT(Str, Args) end, P)). -define(MAX_PAGES, 50000). +-define(MY_TAB, i_have_a_name). +-define(MY_KEY, ?MY_TAB). -record(run, { seq, % Sequencer @@ -197,6 +199,8 @@ run_commands_on_node(LocalOrSlave, Cmds, Seed) -> X = try {H, S, Res, Trace} = pulse:run(fun() -> + catch ets:new(?MY_TAB, [public, set, named_table]), + ets:insert(?MY_TAB, {?MY_KEY, undefined}), %% application:start(my_test_app), %% receive after AfterTime -> ok end, {H, S, R} = run_parallel_commands(?MODULE, Cmds), @@ -206,13 +210,15 @@ run_commands_on_node(LocalOrSlave, Cmds, Seed) -> receive after AfterTime -> ok end, Trace = event_logger:get_events(), %% receive after AfterTime -> ok end, + [{_, ThisRun}] = ets:lookup(?MY_TAB, ?MY_KEY), + [clean_up_runtime(ThisRun) || ThisRun /= undefined], + %% stop pulse controller *after* clean_up_runtime(). catch exit(pulse_application_controller, shutdown), {H, S, R, Trace} end, [{seed, Seed}, {strategy, unfair}]), Schedule = pulse:get_schedule(), Errors = gen_event:call(error_logger, handle_errors, get_errors, 60*1000), - [clean_up_runtime(S) || S#state.run /= undefined], {H, S, Res, Trace, Schedule, Errors} catch _:Err -> @@ -283,7 +289,8 @@ check_trace(Trace0, _Cmds, _Seed) -> %% Also, the append might fail, so the model can ignore those %% failures because they're not mutating any state that and %% external viewer can see. - Trace = add_LPN_to_append_calls(Trace0), + %% WARNING: Trace0 + lamport_clocks means Trace0 is not strictly sorted! + Trace = add_LPN_to_append_calls(lists:sort(Trace0)), Events = eqc_temporal:from_timed_list(Trace), %% Example Events, temporal style, 1 usec resolution, same as original trace @@ -470,6 +477,7 @@ check_trace(Trace0, _Cmds, _Seed) -> end, [], FinaTtns_filtered), ?WHENFAIL(begin + ?QC_FMT("*Trace: ~p\n", [Trace]), ?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), ?QC_FMT("*BadReads: ~p\n", [BadReads]) @@ -563,7 +571,7 @@ zipwith(F, [X|Xs], [Y|Ys]) -> [F(X, Y)|zipwith(F, Xs, Ys)]; zipwith(_, _, _) -> []. -clean_up_runtime(#state{run=R} = _S) -> +clean_up_runtime(R) -> %% io:format(user, "clean_up_runtime: run = ~p\n", [R]), catch corfurl_sequencer:stop(R#run.seq), [catch corfurl_flu:stop(F) || F <- R#run.flus], @@ -582,13 +590,16 @@ make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> end. setup(NumChains, ChainLen, PageSize, SeqType) -> + lamport_clock:init(), N = NumChains * ChainLen, FLUs = corfurl_test:setup_basic_flus(N, PageSize, ?MAX_PAGES), {ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType), Chains = make_chains(ChainLen, FLUs), %% io:format(user, "Cs = ~p\n", [Chains]), Proj = corfurl:new_simple_projection(1, 1, ?MAX_PAGES, Chains), - #run{seq=Seq, proj=Proj, flus=FLUs}. + Run = #run{seq=Seq, proj=Proj, flus=FLUs}, + ets:insert(?MY_TAB, {?MY_KEY, Run}), + Run. range_ify([]) -> []; @@ -644,12 +655,14 @@ pick_an_LPN(Seq, SeedInt) -> end. -define(LOG(Tag, MkCall), - event_logger:event(log_make_call(Tag)), + event_logger:event(log_make_call(Tag), lamport_clock:get()), LOG__Result = MkCall, - event_logger:event(log_make_result(LOG__Result)), + event_logger:event(log_make_result(LOG__Result), lamport_clock:get()), LOG__Result). append(#run{seq=Seq, proj=Proj}, Page) -> + lamport_clock:init(), + lamport_clock:incr(), ?LOG({append, Page}, begin Res = corfurl:append_page(Seq, Proj, Page), @@ -662,6 +675,8 @@ read_result_mangle(Else) -> Else. read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), LPN = pick_an_LPN(Seq, SeedInt), ?LOG({read, LPN}, begin @@ -670,6 +685,8 @@ read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> end). scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> + lamport_clock:init(), + lamport_clock:incr(), StartLPN = if SeedInt == 1 -> 1; true -> pick_an_LPN(Seq, SeedInt) end, @@ -679,11 +696,11 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> %% instead from a single-page read_page() call. ?LOG({scan_forward, StartLPN, NumPages}, begin - TS1 = event_logger:timestamp(), + TS1 = lamport_clock:get(), case corfurl:scan_forward(Proj, StartLPN, NumPages) of {ok, EndLPN, MoreP, Pages} -> PageIs = lists:zip(Pages, lists:seq(1, length(Pages))), - TS2 = event_logger:timestamp(), + TS2 = lamport_clock:get(), [begin PidI = {self(), s_f, I}, event_logger:event(log_make_call(PidI, {read, LPN}), @@ -700,6 +717,8 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end). fill(#run{seq=Seq, proj=Proj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), LPN = pick_an_LPN(Seq, SeedInt), ?LOG({fill, LPN}, begin @@ -708,6 +727,8 @@ fill(#run{seq=Seq, proj=Proj}, SeedInt) -> end). trim(#run{seq=Seq, proj=Proj}, SeedInt) -> + lamport_clock:init(), + lamport_clock:incr(), LPN = pick_an_LPN(Seq, SeedInt), ?LOG({trim, LPN}, begin diff --git a/prototype/corfurl/test/pulse_util/event_logger.erl b/prototype/corfurl/test/pulse_util/event_logger.erl index 063ed70..8633b99 100644 --- a/prototype/corfurl/test/pulse_util/event_logger.erl +++ b/prototype/corfurl/test/pulse_util/event_logger.erl @@ -126,10 +126,8 @@ code_change(_OldVsn, State, _Extra) -> %%-------------------------------------------------------------------- add_event(#event{timestamp = Now, data = Data}, State) -> - Event = #event{ timestamp = Now - State#state.start_time, data = Data }, + Event = #event{ timestamp = Now, data = Data }, State#state{ events = [Event|State#state.events] }. timestamp() -> - {A, B, C} = erlang:now(), - 1000000 * (1000000 * A + B) + C. - + lamport_clock:get(). diff --git a/prototype/corfurl/test/pulse_util/lamport_clock.erl b/prototype/corfurl/test/pulse_util/lamport_clock.erl new file mode 100644 index 0000000..65878be --- /dev/null +++ b/prototype/corfurl/test/pulse_util/lamport_clock.erl @@ -0,0 +1,67 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(lamport_clock). + +-export([init/0, get/0, update/1, incr/0]). + +-define(KEY, ?MODULE). + +-ifdef(TEST). + +init() -> + case get(?KEY) of + undefined -> + %% {Ca, Cb, _} = now(), + %% FakeTOD = ((Ca * 1000000) + Cb) * 1000000, + FakeTOD = 0, + put(?KEY, FakeTOD + 1); + N when is_integer(N) -> + ok + end. + +get() -> + get(?KEY). + +update(Remote) -> + New = erlang:max(get(?KEY), Remote) + 1, + put(?KEY, New), + New. + +incr() -> + New = get(?KEY) + 1, + put(?KEY, New), + New. + +-else. % TEST + +init() -> + ok. + +get() -> + ok. + +update(_) -> + ok. + +incr() -> + ok. + +-endif. % TEST From 479efce0b12111b9a11ebf0ec6ce9cb5f9a0767a Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 11:52:31 +0900 Subject: [PATCH 43/70] Make PULSE model aware of read-repair for 'error_trimmed' races The read operation isn't a read-only operation: it can trigger read-repair in the case where a hole is discovered. The PULSE model needs to be aware of this kind of thing. Imagine that we have a 3-way race, between an append to LPN 1, a read of LPN 1, and a trim of LPN 1. There is a single chain of length 3. The FLUs in the chain are annotated below with "F1", "F2", and "F3". Note also the indentation levels, with F1's indented is smaller than F2's << F3's. 2,{call,<0.8748.3>,{append,<<0>>,will_be,1}}}, 4,{call,<0.8746.3>,{read,1}}}, 6,{call,<0.8747.3>,{trim,1,will_fail,error_unwritten}}}, 6, Read has contacted tail of chain, it is unwritten. Time for repair. 6,{read_repair,1,[<0.8741.3>,<0.8742.3>,<0.8743.3>]}}, 6, F1:{flu,write,<0.8741.3>,1,ok}}, 7, F1:{flu,trim,<0.8741.3>,1,ok}}, % by repair 9,{read_repair,1,fill,<0.8742.3>}}, 9, F2:{flu,trim,<0.8742.3>,1,error_unwritten}}, 9,{read_repair,1,<0.8741.3>,trimmed}}, 10,{result,<0.8747.3>,error_unwritten}}, Trim operation from time=6 stops here 10, F2:{flu,write,<0.8742.3>,1,ok}}, 11, F2:{flu,fill,<0.8742.3>,1,error_overwritten}}, 12, F3:{flu,write,<0.8743.3>,1,ok}}, 12,{read_repair,1,fill,<0.8742.3>,overwritten,try_trim}}, 13,{result,<0.8748.3>,{ok,1}}}, % append/write to LPN 1 13, F2:{flu,trim,<0.8742.3>,1,ok}}, 14,{read_repair,1,fill,<0.8743.3>}}, 15, F3:{flu,fill,<0.8743.3>,1,error_overwritten}}, 16,{read_repair,1,fill,<0.8743.3>,overwritten,try_to_trim}}, 17, F3:{flu,trim,<0.8743.3>,1,ok}}, 18,{result,<0.8746.3>,error_trimmed}}] --- prototype/corfurl/test/corfurl_pulse.erl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 01ce1cc..e687dbe 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -320,7 +320,7 @@ check_trace(Trace0, _Cmds, _Seed) -> AllLPNsR = eqc_temporal:stateful( fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN; - ({call, _Pid, {read, LPN}}) -> LPN; + ({call, _Pid, {read, LPN, _, _}}) -> LPN; ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN; ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN end, @@ -344,7 +344,9 @@ check_trace(Trace0, _Cmds, _Seed) -> ({call, Pid, {fill, LPN, will_be, ok}}) -> {mod_working, w_ft, LPN, fill, Pid}; ({call, Pid, {trim, LPN, will_be, ok}}) -> - {mod_working, w_tt, LPN, trim, Pid} + {mod_working, w_tt, LPN, trim, Pid}; + ({call, Pid, {read, LPN, will_fail, error_trimmed}}) -> + {mod_working, w_tt, LPN, read_repair_maybe, Pid} end, fun({mod_working, _Ttn, _LPN, _Pg, _Pid}, {result, _Pid, _Res})-> [] @@ -434,7 +436,7 @@ check_trace(Trace0, _Cmds, _Seed) -> %% that appear at any time during the read op's lifetime. Reads = eqc_temporal:stateful( - fun({call, Pid, {read, LPN}}) -> + fun({call, Pid, {read, LPN, _, _}}) -> {read, Pid, LPN, []} end, fun({read, Pid, LPN, V1s}, {values, Values}) -> @@ -519,6 +521,15 @@ add_LPN_to_append_calls([{TS, {call, Pid, {OpName, LPN}}}|Rest]) {TS, {call, Pid, {OpName, LPN, will_fail, Else}}} end, [New|add_LPN_to_append_calls(Rest)]; +add_LPN_to_append_calls([{TS, {call, Pid, {read, LPN}}}|Rest]) -> + Res = trace_lookahead_pid(Pid, Rest), + New = case Res of + Page when is_binary(Page) -> + {TS, {call, Pid, {read, LPN, will_be, Page}}}; + Else -> + {TS, {call, Pid, {read, LPN, will_fail, Else}}} + end, + [New|add_LPN_to_append_calls(Rest)]; add_LPN_to_append_calls([X|Rest]) -> [X|add_LPN_to_append_calls(Rest)]; add_LPN_to_append_calls([]) -> @@ -747,7 +758,7 @@ perhaps_trip_append_page(true, Else, _Page) -> perhaps_trip_read_approx(false, Res, _LPN) -> Res; perhaps_trip_read_approx(true, _Res, 3 = LPN) -> - io:format(user, "TRIP: read_approx LPN ~p", [LPN]), + io:format(user, "TRIP: read_approx LPN ~p\n", [LPN]), <<"FAKE!">>; perhaps_trip_read_approx(true, Res, _LPN) -> Res. From b7e3f91931a2b8f8c88d6be0e19274291dbd6b53 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 12:03:48 +0900 Subject: [PATCH 44/70] Add ?EVENT_LOG() to add extra trace info to corfurl and corfurl_flu --- prototype/corfurl/src/corfurl.erl | 49 +++++++++++++++++++----- prototype/corfurl/src/corfurl_flu.erl | 7 ++++ prototype/corfurl/test/corfurl_pulse.erl | 2 +- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index ab0b127..7a1dd0d 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -36,6 +36,10 @@ -endif. -endif. +%%% Debugging: for extra events in the PULSE event log, use the 2nd statement. +-define(EVENT_LOG(X), ok). +%%% -define(EVENT_LOG(X), event_logger:event(X)). + append_page(Sequencer, P, Page) -> append_page(Sequencer, P, Page, 1). @@ -77,7 +81,6 @@ write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> %% Whoa, partner, you're movin' kinda fast for a trim. %% This might've been due to us being too slow and someone %% else junked us. - %% TODO We should go trim our previously successful writes? error_trimmed; error_overwritten when Nth == 1 -> %% The sequencer lied, or we didn't use the sequencer and @@ -131,49 +134,75 @@ ok_or_trim(Else) -> Else. read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> + ?EVENT_LOG({read_repair, LPN, Chain, i_am, self()}), case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of {ok, Page} -> + ?EVENT_LOG({read_repair, LPN, Head, ok}), read_repair_chain2(Rest, Epoch, LPN, Page, Chain); error_badepoch -> + ?EVENT_LOG({read_repair, LPN, Head, badepoch}), error_badepoch; error_trimmed -> + ?EVENT_LOG({read_repair, LPN, Head, trimmed}), %% TODO: robustify - [ok = case ok_or_trim(corfurl_flu:fill(flu_pid(X), Epoch, LPN)) of - ok -> ok; - error_overwritten -> ok_or_trim(corfurl_flu:trim( - flu_pid(X), Epoch, LPN)); - Else -> Else - end || X <- Rest], + [begin + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X)}), + ok = case ok_or_trim(corfurl_flu:fill(flu_pid(X), Epoch, + LPN)) of + ok -> + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), ok}), + ok; + error_overwritten -> + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), overwritten, try_to_trim}), + Res2 = ok_or_trim(corfurl_flu:trim( + flu_pid(X), Epoch, LPN)), + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), trim, Res2}), + Res2; + Else -> + ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), Else}), + Else + end + end || X <- Rest], error_trimmed; error_unwritten -> + ?EVENT_LOG({read_repair, LPN, read, Head, unwritten}), error_unwritten %% Let it crash: error_overwritten end. read_repair_chain2([] = _Repairees, _Epoch, _LPN, Page, _OriginalChain) -> + ?EVENT_LOG({read_repair2, _LPN, finished, {ok, Page}}), {ok, Page}; read_repair_chain2([RepairFLU|Rest], Epoch, LPN, Page, OriginalChain) -> case corfurl_flu:write(flu_pid(RepairFLU), Epoch, LPN, Page) of ok -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), ok}), read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain); error_badepoch -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), badepoch}), error_badepoch; error_trimmed -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), trimmed}), error_trimmed; error_overwritten -> + ?EVENT_LOG({read_repair2, LPN, write, flu_pid(RepairFLU), overwritten}), %% We're going to do an optional sanity check here. %% TODO: make the sanity check configurable? case corfurl_flu:read(flu_pid(RepairFLU), Epoch, LPN) of {ok, Page2} when Page2 =:= Page -> - %% TODO: is there a need to continue working upstream - %% to fix problems? - {ok, Page2}; + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), exact_page}), + %% We're probably going to be racing against someone else + %% that's also doing repair, but so be it. + read_repair_chain2(Rest, Epoch, LPN, Page, OriginalChain); {ok, _Page2} -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), bad_page, _Page2}), giant_error({bummerbummer, ?MODULE, ?LINE, sanity_check_failure, lpn, LPN, epoch, Epoch}); error_badepoch -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), badepoch}), error_badepoch; error_trimmed -> + ?EVENT_LOG({read_repair2, LPN, read, flu_pid(RepairFLU), trimmed}), %% Start repair at the beginning to handle this case read_repair_chain(Epoch, LPN, OriginalChain) %% Let it crash: error_overwritten, error_unwritten diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index ba370da..d847d5d 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -45,6 +45,10 @@ -include_lib("kernel/include/file.hrl"). +%%% Debugging: for extra events in the PULSE event log, use the 2nd statement. +-define(EVENT_LOG(X), ok). +%%% -define(EVENT_LOG(X), event_logger(X)). + -record(state, { dir :: string(), mem_fh :: term(), @@ -159,6 +163,7 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, {ok, Offset} -> ok = write_page(Offset, LogicalPN, PageBin, State), NewMLPN = erlang:max(LogicalPN, MLPN), + ?EVENT_LOG({flu, write, self(), LogicalPN, ok}), {reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}}; Else -> {reply, {Else, LC2}, State} @@ -191,6 +196,7 @@ handle_call({{trim, ClientEpoch, _LogicalPN}, LC1}, _From, handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> LC2 = lamport_clock:update(LC1), {Reply, NewState} = do_trim_or_fill(trim, LogicalPN, State), + ?EVENT_LOG({flu, trim, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, NewState}; handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, @@ -201,6 +207,7 @@ handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> LC2 = lamport_clock:update(LC1), {Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State), + ?EVENT_LOG({flu, fill, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, NewState}; handle_call(get__mlp, _From, State) -> diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index e687dbe..7a3c645 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -595,7 +595,7 @@ make_chains(_ChainLen, [], SmallAcc, BigAcc) -> [lists:reverse(SmallAcc)|BigAcc]; make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> if length(SmallAcc) == ChainLen -> - make_chains(ChainLen, T, [H], [SmallAcc|BigAcc]); + make_chains(ChainLen, T, [H], [lists:reverse(SmallAcc)|BigAcc]); true -> make_chains(ChainLen, T, [H|SmallAcc], BigAcc) end. From d077148b47e159193fea971aa0d99cbb642ca973 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 18:22:40 +0900 Subject: [PATCH 45/70] Attempt to fix unimplemented corner case, thanks PULSE! --- prototype/corfurl/.gitignore | 1 + prototype/corfurl/src/corfurl.erl | 21 +++++++++++++++------ prototype/corfurl/src/corfurl_flu.erl | 20 +++++++++++++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/prototype/corfurl/.gitignore b/prototype/corfurl/.gitignore index 661af0e..91f0c9c 100644 --- a/prototype/corfurl/.gitignore +++ b/prototype/corfurl/.gitignore @@ -4,3 +4,4 @@ current_counterexample.eqc deps ebin/*.beam ebin/*.app +erl_crash.dump diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 7a1dd0d..0511518 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -62,14 +62,14 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> Chain = project_to_chain(LPN, P), - write_single_page_to_chain(Chain, Epoch, LPN, Page, 1). + write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). -write_single_page_to_chain([], _Epoch, _LPN, _Page, _Nth) -> +write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth) -> ok; -write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> +write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) -> case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of ok -> - write_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1); error_badepoch -> %% TODO: Interesting case: there may be cases where retrying with %% a new epoch & that epoch's projection is just fine (and @@ -96,13 +96,22 @@ write_single_page_to_chain([FLU|Rest], Epoch, LPN, Page, Nth) -> {ok, AlreadyThere} when AlreadyThere =:= Page -> %% Alright, well, let's go continue the repair/writing, %% since we agree on the page's value. - write_single_page_to_chain(Rest, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1); error_badepoch -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; + error_trimmed -> + %% PULSE can drive us to this case, excellent! + %% We had a race with read-repair and lost (the write). + %% Now this read failed with error_trimmed because we + %% lost a race with someone trimming this block. + %% Let's be paranoid and repair, just in case. + OurResult = error_trimmed, + error_trimmed = read_repair_chain(Epoch, LPN, Chain), + OurResult; Else -> %% Guess what?? PULSE can drive us to this case, excellent! - giant_error({left_off_here, ?MODULE, ?LINE, Else}) + giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth}) end end. diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index d847d5d..8951b5a 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -40,6 +40,7 @@ -compile(export_all). -ifdef(PULSE). -compile({parse_transform, pulse_instrument}). +-compile({pulse_skip,[{msc, 3}]}). -endif. -endif. @@ -47,6 +48,7 @@ %%% Debugging: for extra events in the PULSE event log, use the 2nd statement. -define(EVENT_LOG(X), ok). +%% -define(EVENT_LOG(X), erlang:display(X)). %%% -define(EVENT_LOG(X), event_logger(X)). -record(state, { @@ -96,7 +98,9 @@ fill(Pid, Epoch, LogicalPN) g_call(Pid, Arg, Timeout) -> LC1 = lamport_clock:get(), + msc(self(), Pid, Arg), {Res, LC2} = gen_server:call(Pid, {Arg, LC1}, Timeout), + msc(Pid, self(), Res), lamport_clock:update(LC2), Res. @@ -176,7 +180,9 @@ handle_call({{read, ClientEpoch, _LogicalPN}, LC1}, _From, {reply, {error_badepoch, LC2}, State}; handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> LC2 = lamport_clock:update(LC1), - {reply, {read_page(LogicalPN, State), LC2}, State}; + Reply = read_page(LogicalPN, State), + ?EVENT_LOG({flu, read, self(), LogicalPN, Reply}), + {reply, {Reply, LC2}, State}; handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch =< MinEpoch -> @@ -409,3 +415,15 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> true -> badarg end. + +-ifdef(PULSE_HACKING). +%% Create a trace file that can be formatted by "mscgen" utility. +%% Lots of hand-editing is required after creating the file, sorry! +msc(_From, _To, _Tag) -> + {ok, FH} = file:open("/tmp/goo", [write, append]), + io:format(FH, " \"~w\" -> \"~w\" [ label = \"~w\" ] ;\n", [_From, _To, _Tag]), + file:close(FH). +-else. % PULSE_HACKING +msc(_From, _To, _Tag) -> + ok. +-endif. % PULSE_HACkING From 13e15e0ecf8ae0e5e19ab98aca22c1f5f52dee09 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 18:24:07 +0900 Subject: [PATCH 46/70] Add MSC charts to help explain BAD-looking trim race --- .../corfurl/docs/corfurl/notes/README.md | 23 +++++++ .../corfurl/notes/read-repair-race.1.mscgen | 49 +++++++++++++++ .../corfurl/notes/read-repair-race.2.mscgen | 60 +++++++++++++++++++ .../corfurl/notes/read-repair-race.2b.mscgen | 57 ++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 prototype/corfurl/docs/corfurl/notes/README.md create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen diff --git a/prototype/corfurl/docs/corfurl/notes/README.md b/prototype/corfurl/docs/corfurl/notes/README.md new file mode 100644 index 0000000..337a34b --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/README.md @@ -0,0 +1,23 @@ + +## read-repair-race.1. + +First attempt at using "mscgen" to make some Message Sequence +Chart (MSC) for a race found at commit 087c2605ab. + + +## read-repair-race.2. + +Second attempt. This is almost exactly the trace that is +generated by this failing test case at commit 087c2605ab: + + C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]]. + eqc:check(corfurl_pulse:prop_pulse(), C2). + +## read-repair-race.2b.* + +Same basic condition as read-repair-race.2, but edited +substantially to make it clearer what is happening. +Also for commit 087c2605ab. + +I believe that I have a fix for the silver-colored +`error-overwritten`, but the correctness of it remains to be seen. diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen new file mode 100644 index 0000000..1cbec57 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen @@ -0,0 +1,49 @@ +msc { + "<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"]; + + "<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ; + "<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ; + "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"]; + "<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ; + "<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ; + "<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ; + "<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ; + "<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ; + "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"]; + "<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ; + "<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ; + "<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ; + "<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ; +} diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen new file mode 100644 index 0000000..9afffe2 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen @@ -0,0 +1,60 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; +} diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen new file mode 100644 index 0000000..978dc72 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen @@ -0,0 +1,57 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; +} From eabebac6f28ebc0054254973fd50a2dc7b7cb93b Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 21:34:09 +0900 Subject: [PATCH 47/70] Fix PULSE model difficulty of how to handle races between write & trim. This trim race is (as far as I can tell) fine -- I see no correctness problem with CORFU, on the client side or the server side. However, this race with a trim causes a model problem that I believe can be solved this way: 1. We must keep track of the fact that the page write is happening: someone can notice the write via read-repair or even a regular read by the tail. We do this in basically the way that all other writes are handled in the ValuesR relation. 2. Add new code to client-side writer: if there's a trim race, *and* if we're using PULSE, then return a special error code that says that the write was ok *and* that we raced with trim. 2b. If we aren't using pulse, just return {ok, LPN}. 3. For the transition check property, treat the new return code as if it is a w_tt. Actually, we use a special marker atom, w_special_trimmed for that purpose, but it is later treated the same way that w_tt is by the filter_transition_trimfill_suffixes() filter. --- prototype/corfurl/src/corfurl.erl | 37 ++++++++++++++---------- prototype/corfurl/src/corfurl_flu.erl | 16 ++++++++-- prototype/corfurl/test/corfurl_pulse.erl | 33 +++++++++++++++++---- prototype/corfurl/test/corfurl_test.erl | 3 +- 4 files changed, 64 insertions(+), 25 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 0511518..6936063 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -52,6 +52,8 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> X when X == error_overwritten; X == error_trimmed -> report_lost_race(LPN, X), append_page(Sequencer, P, Page); + {special_trimmed, LPN}=XX -> + XX; Else -> exit({todo, ?MODULE, line, ?LINE, Else}) end; @@ -62,14 +64,14 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> Chain = project_to_chain(LPN, P), - write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). + write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1, ok). -write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth) -> - ok; -write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) -> +write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) -> + Reply; +write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of ok -> - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); error_badepoch -> %% TODO: Interesting case: there may be cases where retrying with %% a new epoch & that epoch's projection is just fine (and @@ -77,11 +79,19 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) -> %% Figure out what those cases are, then for the %% destined-to-fail case, try to clean up (via trim?)? error_badepoch; - error_trimmed -> + error_trimmed when Nth == 1 -> %% Whoa, partner, you're movin' kinda fast for a trim. %% This might've been due to us being too slow and someone %% else junked us. error_trimmed; + error_trimmed when Nth > 1 -> + %% We're racing with a trimmer. We won the race at head, + %% but here in the middle or tail (Nth > 1), we lost. + %% Our strategy is keep racing down to the tail. + %% If we continue to lose the exact same race for the rest + %% of the chain, the 1st clause of this func will return 'ok'. + %% That is *exactly* our intent and purpose! + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); error_overwritten when Nth == 1 -> %% The sequencer lied, or we didn't use the sequencer and %% guessed and guessed poorly, or someone is accidentally @@ -96,21 +106,16 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) -> {ok, AlreadyThere} when AlreadyThere =:= Page -> %% Alright, well, let's go continue the repair/writing, %% since we agree on the page's value. - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1); + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); error_badepoch -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; error_trimmed -> - %% PULSE can drive us to this case, excellent! - %% We had a race with read-repair and lost (the write). - %% Now this read failed with error_trimmed because we - %% lost a race with someone trimming this block. - %% Let's be paranoid and repair, just in case. - OurResult = error_trimmed, - error_trimmed = read_repair_chain(Epoch, LPN, Chain), - OurResult; + %% This is the same as 'error_trimmed when Nth > 1' above. + %% Do the same thing. + write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); Else -> - %% Guess what?? PULSE can drive us to this case, excellent! + %% Can PULSE can drive us to this case? giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth}) end end. diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 8951b5a..c55dc72 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -48,8 +48,7 @@ %%% Debugging: for extra events in the PULSE event log, use the 2nd statement. -define(EVENT_LOG(X), ok). -%% -define(EVENT_LOG(X), erlang:display(X)). -%%% -define(EVENT_LOG(X), event_logger(X)). +%% -define(EVENT_LOG(X), event_logger:event(X)). -record(state, { dir :: string(), @@ -170,6 +169,7 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, ?EVENT_LOG({flu, write, self(), LogicalPN, ok}), {reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}}; Else -> + ?EVENT_LOG({flu, write, self(), LogicalPN, Else}), {reply, {Else, LC2}, State} end; @@ -416,6 +416,18 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) -> badarg end. +-ifdef(PULSE). +%% We do *not* want to remove any special PULSE return code. +undo_special_pulse_test_result(Res) -> + Res. +-else. % PULSE +undo_special_pulse_test_result({special_trimmed, LPN}) -> + {ok, LPN}; +undo_special_pulse_test_result(Res) -> + Res. +-endif. % PULSE + + -ifdef(PULSE_HACKING). %% Create a trace file that can be formatted by "mscgen" utility. %% Lots of hand-editing is required after creating the file, sorry! diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 7a3c645..58364b2 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -156,6 +156,9 @@ postcondition(_S, {call, _, setup, _}, #run{} = _V) -> true; postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) -> true; +postcondition(_S, {call, _, append, _}, {special_trimmed, LPN}) + when is_integer(LPN) -> + true; postcondition(_S, {call, _, append, _}, V) -> eqeq(V, todoTODO_fixit); postcondition(_S, {call, _, read_approx, _}, V) -> @@ -320,6 +323,7 @@ check_trace(Trace0, _Cmds, _Seed) -> AllLPNsR = eqc_temporal:stateful( fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN; + ({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN; ({call, _Pid, {read, LPN, _, _}}) -> LPN; ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN; ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN @@ -341,6 +345,14 @@ check_trace(Trace0, _Cmds, _Seed) -> Mods = eqc_temporal:stateful( fun({call, Pid, {append, Pg, will_be, LPN}}) -> {mod_working, w_1, LPN, Pg, Pid}; + ({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) -> + %% This is a special case for the model. We know that + %% a write raced with a trim and lost (at least some of + %% the time inside the chain). But the transition that + %% we model in this case is a special w_ type that is + %% is trated specially by the dictionary-making + %% creation of the ValuesR relation. + {mod_working, w_special_trimmed, LPN, Pg, Pid}; ({call, Pid, {fill, LPN, will_be, ok}}) -> {mod_working, w_ft, LPN, fill, Pid}; ({call, Pid, {trim, LPN, will_be, ok}}) -> @@ -396,13 +408,17 @@ check_trace(Trace0, _Cmds, _Seed) -> D; false -> orddict:append(LPN, error_trimmed,D) - end + end; + ({mod_start, w_special_trimmed, LPN, Pg}, D)-> + orddict:append(LPN, Pg, D) end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( fun({mod_end, w_1, LPN, Pg}, D) -> orddict:store(LPN, [Pg], D); ({mod_end, WType, LPN, _Pg}, D) when WType == w_ft; WType == w_tt -> + orddict:store(LPN, [error_trimmed], D); + ({mod_end, w_special_trimmed, LPN, _Pg}, D) -> orddict:store(LPN, [error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} @@ -635,14 +651,19 @@ filter_transition_trimfill_suffixes(Ttns) -> filter_1_transition_list([]) -> []; filter_1_transition_list(Old) -> - New = lists:reverse(lists:dropwhile(fun(w_tt) -> true; - (w_ft) -> true; + %% Strategy: Chop off all of the w_* at the end, then look at **Old** to + %% see if we chopped off any. If we did chop off any, then add back a + %% constant 'w_t+' as a suffix. + New = lists:reverse(lists:dropwhile(fun(w_tt) -> true; + (w_ft) -> true; + (w_special_trimmed) -> true; (_) -> false end, lists:reverse(Old))), Suffix = case lists:last(Old) of - w_ft -> ['w_t+']; - w_tt -> ['w_t+']; - _ -> [] + w_ft -> ['w_t+']; + w_tt -> ['w_t+']; + w_special_trimmed -> ['w_t+']; + _ -> [] end, New ++ Suffix. diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 4490131..1cf12e0 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -33,7 +33,8 @@ setup_flu_basedir() -> - "/tmp/" ++ atom_to_list(?MODULE) ++ ".". + "./tmp." ++ + atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".". setup_flu_dir(N) -> setup_flu_basedir() ++ integer_to_list(N). From 638a45e8cb1982d69de38f2003603be0b685a581 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 25 Feb 2014 14:53:35 +0900 Subject: [PATCH 48/70] Partial fix for model problem in honest write-vs-trim race --- prototype/corfurl/test/corfurl_pulse.erl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 58364b2..98f295b 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -414,7 +414,14 @@ check_trace(Trace0, _Cmds, _Seed) -> end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]), Dict3 = lists:foldl( fun({mod_end, w_1, LPN, Pg}, D) -> - orddict:store(LPN, [Pg], D); + Vs1 = orddict:fetch(LPN, D), + %% We've written a page. error_unwriten is + %% now impossible; any other binary() is + %% also impossible. However, there may be + %% a trim operation that's still in flight! + Vs2 = [V || V <- Vs1, V /= error_unwritten, + not is_binary(V)], + orddict:store(LPN, [Pg|Vs2], D); ({mod_end, WType, LPN, _Pg}, D) when WType == w_ft; WType == w_tt -> orddict:store(LPN, [error_trimmed], D); From 20a2a5164935fdcf94b3e639229a423b82e7aac8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 25 Feb 2014 15:00:43 +0900 Subject: [PATCH 49/70] Partial fix (#2 of 2) for model problem in honest write-vs-trim race --- prototype/corfurl/test/corfurl_pulse.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 98f295b..0f8eac6 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -425,8 +425,8 @@ check_trace(Trace0, _Cmds, _Seed) -> ({mod_end, WType, LPN, _Pg}, D) when WType == w_ft; WType == w_tt -> orddict:store(LPN, [error_trimmed], D); - ({mod_end, w_special_trimmed, LPN, _Pg}, D) -> - orddict:store(LPN, [error_trimmed], D) + ({mod_end, w_special_trimmed, LPN, Pg}, D) -> + orddict:store(LPN, [Pg,error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} end, InitialValDict, StartsDones), From a64a09338ddafa6bb5628d1404d2e64f0bb6efc3 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 25 Feb 2014 16:15:01 +0900 Subject: [PATCH 50/70] Fix broken EUnit tests (been in PULSE land too long) --- prototype/corfurl/src/corfurl.erl | 5 +- prototype/corfurl/src/corfurl_flu.erl | 53 ++++++++---- prototype/corfurl/src/corfurl_sequencer.erl | 74 ++++++----------- prototype/corfurl/test/corfurl_pulse.erl | 2 + .../corfurl/test/corfurl_sequencer_test.erl | 80 +++++++++++++++++++ prototype/corfurl/test/corfurl_test.erl | 2 +- 6 files changed, 151 insertions(+), 65 deletions(-) create mode 100644 prototype/corfurl/test/corfurl_sequencer_test.erl diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 6936063..38332a8 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -64,7 +64,10 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> Chain = project_to_chain(LPN, P), - write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1, ok). + write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). + +write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth) -> + write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth, ok). write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) -> Reply; diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index c55dc72..fc73173 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -93,14 +93,15 @@ trim(Pid, Epoch, LogicalPN) fill(Pid, Epoch, LogicalPN) when is_integer(Epoch), Epoch > 0, is_integer(LogicalPN), LogicalPN > 0 -> - g_call(Pid, {fill, Epoch, LogicalPN}, infinity). + Res = g_call(Pid, {fill, Epoch, LogicalPN}, infinity), + undo_special_pulse_test_result(Res). g_call(Pid, Arg, Timeout) -> - LC1 = lamport_clock:get(), + LC1 = lclock_get(), msc(self(), Pid, Arg), {Res, LC2} = gen_server:call(Pid, {Arg, LC1}, Timeout), msc(Pid, self(), Res), - lamport_clock:update(LC2), + lclock_update(LC2), Res. -ifdef(TEST). @@ -119,7 +120,7 @@ get__trim_watermark(Pid) -> %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% init({Dir, ExpPageSize, ExpMaxMem}) -> - lamport_clock:init(), + lclock_init(), MemFile = memfile_path(Dir), filelib:ensure_dir(MemFile), @@ -157,11 +158,11 @@ handle_call(Call, From, #state{max_logical_page=unknown} = State) -> handle_call({{write, ClientEpoch, _LogicalPN, _PageBin}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, #state{max_logical_page=MLPN} = State) -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), case check_write(LogicalPN, PageBin, State) of {ok, Offset} -> ok = write_page(Offset, LogicalPN, PageBin, State), @@ -176,20 +177,20 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From, handle_call({{read, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), Reply = read_page(LogicalPN, State), ?EVENT_LOG({flu, read, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, State}; handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch =< MinEpoch -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=State) -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), NewState = State#state{min_epoch=ClientEpoch}, ok = write_hard_state(NewState), {reply, {{ok, MLPN}, LC2}, NewState}; @@ -197,10 +198,10 @@ handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=Sta handle_call({{trim, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {Reply, NewState} = do_trim_or_fill(trim, LogicalPN, State), ?EVENT_LOG({flu, trim, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, NewState}; @@ -208,10 +209,10 @@ handle_call({{trim, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, #state{min_epoch=MinEpoch} = State) when ClientEpoch < MinEpoch -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> - LC2 = lamport_clock:update(LC1), + LC2 = lclock_update(LC1), {Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State), ?EVENT_LOG({flu, fill, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, NewState}; @@ -439,3 +440,27 @@ msc(_From, _To, _Tag) -> msc(_From, _To, _Tag) -> ok. -endif. % PULSE_HACkING + +-ifdef(PULSE). + +lclock_init() -> + lamport_clock:init(). + +lclock_get() -> + lamport_clock:get(). + +lclock_update(LC) -> + lamport_clock:update(LC). + +-else. % PULSE + +lclock_init() -> + ok. + +lclock_get() -> + ok. + +lclock_update(_LC) -> + ok. + +-endif. % PLUSE diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 4f14e66..f600713 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -33,6 +33,7 @@ -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-compile(export_all). -ifdef(PULSE). -compile({parse_transform, pulse_instrument}). -endif. @@ -50,15 +51,15 @@ stop(Pid) -> gen_server:call(Pid, stop, infinity). get(Pid, NumPages) -> - {LPN, LC} = gen_server:call(Pid, {get, NumPages, lamport_clock:get()}, + {LPN, LC} = gen_server:call(Pid, {get, NumPages, lclock_get()}, infinity), - lamport_clock:update(LC), + lclock_update(LC), LPN. %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% init({FLUs, TypeOrSeed}) -> - lamport_clock:init(), + lclock_init(), MLP = get_max_logical_page(FLUs), if TypeOrSeed == standard -> {ok, MLP + 1}; @@ -69,10 +70,10 @@ init({FLUs, TypeOrSeed}) -> end. handle_call({get, NumPages, LC}, _From, MLP) when is_integer(MLP) -> - NewLC = lamport_clock:update(LC), + NewLC = lclock_update(LC), {reply, {MLP, NewLC}, MLP + NumPages}; handle_call({get, NumPages, LC}, _From, {MLP, BadPercent, MaxDifference}) -> - NewLC = lamport_clock:update(LC), + NewLC = lclock_update(LC), Fudge = case random:uniform(100) of N when N < BadPercent -> random:uniform(MaxDifference * 2) - MaxDifference; @@ -94,7 +95,7 @@ handle_info(_Info, MLP) -> {noreply, MLP}. terminate(_Reason, _MLP) -> - %% io:format(user, "C=~w,", [lamport_clock:get()]), + %% io:format(user, "C=~w,", [lclock_get()]), ok. code_change(_OldVsn, MLP, _Extra) -> @@ -107,51 +108,26 @@ get_max_logical_page(FLUs) -> FLU <- FLUs, {ok, Ps} <- [corfurl_flu:status(FLU)]]). -%%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% %%%% +-ifdef(PULSE). --ifdef(TEST). --ifndef(PULSE). +lclock_init() -> + lamport_clock:init(). -smoke_test() -> - BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".", - PageSize = 8, - NumPages = 500, - NumFLUs = 4, - MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end, - Del = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) || - X <- lists:seq(1, NumFLUs)] end, +lclock_get() -> + lamport_clock:get(). - Del(), - FLUs = [begin - element(2, corfurl_flu:start_link(MyDir(X), - PageSize, NumPages*PageSize)) - end || X <- lists:seq(1, NumFLUs)], - FLUsNums = lists:zip(FLUs, lists:seq(1, NumFLUs)), - - try - [ok = corfurl_flu:write(FLU, 1, PageNum, <<42:(8*8)>>) || - {FLU, PageNum} <- FLUsNums], - MLP0 = NumFLUs, - NumFLUs = get_max_logical_page(FLUs), +lclock_update(LC) -> + lamport_clock:update(LC). - %% Excellent. Now let's start the sequencer and see if it gets - %% the same answer. If yes, then the first get will return MLP1, - %% yadda yadda. - MLP1 = MLP0 + 1, - MLP3 = MLP0 + 3, - MLP4 = MLP0 + 4, - {ok, Sequencer} = start_link(FLUs), - try - MLP1 = get(Sequencer, 2), - MLP3 = get(Sequencer, 1), - MLP4 = get(Sequencer, 1) - after - stop(Sequencer) - end - after - [ok = corfurl_flu:stop(FLU) || FLU <- FLUs], - Del() - end. +-else. % PULSE --endif. % not PULSE --endif. % TEST +lclock_init() -> + ok. + +lclock_get() -> + ok. + +lclock_update(_LC) -> + ok. + +-endif. % PLUSE diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 0f8eac6..72eb6bd 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -505,6 +505,8 @@ check_trace(Trace0, _Cmds, _Seed) -> ?QC_FMT("*Trace: ~p\n", [Trace]), ?QC_FMT("*ModsReads: ~p\n", [eqc_temporal:unions([Mods,Reads])]), ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), + ?QC_FMT("*ValuesR: ~p\n", [eqc_temporal:unions([ValuesR, StartsDones])]), + ?QC_FMT("*Calls: ~p\n", [Calls]), ?QC_FMT("*BadReads: ~p\n", [BadReads]) end, conjunction( diff --git a/prototype/corfurl/test/corfurl_sequencer_test.erl b/prototype/corfurl/test/corfurl_sequencer_test.erl new file mode 100644 index 0000000..0bef793 --- /dev/null +++ b/prototype/corfurl/test/corfurl_sequencer_test.erl @@ -0,0 +1,80 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_sequencer_test). + +-compile(export_all). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. +-endif. + +-define(M, corfurl_sequencer). + +-ifdef(TEST). +-ifndef(PULSE). + +smoke_test() -> + BaseDir = "/tmp/" ++ atom_to_list(?MODULE) ++ ".", + PageSize = 8, + NumPages = 500, + NumFLUs = 4, + MyDir = fun(X) -> BaseDir ++ integer_to_list(X) end, + Del = fun() -> [ok = corfurl_util:delete_dir(MyDir(X)) || + X <- lists:seq(1, NumFLUs)] end, + + Del(), + FLUs = [begin + element(2, corfurl_flu:start_link(MyDir(X), + PageSize, NumPages*PageSize)) + end || X <- lists:seq(1, NumFLUs)], + FLUsNums = lists:zip(FLUs, lists:seq(1, NumFLUs)), + + try + [ok = corfurl_flu:write(FLU, 1, PageNum, <<42:(8*8)>>) || + {FLU, PageNum} <- FLUsNums], + MLP0 = NumFLUs, + NumFLUs = ?M:get_max_logical_page(FLUs), + + %% Excellent. Now let's start the sequencer and see if it gets + %% the same answer. If yes, then the first get will return MLP1, + %% yadda yadda. + MLP1 = MLP0 + 1, + MLP3 = MLP0 + 3, + MLP4 = MLP0 + 4, + {ok, Sequencer} = ?M:start_link(FLUs), + try + MLP1 = ?M:get(Sequencer, 2), + MLP3 = ?M:get(Sequencer, 1), + MLP4 = ?M:get(Sequencer, 1) + after + ?M:stop(Sequencer) + end + after + [ok = corfurl_flu:stop(FLU) || FLU <- FLUs], + Del() + end. + +-endif. % not PULSE +-endif. % TEST diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 1cf12e0..c745538 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -106,7 +106,7 @@ smoke1_test() -> %% Simulate a failed write to the chain. [F6a, F6b, F6c] = Chain6 = ?M:project_to_chain(6, P1), NotHead6 = [F6b, F6c], - ok = ?M:write_single_page_to_chain([F6a], Epoch, 6, Pg6, 1), + ok = ?M:write_single_page_to_chain([F6a], [F6a], Epoch, 6, Pg6, 1), %% Does the chain look as expected? {ok, Pg6} = corfurl_flu:read(?M:flu_pid(F6a), Epoch, 6), From d5091358ffb4f878dec19dbd2eaaafbdd8e926e1 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 26 Feb 2014 16:59:28 +0900 Subject: [PATCH 51/70] Put the sequencer pid inside the projection --- prototype/corfurl/include/corfurl.hrl | 3 ++ prototype/corfurl/src/corfurl.erl | 19 +++++++---- prototype/corfurl/src/corfurl_sequencer.erl | 5 ++- prototype/corfurl/test/corfurl_pulse.erl | 35 +++++++++++---------- prototype/corfurl/test/corfurl_test.erl | 16 +++++----- 5 files changed, 47 insertions(+), 31 deletions(-) diff --git a/prototype/corfurl/include/corfurl.hrl b/prototype/corfurl/include/corfurl.hrl index e3b2b28..8bb452e 100644 --- a/prototype/corfurl/include/corfurl.hrl +++ b/prototype/corfurl/include/corfurl.hrl @@ -22,6 +22,8 @@ -type flu() :: pid() | flu_name(). -type flu_chain() :: [flu()]. +-type seq_name() :: {'undefined' | pid(), atom(), atom()}. + -record(range, { pn_start :: non_neg_integer(), % start page number pn_end :: non_neg_integer(), % end page number @@ -30,6 +32,7 @@ -record(proj, { % Projection epoch :: non_neg_integer(), + seq :: 'undefined' | seq_name(), r :: [#range{}] }). diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 38332a8..7ccc9c0 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -24,7 +24,7 @@ new_range/3, read_projection/2, save_projection/2]). --export([append_page/3, read_page/2, scan_forward/3, +-export([append_page/2, read_page/2, scan_forward/3, fill_page/2, trim_page/2]). -include("corfurl.hrl"). @@ -40,10 +40,17 @@ -define(EVENT_LOG(X), ok). %%% -define(EVENT_LOG(X), event_logger:event(X)). -append_page(Sequencer, P, Page) -> - append_page(Sequencer, P, Page, 1). +append_page(P, Page) -> + append_page(P, Page, 1). -append_page(Sequencer, P, Page, Retries) when Retries < 50 -> +append_page(#proj{seq={undefined, SeqHost, SeqName}} = P, Page, Retries) -> + case rpc:call(SeqHost, erlang, whereis, [SeqName]) of + SeqPid when is_pid(SeqPid) -> + append_page(P#proj{seq={SeqPid, SeqHost, SeqName}}, Page, Retries); + Else -> + exit({bummer, mod, ?MODULE, line, ?LINE, error, Else}) + end; +append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) when Retries < 50 -> case corfurl_sequencer:get(Sequencer, 1) of LPN when is_integer(LPN) -> case write_single_page(P, LPN, Page) of @@ -51,7 +58,7 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> {ok, LPN}; X when X == error_overwritten; X == error_trimmed -> report_lost_race(LPN, X), - append_page(Sequencer, P, Page); + append_page(P, Page); {special_trimmed, LPN}=XX -> XX; Else -> @@ -59,7 +66,7 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 -> end; _ -> timer:sleep(Retries), % TODO naive - append_page(Sequencer, P, Page, Retries * 2) + append_page(P, Page, Retries * 2) end. write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index f600713..8a0240f 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -45,7 +45,10 @@ start_link(FLUs) -> start_link(FLUs, standard). start_link(FLUs, SeqType) -> - gen_server:start_link(?MODULE, {FLUs, SeqType}, []). + start_link(FLUs, SeqType, ?SERVER). + +start_link(FLUs, SeqType, RegName) -> + gen_server:start_link({local, RegName}, ?MODULE, {FLUs, SeqType}, []). stop(Pid) -> gen_server:call(Pid, stop, infinity). diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 72eb6bd..fc74e7b 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -50,7 +50,6 @@ -define(MY_KEY, ?MY_TAB). -record(run, { - seq, % Sequencer proj, % Projection flus % List of FLUs }). @@ -607,11 +606,12 @@ zipwith(F, [X|Xs], [Y|Ys]) -> [F(X, Y)|zipwith(F, Xs, Ys)]; zipwith(_, _, _) -> []. -clean_up_runtime(R) -> +clean_up_runtime(#run{flus=Flus, proj=P}) -> %% io:format(user, "clean_up_runtime: run = ~p\n", [R]), - catch corfurl_sequencer:stop(R#run.seq), - [catch corfurl_flu:stop(F) || F <- R#run.flus], - corfurl_test:setup_del_all(length(R#run.flus)). + #proj{seq={Seq,_,_}} = P, + catch corfurl_sequencer:stop(Seq), + [catch corfurl_flu:stop(F) || F <- Flus], + corfurl_test:setup_del_all(length(Flus)). make_chains(ChainLen, FLUs) -> make_chains(ChainLen, FLUs, [], []). @@ -633,7 +633,8 @@ setup(NumChains, ChainLen, PageSize, SeqType) -> Chains = make_chains(ChainLen, FLUs), %% io:format(user, "Cs = ~p\n", [Chains]), Proj = corfurl:new_simple_projection(1, 1, ?MAX_PAGES, Chains), - Run = #run{seq=Seq, proj=Proj, flus=FLUs}, + Run = #run{proj=Proj#proj{seq={Seq, node(), 'corfurl pulse seq thingie'}}, + flus=FLUs}, ets:insert(?MY_TAB, {?MY_KEY, Run}), Run. @@ -688,7 +689,7 @@ log_make_result(Result) -> log_make_result(Pid, Result) -> {result, Pid, Result}. -pick_an_LPN(Seq, SeedInt) -> +pick_an_LPN(#proj{seq={Seq,_,_}}, SeedInt) -> Max = corfurl_sequencer:get(Seq, 0), %% The sequencer may be lying to us, shouganai. if SeedInt > Max -> (SeedInt rem Max) + 1; @@ -701,12 +702,12 @@ pick_an_LPN(Seq, SeedInt) -> event_logger:event(log_make_result(LOG__Result), lamport_clock:get()), LOG__Result). -append(#run{seq=Seq, proj=Proj}, Page) -> +append(#run{proj=Proj}, Page) -> lamport_clock:init(), lamport_clock:incr(), ?LOG({append, Page}, begin - Res = corfurl:append_page(Seq, Proj, Page), + Res = corfurl:append_page(Proj, Page), perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) end). @@ -715,21 +716,21 @@ read_result_mangle({ok, Page}) -> read_result_mangle(Else) -> Else. -read_approx(#run{seq=Seq, proj=Proj}, SeedInt) -> +read_approx(#run{proj=Proj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), - LPN = pick_an_LPN(Seq, SeedInt), + LPN = pick_an_LPN(Proj, SeedInt), ?LOG({read, LPN}, begin Res = read_result_mangle(corfurl:read_page(Proj, LPN)), perhaps_trip_read_approx(?TRIP_bad_read, Res, LPN) end). -scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> +scan_forward(#run{proj=Proj}, SeedInt, NumPages) -> lamport_clock:init(), lamport_clock:incr(), StartLPN = if SeedInt == 1 -> 1; - true -> pick_an_LPN(Seq, SeedInt) + true -> pick_an_LPN(Proj, SeedInt) end, %% Our job is complicated by the ?LOG() macro, which isn't good enough %% for our purpose: we must lie about the starting timestamp, to make @@ -757,20 +758,20 @@ scan_forward(#run{seq=Seq, proj=Proj}, SeedInt, NumPages) -> end end). -fill(#run{seq=Seq, proj=Proj}, SeedInt) -> +fill(#run{proj=Proj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), - LPN = pick_an_LPN(Seq, SeedInt), + LPN = pick_an_LPN(Proj, SeedInt), ?LOG({fill, LPN}, begin Res = corfurl:fill_page(Proj, LPN), perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) end). -trim(#run{seq=Seq, proj=Proj}, SeedInt) -> +trim(#run{proj=Proj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), - LPN = pick_an_LPN(Seq, SeedInt), + LPN = pick_an_LPN(Proj, SeedInt), ?LOG({trim, LPN}, begin Res = corfurl:trim_page(Proj, LPN), diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index c745538..1e221f3 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -83,8 +83,9 @@ smoke1_test() -> lists:flatten(io_lib:format("~8..0w", [X])))} || X <- lists:seq(1, 5)], try - P1 = ?M:new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), - [begin {ok, LPN} = ?M:append_page(Seq, P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + P0 = ?M:new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), + P1 = P0#proj{seq={Seq, unused, unused}}, + [begin {ok, LPN} = ?M:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], [begin {ok, Pg} = ?M:read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], @@ -153,11 +154,11 @@ forfun_test_() -> [forfun(Procs) || Procs <- [10,100,1000,5000]] end}. -forfun_append(0, _Seq, _P, _Page) -> +forfun_append(0, _P, _Page) -> ok; -forfun_append(N, Seq, P, Page) -> +forfun_append(N, #proj{seq={Seq, _, _}} = P, Page) -> {ok, _} = ?M:append_page(Seq, P, Page), - forfun_append(N - 1, Seq, P, Page). + forfun_append(N - 1, P, Page). %%% My MBP, SSD %%% The 1K and 5K procs shows full-mailbox-scan ickiness @@ -191,13 +192,14 @@ forfun(NumProcs) -> try Chains = [[F1, F2], [F3, F4]], %%Chains = [[F1], [F2], [F3], [F4]], - P = ?M:new_simple_projection(1, 1, NumPages*2, Chains), + P0 = ?M:new_simple_projection(1, 1, NumPages*2, Chains), + P = P0#proj{seq={Seq, unused, unused}}, Me = self(), Start = now(), Ws = [begin Page = <>, spawn_link(fun() -> - forfun_append(PagesPerProc, Seq, P, Page), + forfun_append(PagesPerProc, P, Page), Me ! {done, self()} end) end || X <- lists:seq(1, NumProcs)], From d93572c3911cd9682379d8980a431113a47ad033 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 26 Feb 2014 21:09:11 +0900 Subject: [PATCH 52/70] Refactoring to implement stop_sequencer command --- prototype/corfurl/include/corfurl.hrl | 1 + prototype/corfurl/src/corfurl.erl | 87 ++++---------- prototype/corfurl/src/corfurl_client.erl | 111 +++++++++++++++++ prototype/corfurl/src/corfurl_sequencer.erl | 35 ++++-- prototype/corfurl/test/corfurl_pulse.erl | 112 ++++++++++++++---- .../corfurl/test/corfurl_sequencer_test.erl | 6 +- prototype/corfurl/test/corfurl_test.erl | 15 ++- 7 files changed, 260 insertions(+), 107 deletions(-) create mode 100644 prototype/corfurl/src/corfurl_client.erl diff --git a/prototype/corfurl/include/corfurl.hrl b/prototype/corfurl/include/corfurl.hrl index 8bb452e..f932892 100644 --- a/prototype/corfurl/include/corfurl.hrl +++ b/prototype/corfurl/include/corfurl.hrl @@ -31,6 +31,7 @@ }). -record(proj, { % Projection + dir :: string(), epoch :: non_neg_integer(), seq :: 'undefined' | seq_name(), r :: [#range{}] diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 7ccc9c0..6820578 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -20,11 +20,12 @@ -module(corfurl). --export([new_simple_projection/4, +-export([new_simple_projection/5, new_range/3, read_projection/2, - save_projection/2]). --export([append_page/2, read_page/2, scan_forward/3, + save_projection/2, + latest_projection_epoch_number/1]). +-export([write_page/3, read_page/2, scan_forward/3, fill_page/2, trim_page/2]). -include("corfurl.hrl"). @@ -40,48 +41,19 @@ -define(EVENT_LOG(X), ok). %%% -define(EVENT_LOG(X), event_logger:event(X)). -append_page(P, Page) -> - append_page(P, Page, 1). - -append_page(#proj{seq={undefined, SeqHost, SeqName}} = P, Page, Retries) -> - case rpc:call(SeqHost, erlang, whereis, [SeqName]) of - SeqPid when is_pid(SeqPid) -> - append_page(P#proj{seq={SeqPid, SeqHost, SeqName}}, Page, Retries); - Else -> - exit({bummer, mod, ?MODULE, line, ?LINE, error, Else}) - end; -append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) when Retries < 50 -> - case corfurl_sequencer:get(Sequencer, 1) of - LPN when is_integer(LPN) -> - case write_single_page(P, LPN, Page) of - ok -> - {ok, LPN}; - X when X == error_overwritten; X == error_trimmed -> - report_lost_race(LPN, X), - append_page(P, Page); - {special_trimmed, LPN}=XX -> - XX; - Else -> - exit({todo, ?MODULE, line, ?LINE, Else}) - end; - _ -> - timer:sleep(Retries), % TODO naive - append_page(P, Page, Retries * 2) - end. - -write_single_page(#proj{epoch=Epoch} = P, LPN, Page) -> +write_page(#proj{epoch=Epoch} = P, LPN, Page) -> Chain = project_to_chain(LPN, P), - write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). + write_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1). -write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth) -> - write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth, ok). +write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth) -> + write_page_to_chain(Chain, Chain, Epoch, LPN, Page, Nth, ok). -write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) -> +write_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) -> Reply; -write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> +write_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of ok -> - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); error_badepoch -> %% TODO: Interesting case: there may be cases where retrying with %% a new epoch & that epoch's projection is just fine (and @@ -101,7 +73,7 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> %% If we continue to lose the exact same race for the rest %% of the chain, the 1st clause of this func will return 'ok'. %% That is *exactly* our intent and purpose! - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); error_overwritten when Nth == 1 -> %% The sequencer lied, or we didn't use the sequencer and %% guessed and guessed poorly, or someone is accidentally @@ -116,14 +88,14 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) -> {ok, AlreadyThere} when AlreadyThere =:= Page -> %% Alright, well, let's go continue the repair/writing, %% since we agree on the page's value. - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply); error_badepoch -> %% TODO: same TODO as the above error_badepoch case. error_badepoch; error_trimmed -> %% This is the same as 'error_trimmed when Nth > 1' above. %% Do the same thing. - write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); + write_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN}); Else -> %% Can PULSE can drive us to this case? giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth}) @@ -313,8 +285,9 @@ new_range(Start, End, ChainList) -> %% TODO: sanity checking of ChainList, Start < End, yadda #range{pn_start=Start, pn_end=End, chains=list_to_tuple(ChainList)}. -new_simple_projection(Epoch, Start, End, ChainList) -> - #proj{epoch=Epoch, r=[new_range(Start, End, ChainList)]}. +new_simple_projection(Dir, Epoch, Start, End, ChainList) -> + ok = filelib:ensure_dir(Dir ++ "/unused"), + #proj{dir=Dir, epoch=Epoch, r=[new_range(Start, End, ChainList)]}. make_projection_path(Dir, Epoch) -> lists:flatten(io_lib:format("~s/~12..0w.proj", [Dir, Epoch])). @@ -346,6 +319,10 @@ save_projection(Dir, #proj{epoch=Epoch} = P) -> Else % TODO API corner case end. +latest_projection_epoch_number(Dir) -> + {Epoch, _} = string:to_integer(lists:last(filelib:wildcard("*.proj", Dir))), + Epoch. + project_to_chain(LPN, P) -> %% TODO fixme %% TODO something other than round-robin? @@ -354,25 +331,3 @@ project_to_chain(LPN, P) -> I = ((LPN - Start) rem tuple_size(Chains)) + 1, element(I, Chains) end. - --ifdef(TEST). --ifdef(PULSE). -report_lost_race(_LPN, _Reason) -> - %% It's interesting (sometime?) to know if a page was overwritten - %% because the sequencer was configured by QuickCheck to hand out - %% duplicate LPNs. If this gets too annoying, this can be a no-op - %% function. - io:format(user, "o", []). --else. % PULSE -report_lost_race(LPN, Reason) -> - io:format(user, "LPN ~p race lost: ~p\n", [LPN, Reason]). --endif. % PULSE --else. % TEST - -report_lost_race(LPN, Reason) -> - %% Perhaps it's an interesting event, but the rest of the system - %% should react correctly whenever this happens, so it shouldn't - %% ever cause an external consistency problem. - error_logger:debug_msg("LPN ~p race lost: ~p\n", [LPN, Reason]). - --endif. % TEST diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl new file mode 100644 index 0000000..df01512 --- /dev/null +++ b/prototype/corfurl/src/corfurl_client.erl @@ -0,0 +1,111 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(corfurl_client). + +-export([append_page/2]). +-export([restart_sequencer/1]). + +-include("corfurl.hrl"). + +-define(LONG_TIME, 5*1000). +%% -define(LONG_TIME, 30*1000). + +append_page(P, Page) -> + append_page(P, Page, 1). + +append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) + when Retries < 50 -> + try + case corfurl_sequencer:get(Sequencer, 1) of + {ok, LPN} -> + case append_page2(P, LPN, Page) of + lost_race -> + append_page(P, Page, Retries - 1); + Else -> + {Else, P} + end + end + catch + exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} + when Reason == noproc; Reason == normal -> + append_page(restart_sequencer(P), Page, Retries); + exit:Exit -> + {failed, incomplete_code, Exit} + end. + +append_page2(P, LPN, Page) -> + case corfurl:write_page(P, LPN, Page) of + ok -> + {ok, LPN}; + X when X == error_overwritten; X == error_trimmed -> + report_lost_race(LPN, X), + lost_race; + {special_trimmed, LPN}=XX -> + XX + %% Let it crash: error_unwritten + end. + +restart_sequencer(#proj{seq={OldSequencer, _SeqHost, SeqName}, + epoch=Epoch, r=Ranges} = P) -> + spawn(fun() -> + (catch corfurl_sequencer:stop(OldSequencer)) + end), + TODO_type = standard, % TODO: fix this hard-coding + FLUs = lists:usort( + [FLU || R <- Ranges, + C <- tuple_to_list(R#range.chains), FLU <- C]), + case corfurl_sequencer:start_link(FLUs, TODO_type, SeqName) of + {ok, Pid} -> + NewP = P#proj{seq={Pid, node(), SeqName}, epoch=Epoch+1}, + save_projection_or_get_latest(NewP) + end. + +save_projection_or_get_latest(#proj{dir=Dir} = P) -> + case corfurl:save_projection(Dir, P) of + ok -> + P; + error_overwritten -> + NewEpoch = corfurl:latest_projection_epoch_number(Dir), + {ok, NewP} = corfurl:read_projection(Dir, NewEpoch), + NewP + end. + +-ifdef(TEST). +-ifdef(PULSE). +report_lost_race(_LPN, _Reason) -> + %% It's interesting (sometime?) to know if a page was overwritten + %% because the sequencer was configured by QuickCheck to hand out + %% duplicate LPNs. If this gets too annoying, this can be a no-op + %% function. + io:format(user, "o", []). +-else. % PULSE +report_lost_race(LPN, Reason) -> + io:format(user, "LPN ~p race lost: ~p\n", [LPN, Reason]). +-endif. % PULSE +-else. % TEST + +report_lost_race(LPN, Reason) -> + %% Perhaps it's an interesting event, but the rest of the system + %% should react correctly whenever this happens, so it shouldn't + %% ever cause an external consistency problem. + error_logger:debug_msg("LPN ~p race lost: ~p\n", [LPN, Reason]). + +-endif. % TEST diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 8a0240f..1f7a3d3 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -22,7 +22,8 @@ -behaviour(gen_server). --export([start_link/1, stop/1, get/2]). +-export([start_link/1, stop/1, stop/2, + get/2]). -ifdef(TEST). -export([start_link/2]). -compile(export_all). @@ -40,6 +41,8 @@ -endif. -define(SERVER, ?MODULE). +%% -define(LONG_TIME, 30*1000). +-define(LONG_TIME, 5*1000). start_link(FLUs) -> start_link(FLUs, standard). @@ -48,14 +51,31 @@ start_link(FLUs, SeqType) -> start_link(FLUs, SeqType, ?SERVER). start_link(FLUs, SeqType, RegName) -> - gen_server:start_link({local, RegName}, ?MODULE, {FLUs, SeqType}, []). + case gen_server:start_link({local, RegName}, ?MODULE, {FLUs, SeqType},[]) of + {ok, Pid} -> + {ok, Pid}; + {error, {already_started, Pid}} -> + {ok, Pid}; + Else -> + Else + end. stop(Pid) -> - gen_server:call(Pid, stop, infinity). + stop(Pid, stop). + +stop(Pid, Method) -> + Res = gen_server:call(Pid, stop, infinity), + if Method == kill -> + io:format("stop(kill)"), + %% Emulate gen.erl's client-side behavior when the server process + %% is killed. + exit(killed); + true -> + Res + end. get(Pid, NumPages) -> - {LPN, LC} = gen_server:call(Pid, {get, NumPages, lclock_get()}, - infinity), + {LPN, LC} = gen_server:call(Pid, {get, NumPages, lclock_get()}, ?LONG_TIME), lclock_update(LC), LPN. @@ -74,7 +94,7 @@ init({FLUs, TypeOrSeed}) -> handle_call({get, NumPages, LC}, _From, MLP) when is_integer(MLP) -> NewLC = lclock_update(LC), - {reply, {MLP, NewLC}, MLP + NumPages}; + {reply, {{ok, MLP}, NewLC}, MLP + NumPages}; handle_call({get, NumPages, LC}, _From, {MLP, BadPercent, MaxDifference}) -> NewLC = lclock_update(LC), Fudge = case random:uniform(100) of @@ -83,7 +103,7 @@ handle_call({get, NumPages, LC}, _From, {MLP, BadPercent, MaxDifference}) -> _ -> 0 end, - {reply, {erlang:max(1, MLP + Fudge), NewLC}, + {reply, {{ok, erlang:max(1, MLP + Fudge)}, NewLC}, {MLP + NumPages, BadPercent, MaxDifference}}; handle_call(stop, _From, MLP) -> {stop, normal, ok, MLP}; @@ -98,7 +118,6 @@ handle_info(_Info, MLP) -> {noreply, MLP}. terminate(_Reason, _MLP) -> - %% io:format(user, "C=~w,", [lclock_get()]), ok. code_change(_OldVsn, MLP, _Extra) -> diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index fc74e7b..5330080 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -34,7 +34,7 @@ -compile({parse_transform, pulse_instrument}). --compile({pulse_skip,[{prop_pulse_test_,0},{clean_up_runtime,1}]}). +-compile({pulse_skip,[{prop_pulse_test_,0},{clean_up_runtime,1},{delete_dir,1}]}). %% -compile({pulse_no_side_effect,[{file,'_','_'}, {erlang, now, 0}]}). %% Used for output within EUnit... @@ -48,6 +48,8 @@ -define(MAX_PAGES, 50000). -define(MY_TAB, i_have_a_name). -define(MY_KEY, ?MY_TAB). +-define(PROJECTION_DIR, "./tmp.projection." ++ os:getpid()). +-define(SEQUENCER_NAME, 'corfurl pulse seq thingie'). -record(run, { proj, % Projection @@ -105,21 +107,26 @@ gen_approx_page() -> gen_scan_forward_start() -> oneof([1, gen_approx_page()]). +gen_stop_method() -> + oneof([stop, kill]). + command(#state{run=Run} = S) -> ?LET({NumChains, ChainLen, PageSize}, {parameter(num_chains), parameter(chain_len), parameter(page_size)}, frequency( - [{10, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize, gen_sequencer()]}} + [{50, {call, ?MODULE, setup, [NumChains, ChainLen, PageSize, gen_sequencer()]}} || not S#state.is_setup] ++ - [{10, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} + [{50, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ - [{3, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} || S#state.is_setup] ++ - [{5, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} - || S#state.is_setup] ++ - [{4, {call, ?MODULE, fill, [Run, gen_approx_page()]}} - || S#state.is_setup] ++ - [{4, {call, ?MODULE, trim, [Run, gen_approx_page()]}} + %% [{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} + %% || S#state.is_setup] ++ + %% [{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}} + %% || S#state.is_setup] ++ + %% [{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}} + %% || S#state.is_setup] ++ + [{ 1, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}} || S#state.is_setup] ++ [])). @@ -146,6 +153,8 @@ next_state(S, _, {call, _, scan_forward, _}) -> next_state(S, _, {call, _, fill, _}) -> S; next_state(S, _, {call, _, trim, _}) -> + S; +next_state(S, _, {call, _, stop_sequencer, _}) -> S. eqeq(X, X) -> true; @@ -182,7 +191,9 @@ postcondition(_S, {call, _, FillTrim, _}, V) error_unwritten -> true; error_overwritten -> true; _ -> eqeq(V, {error, FillTrim, V}) - end. + end; +postcondition(_S, {call, _, stop_sequencer, _}, _V) -> + true. valid_read_result(Pg) when is_binary(Pg) -> true; valid_read_result(error_unwritten) -> true; @@ -195,6 +206,7 @@ run_commands_on_node(LocalOrSlave, Cmds, Seed) -> end, event_logger:start_link(), pulse:start(), + delete_dir(?PROJECTION_DIR), error_logger:tty(false), error_logger:add_report_handler(handle_errors), event_logger:start_logging(), @@ -606,12 +618,17 @@ zipwith(F, [X|Xs], [Y|Ys]) -> [F(X, Y)|zipwith(F, Xs, Ys)]; zipwith(_, _, _) -> []. +delete_dir(Dir) -> + corfurl_util:delete_dir(Dir). + clean_up_runtime(#run{flus=Flus, proj=P}) -> %% io:format(user, "clean_up_runtime: run = ~p\n", [R]), #proj{seq={Seq,_,_}} = P, catch corfurl_sequencer:stop(Seq), [catch corfurl_flu:stop(F) || F <- Flus], - corfurl_test:setup_del_all(length(Flus)). + corfurl_test:setup_del_all(length(Flus)), + delete_dir(?PROJECTION_DIR), + (catch exit(whereis(?SEQUENCER_NAME), kill)). make_chains(ChainLen, FLUs) -> make_chains(ChainLen, FLUs, [], []). @@ -626,14 +643,21 @@ make_chains(ChainLen, [H|T], SmallAcc, BigAcc) -> end. setup(NumChains, ChainLen, PageSize, SeqType) -> + (catch exit(whereis(?SEQUENCER_NAME), kill)), lamport_clock:init(), + N = NumChains * ChainLen, FLUs = corfurl_test:setup_basic_flus(N, PageSize, ?MAX_PAGES), {ok, Seq} = corfurl_sequencer:start_link(FLUs, SeqType), Chains = make_chains(ChainLen, FLUs), %% io:format(user, "Cs = ~p\n", [Chains]), - Proj = corfurl:new_simple_projection(1, 1, ?MAX_PAGES, Chains), - Run = #run{proj=Proj#proj{seq={Seq, node(), 'corfurl pulse seq thingie'}}, + Proj = corfurl:new_simple_projection(?PROJECTION_DIR, + 1, 1, ?MAX_PAGES, Chains), + ok = corfurl:save_projection(?PROJECTION_DIR, Proj), + error_overwritten = corfurl:save_projection(?PROJECTION_DIR, Proj), + 1 = corfurl:latest_projection_epoch_number(?PROJECTION_DIR), + {ok, Proj} = corfurl:read_projection(?PROJECTION_DIR, 1), + Run = #run{proj=Proj#proj{seq={Seq, node(), ?SEQUENCER_NAME}}, flus=FLUs}, ets:insert(?MY_TAB, {?MY_KEY, Run}), Run. @@ -689,11 +713,15 @@ log_make_result(Result) -> log_make_result(Pid, Result) -> {result, Pid, Result}. -pick_an_LPN(#proj{seq={Seq,_,_}}, SeedInt) -> - Max = corfurl_sequencer:get(Seq, 0), - %% The sequencer may be lying to us, shouganai. - if SeedInt > Max -> (SeedInt rem Max) + 1; - true -> SeedInt +pick_an_LPN(#proj{seq={Seq,_,_}} = P, SeedInt) -> + case (catch corfurl_sequencer:get(Seq, 0)) of + {ok, Max} -> + %% The sequencer may be lying to us, shouganai. + if SeedInt > Max -> (SeedInt rem Max) + 1; + true -> SeedInt + end; + _Else -> + pick_an_LPN(corfurl_client:restart_sequencer(P), SeedInt) end. -define(LOG(Tag, MkCall), @@ -702,13 +730,17 @@ pick_an_LPN(#proj{seq={Seq,_,_}}, SeedInt) -> event_logger:event(log_make_result(LOG__Result), lamport_clock:get()), LOG__Result). -append(#run{proj=Proj}, Page) -> +append(#run{proj=OriginalProj}, Page) -> lamport_clock:init(), lamport_clock:incr(), + Proj = get_projection(OriginalProj), ?LOG({append, Page}, - begin - Res = corfurl:append_page(Proj, Page), + try + {Res, Proj2} = (catch corfurl_client:append_page(Proj, Page)), + put_projection(Proj2), perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) + catch X:Y -> + {error, append, X, Y, erlang:get_stacktrace()} end). read_result_mangle({ok, Page}) -> @@ -716,19 +748,22 @@ read_result_mangle({ok, Page}) -> read_result_mangle(Else) -> Else. -read_approx(#run{proj=Proj}, SeedInt) -> +read_approx(#run{proj=OriginalProj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), + Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({read, LPN}, begin Res = read_result_mangle(corfurl:read_page(Proj, LPN)), + put_projection(Proj), perhaps_trip_read_approx(?TRIP_bad_read, Res, LPN) end). -scan_forward(#run{proj=Proj}, SeedInt, NumPages) -> +scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) -> lamport_clock:init(), lamport_clock:incr(), + Proj = get_projection(OriginalProj), StartLPN = if SeedInt == 1 -> 1; true -> pick_an_LPN(Proj, SeedInt) end, @@ -758,9 +793,10 @@ scan_forward(#run{proj=Proj}, SeedInt, NumPages) -> end end). -fill(#run{proj=Proj}, SeedInt) -> +fill(#run{proj=OriginalProj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), + Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({fill, LPN}, begin @@ -768,16 +804,40 @@ fill(#run{proj=Proj}, SeedInt) -> perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) end). -trim(#run{proj=Proj}, SeedInt) -> +trim(#run{proj=OriginalProj}, SeedInt) -> lamport_clock:init(), lamport_clock:incr(), + Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({trim, LPN}, begin - Res = corfurl:trim_page(Proj, LPN), +io:format(user, "LPN = ~p\n", [LPN]), +io:format(user, "PROJ = ~p\n", [Proj]), + Res = (catch corfurl:trim_page(Proj, LPN)), perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN) end). +stop_sequencer(#run{proj=OriginalProj}, Method) -> + Proj = get_projection(OriginalProj), + Seq = element(1,Proj#proj.seq), + try + corfurl_sequencer:stop(Seq, Method), + ok + catch _:_ -> + ok + end. + +get_projection(OriginalProj) -> + case get(projection) of + undefined -> + OriginalProj; + Proj -> + Proj + end. + +put_projection(Proj) -> + put(projection, Proj). + perhaps_trip_append_page(false, Res, _Page) -> Res; perhaps_trip_append_page(true, {ok, LPN}, _Page) when LPN > 3 -> diff --git a/prototype/corfurl/test/corfurl_sequencer_test.erl b/prototype/corfurl/test/corfurl_sequencer_test.erl index 0bef793..17ec42d 100644 --- a/prototype/corfurl/test/corfurl_sequencer_test.erl +++ b/prototype/corfurl/test/corfurl_sequencer_test.erl @@ -65,9 +65,9 @@ smoke_test() -> MLP4 = MLP0 + 4, {ok, Sequencer} = ?M:start_link(FLUs), try - MLP1 = ?M:get(Sequencer, 2), - MLP3 = ?M:get(Sequencer, 1), - MLP4 = ?M:get(Sequencer, 1) + {ok, MLP1} = ?M:get(Sequencer, 2), + {ok, MLP3} = ?M:get(Sequencer, 1), + {ok, MLP4} = ?M:get(Sequencer, 1) after ?M:stop(Sequencer) end diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 1e221f3..21191c2 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -54,8 +54,9 @@ setup_basic_flus(NumFLUs, PageSize, NumPages) -> save_read_test() -> Dir = "/tmp/" ++ atom_to_list(?MODULE) ++".save-read", + PDir = Dir ++ ".projection", Chain = [a,b], - P1 = ?M:new_simple_projection(1, 1, 1*100, [Chain]), + P1 = ?M:new_simple_projection(PDir, 1, 1, 1*100, [Chain]), try filelib:ensure_dir(Dir ++ "/ignored"), @@ -67,10 +68,12 @@ save_read_test() -> ok after - ok = corfurl_util:delete_dir(Dir) + ok = corfurl_util:delete_dir(Dir), + ok = corfurl_util:delete_dir(PDir) end. smoke1_test() -> + PDir = "./tmp.smoke1.projection", NumFLUs = 6, PageSize = 8, NumPages = 10, @@ -83,7 +86,8 @@ smoke1_test() -> lists:flatten(io_lib:format("~8..0w", [X])))} || X <- lists:seq(1, 5)], try - P0 = ?M:new_simple_projection(1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), + P0 = ?M:new_simple_projection(PDir, 1, 1, 1*100, + [[F1, F2, F3], [F4, F5, F6]]), P1 = P0#proj{seq={Seq, unused, unused}}, [begin {ok, LPN} = ?M:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], @@ -142,6 +146,7 @@ smoke1_test() -> ok after + corfurl_util:delete_dir(PDir), corfurl_sequencer:stop(Seq), [corfurl_flu:stop(F) || F <- FLUs], setup_del_all(NumFLUs) @@ -180,6 +185,7 @@ forfun_append(N, #proj{seq={Seq, _, _}} = P, Page) -> %%%% forfun: 5000 procs writing 200000 pages of 4096 bytes/page to 2 chains of 4 total FLUs in 38.972076 sec forfun(NumProcs) -> + PDir = "./tmp.forfun.projection", io:format(user, "\n", []), NumFLUs = 4, PageSize = 8, @@ -192,7 +198,7 @@ forfun(NumProcs) -> try Chains = [[F1, F2], [F3, F4]], %%Chains = [[F1], [F2], [F3], [F4]], - P0 = ?M:new_simple_projection(1, 1, NumPages*2, Chains), + P0 = ?M:new_simple_projection(PDir, 1, 1, NumPages*2, Chains), P = P0#proj{seq={Seq, unused, unused}}, Me = self(), Start = now(), @@ -209,6 +215,7 @@ forfun(NumProcs) -> [NumProcs, NumPages, PageSize, length(Chains), length(lists:flatten(Chains)), timer:now_diff(End, Start) / 1000000]), ok after + corfur_util:delete_dir(PDir), corfurl_sequencer:stop(Seq), [corfurl_flu:stop(F) || F <- FLUs], setup_del_all(NumFLUs) From 96b561cde975ad28a6e0633f515049bd1a657623 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 26 Feb 2014 22:03:28 +0900 Subject: [PATCH 53/70] Fix broken EUnit tests --- prototype/corfurl/test/corfurl_test.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 21191c2..f2a6b4b 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -89,7 +89,7 @@ smoke1_test() -> P0 = ?M:new_simple_projection(PDir, 1, 1, 1*100, [[F1, F2, F3], [F4, F5, F6]]), P1 = P0#proj{seq={Seq, unused, unused}}, - [begin {ok, LPN} = ?M:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + [begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], [begin {ok, Pg} = ?M:read_page(P1, LPN) end || {LPN, Pg} <- LPN_Pgs], @@ -111,7 +111,7 @@ smoke1_test() -> %% Simulate a failed write to the chain. [F6a, F6b, F6c] = Chain6 = ?M:project_to_chain(6, P1), NotHead6 = [F6b, F6c], - ok = ?M:write_single_page_to_chain([F6a], [F6a], Epoch, 6, Pg6, 1), + ok = ?M:write_page_to_chain([F6a], [F6a], Epoch, 6, Pg6, 1), %% Does the chain look as expected? {ok, Pg6} = corfurl_flu:read(?M:flu_pid(F6a), Epoch, 6), From 63d1c93fc953f6c57a29ec329700716c4f546501 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 00:19:25 +0900 Subject: [PATCH 54/70] Fix silly-dumb errors in seal epoch comparisons --- prototype/corfurl/src/corfurl_flu.erl | 4 +-- prototype/corfurl/test/corfurl_flu_test.erl | 27 +++++++------- prototype/corfurl/test/corfurl_test.erl | 39 +++++++++++++++++++++ 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index fc73173..759afca 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -186,12 +186,12 @@ handle_call({{read, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> {reply, {Reply, LC2}, State}; handle_call({{seal, ClientEpoch}, LC1}, _From, #state{min_epoch=MinEpoch} = State) - when ClientEpoch =< MinEpoch -> + when ClientEpoch < MinEpoch -> LC2 = lclock_update(LC1), {reply, {error_badepoch, LC2}, State}; handle_call({{seal, ClientEpoch}, LC1}, _From, #state{max_logical_page=MLPN}=State) -> LC2 = lclock_update(LC1), - NewState = State#state{min_epoch=ClientEpoch}, + NewState = State#state{min_epoch=ClientEpoch+1}, ok = write_hard_state(NewState), {reply, {{ok, MLPN}, LC2}, NewState}; diff --git a/prototype/corfurl/test/corfurl_flu_test.erl b/prototype/corfurl/test/corfurl_flu_test.erl index 21d0f15..1c198b4 100644 --- a/prototype/corfurl/test/corfurl_flu_test.erl +++ b/prototype/corfurl/test/corfurl_flu_test.erl @@ -56,6 +56,7 @@ basic_test() -> try Epoch1 = 1, Epoch2 = 2, + Epoch3 = 3, LPN = 1, Bin1 = <<42:64>>, Bin2 = <<42042:64>>, @@ -71,13 +72,13 @@ basic_test() -> 0 = ?M:get__min_epoch(P1), 0 = ?M:get__trim_watermark(P1), {ok, LPN} = ?M:seal(P1, Epoch1), - 1 = ?M:get__min_epoch(P1), + 2 = ?M:get__min_epoch(P1), error_overwritten = ?M:write(P1, Epoch2, LPN, Bin1), ok = ?M:write(P1, Epoch2, LPN+1, Bin2), - Epoch1 = ?M:get__min_epoch(P1), + Epoch2 = ?M:get__min_epoch(P1), - {ok, Bin1} = ?M:read(P1, Epoch1, LPN), + error_badepoch = ?M:read(P1, Epoch1, LPN), {ok, Bin2} = ?M:read(P1, Epoch2, LPN+1), error_unwritten = ?M:read(P1, Epoch2, LPN+2), badarg = ?M:read(P1, Epoch2, 1 bsl 2982), @@ -88,23 +89,23 @@ basic_test() -> error_badepoch = ?M:read(P1, Epoch1, LPN), error_badepoch = ?M:read(P1, Epoch1, LPN+1), - {ok, Bin1} = ?M:read(P1, Epoch2, LPN), - {ok, Bin2} = ?M:read(P1, Epoch2, LPN+1), + {ok, Bin1} = ?M:read(P1, Epoch3, LPN), + {ok, Bin2} = ?M:read(P1, Epoch3, LPN+1), error_badepoch = ?M:trim(P1, Epoch1, LPN+1), - ok = ?M:trim(P1, Epoch2, LPN+1), - error_trimmed = ?M:trim(P1, Epoch2, LPN+1), + ok = ?M:trim(P1, Epoch3, LPN+1), + error_trimmed = ?M:trim(P1, Epoch3, LPN+1), %% Current watermark processing is broken. But we'll test what's %% there now. ExpectedWaterFixMe = LPN+1, ExpectedWaterFixMe = ?M:get__trim_watermark(P1), - ok = ?M:fill(P1, Epoch2, LPN+3), - error_trimmed = ?M:read(P1, Epoch2, LPN+3), - error_trimmed = ?M:fill(P1, Epoch2, LPN+3), - error_trimmed = ?M:trim(P1, Epoch2, LPN+3), + ok = ?M:fill(P1, Epoch3, LPN+3), + error_trimmed = ?M:read(P1, Epoch3, LPN+3), + error_trimmed = ?M:fill(P1, Epoch3, LPN+3), + error_trimmed = ?M:trim(P1, Epoch3, LPN+3), - Epoch2 = ?M:get__min_epoch(P1), + Epoch3 = ?M:get__min_epoch(P1), ok = ?M:stop(P1), ok after @@ -117,7 +118,7 @@ seal_persistence_test() -> try 0 = ?M:get__min_epoch(P1), Epoch = 665, - {ok, LPN} = ?M:seal(P1, Epoch), + {ok, LPN} = ?M:seal(P1, Epoch-1), Epoch = ?M:get__min_epoch(P1), ok = ?M:stop(P1), diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index f2a6b4b..0051471 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -152,6 +152,45 @@ smoke1_test() -> setup_del_all(NumFLUs) end. +smoke_append_badepoch_test() -> + PDir = "./tmp.smoke2.projection", + NumFLUs = 6, + PageSize = 8, + NumPages = 10, + FLUs = [F1, F2, F3, F4, F5, F6] = + setup_basic_flus(NumFLUs, PageSize, NumPages), + {ok, Seq} = corfurl_sequencer:start_link(FLUs), + + %% We know that the first LPN will be 1. + LPN_Pgs = [{X, list_to_binary( + lists:flatten(io_lib:format("~8..0w", [X])))} || + X <- lists:seq(1, 5)], + try + LittleEpoch = 4, + BigEpoch = 42, + P0 = ?M:new_simple_projection(PDir, BigEpoch, 1, 1*100, + [[F1, F2, F3], [F4, F5, F6]]), + P1 = P0#proj{seq={Seq, unused, unused}}, + [begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], + + [{ok, _} = corfurl_flu:seal(FLU, BigEpoch) || FLU <- FLUs], + [begin + {error_badepoch, _} = corfurl_client:append_page(P1, Pg) + end || {_LPN, Pg} <- LPN_Pgs], + + P2 = P1#proj{epoch=LittleEpoch}, + [begin + {error_badepoch, _} = corfurl_client:append_page(P2, Pg) + end || {_LPN, Pg} <- LPN_Pgs], + + ok + after + corfurl_util:delete_dir(PDir), + corfurl_sequencer:stop(Seq), + [corfurl_flu:stop(F) || F <- FLUs], + setup_del_all(NumFLUs) + end. + -ifdef(TIMING_TEST). forfun_test_() -> From fb1216649c0620636a922d054470e8c429647162 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 01:36:54 +0900 Subject: [PATCH 55/70] Finish very basic PULSE testing of stopping & restarting the sequencer --- prototype/corfurl/rebar.config.script | 3 ++ prototype/corfurl/src/corfurl.erl | 9 +++- prototype/corfurl/src/corfurl_client.erl | 48 ++++++++++++++++++++- prototype/corfurl/src/corfurl_sequencer.erl | 1 - prototype/corfurl/src/corfurl_util.erl | 4 ++ prototype/corfurl/test/corfurl_pulse.erl | 25 ++++++----- prototype/corfurl/test/corfurl_test.erl | 9 ++-- 7 files changed, 76 insertions(+), 23 deletions(-) diff --git a/prototype/corfurl/rebar.config.script b/prototype/corfurl/rebar.config.script index 79df2a3..0eb68b7 100644 --- a/prototype/corfurl/rebar.config.script +++ b/prototype/corfurl/rebar.config.script @@ -18,6 +18,9 @@ case PulseBuild of , {corfurl_flu, trim, '_'} , {corfurl_flu, fill, '_'} + , {corfurl, read_projection, '_'} + , {corfurl, save_projection, '_'} + , {prim_file, '_', '_'} , {file, '_', '_'} , {filelib, '_', '_'} diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 6820578..71ae149 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -320,8 +320,13 @@ save_projection(Dir, #proj{epoch=Epoch} = P) -> end. latest_projection_epoch_number(Dir) -> - {Epoch, _} = string:to_integer(lists:last(filelib:wildcard("*.proj", Dir))), - Epoch. + case filelib:wildcard("*.proj", Dir) of + [] -> + -1; + Files -> + {Epoch, _} = string:to_integer(lists:last(Files)), + Epoch + end. project_to_chain(LPN, P) -> %% TODO fixme diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index df01512..37805e7 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -39,6 +39,13 @@ append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) case append_page2(P, LPN, Page) of lost_race -> append_page(P, Page, Retries - 1); + error_badepoch -> + case poll_for_new_epoch_projection(P) of + {ok, NewP} -> + append_page(NewP, Page, Retries-1); + Else -> + {Else, P} + end; Else -> {Else, P} end @@ -49,7 +56,9 @@ append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) append_page(restart_sequencer(P), Page, Retries); exit:Exit -> {failed, incomplete_code, Exit} - end. + end; +append_page(P, _Page, _Retries) -> + {error_badepoch, P}. append_page2(P, LPN, Page) -> case corfurl:write_page(P, LPN, Page) of @@ -59,6 +68,8 @@ append_page2(P, LPN, Page) -> report_lost_race(LPN, X), lost_race; {special_trimmed, LPN}=XX -> + XX; + error_badepoch=XX-> XX %% Let it crash: error_unwritten end. @@ -72,10 +83,45 @@ restart_sequencer(#proj{seq={OldSequencer, _SeqHost, SeqName}, FLUs = lists:usort( [FLU || R <- Ranges, C <- tuple_to_list(R#range.chains), FLU <- C]), + %% TODO: We can proceed if we can seal at least one FLU in + %% each chain. Robustify and sanity check. + [begin + _Res = corfurl_flu:seal(FLU, Epoch) + end || FLU <- lists:reverse(FLUs)], + case get(goo) of undefined -> put(goo, 0); _Q -> ok end, case corfurl_sequencer:start_link(FLUs, TODO_type, SeqName) of {ok, Pid} -> NewP = P#proj{seq={Pid, node(), SeqName}, epoch=Epoch+1}, save_projection_or_get_latest(NewP) + %% case put(goo, get(goo) + 1) of + %% N when N < 2 -> + %% io:format(user, "hiiiiiiiiiiiiiiiiiiiiiiiiiiiii", []), + %% P#proj{seq={Pid, node(), SeqName}, epoch=Epoch}; + %% _ -> + %% save_projection_or_get_latest(NewP) + %% end + end. + +poll_for_new_epoch_projection(P) -> + poll_for_new_epoch_projection(P, 25). + +poll_for_new_epoch_projection(P, 0) -> + %% TODO: The client that caused the seal may have crashed before + %% writing a new projection. We should try to pick up here, + %% write a new projection, and bully forward. + case corfurl:latest_projection_epoch_number(P#proj.dir) of + Neg when Neg < 0 -> + error_badepoch; + Other -> + exit({bummer, ?MODULE, ?LINE, latest_epoch, Other}) + end; +poll_for_new_epoch_projection(#proj{dir=Dir, epoch=Epoch} = P, Tries) -> + case corfurl:latest_projection_epoch_number(Dir) of + NewEpoch when NewEpoch > Epoch -> + corfurl:read_projection(Dir, NewEpoch); + _ -> + timer:sleep(50), + poll_for_new_epoch_projection(P, Tries - 1) end. save_projection_or_get_latest(#proj{dir=Dir} = P) -> diff --git a/prototype/corfurl/src/corfurl_sequencer.erl b/prototype/corfurl/src/corfurl_sequencer.erl index 1f7a3d3..429e8d8 100644 --- a/prototype/corfurl/src/corfurl_sequencer.erl +++ b/prototype/corfurl/src/corfurl_sequencer.erl @@ -66,7 +66,6 @@ stop(Pid) -> stop(Pid, Method) -> Res = gen_server:call(Pid, stop, infinity), if Method == kill -> - io:format("stop(kill)"), %% Emulate gen.erl's client-side behavior when the server process %% is killed. exit(killed); diff --git a/prototype/corfurl/src/corfurl_util.erl b/prototype/corfurl/src/corfurl_util.erl index c88da23..7a69055 100644 --- a/prototype/corfurl/src/corfurl_util.erl +++ b/prototype/corfurl/src/corfurl_util.erl @@ -22,6 +22,10 @@ -export([delete_dir/1]). +-ifdef(PULSE). +-compile({parse_transform, pulse_instrument}). +-endif. + delete_dir(Dir) -> %% We don't recursively delete directories, the ok pattern match will fail. [ok = file:delete(X) || X <- filelib:wildcard(Dir ++ "/*")], diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 5330080..f6ae41e 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -118,15 +118,15 @@ command(#state{run=Run} = S) -> || not S#state.is_setup] ++ [{50, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ - [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} - || S#state.is_setup] ++ + %% [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + %% || S#state.is_setup] ++ %% [{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} %% || S#state.is_setup] ++ %% [{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}} %% || S#state.is_setup] ++ %% [{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}} %% || S#state.is_setup] ++ - [{ 1, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}} + [{10, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}} || S#state.is_setup] ++ [])). @@ -162,13 +162,13 @@ eqeq(X, Y) -> {X, '/=', Y}. postcondition(_S, {call, _, setup, _}, #run{} = _V) -> true; -postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) -> - true; -postcondition(_S, {call, _, append, _}, {special_trimmed, LPN}) - when is_integer(LPN) -> - true; postcondition(_S, {call, _, append, _}, V) -> - eqeq(V, todoTODO_fixit); + case V of + {ok, LPN} when is_integer(LPN) -> true; + {special_trimmed, LPN} when is_integer(LPN) -> true; + error_badepoch -> true; + _ -> eqeq(V, todoTODO_fixit) + end; postcondition(_S, {call, _, read_approx, _}, V) -> valid_read_result(V); postcondition(_S, {call, _, scan_forward, _}, V) -> @@ -736,10 +736,11 @@ append(#run{proj=OriginalProj}, Page) -> Proj = get_projection(OriginalProj), ?LOG({append, Page}, try - {Res, Proj2} = (catch corfurl_client:append_page(Proj, Page)), + {Res, Proj2} = corfurl_client:append_page(Proj, Page), put_projection(Proj2), perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) catch X:Y -> + io:format("APPEND ~p\n", [{error, append, X, Y, erlang:get_stacktrace()}]), {error, append, X, Y, erlang:get_stacktrace()} end). @@ -811,9 +812,7 @@ trim(#run{proj=OriginalProj}, SeedInt) -> LPN = pick_an_LPN(Proj, SeedInt), ?LOG({trim, LPN}, begin -io:format(user, "LPN = ~p\n", [LPN]), -io:format(user, "PROJ = ~p\n", [Proj]), - Res = (catch corfurl:trim_page(Proj, LPN)), + Res = corfurl:trim_page(Proj, LPN), perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN) end). diff --git a/prototype/corfurl/test/corfurl_test.erl b/prototype/corfurl/test/corfurl_test.erl index 0051471..45423eb 100644 --- a/prototype/corfurl/test/corfurl_test.erl +++ b/prototype/corfurl/test/corfurl_test.erl @@ -174,14 +174,11 @@ smoke_append_badepoch_test() -> [begin {{ok, LPN}, _} = corfurl_client:append_page(P1, Pg) end || {LPN, Pg} <- LPN_Pgs], [{ok, _} = corfurl_flu:seal(FLU, BigEpoch) || FLU <- FLUs], - [begin - {error_badepoch, _} = corfurl_client:append_page(P1, Pg) - end || {_LPN, Pg} <- LPN_Pgs], + {_LPN, Pg} = hd(LPN_Pgs), + {error_badepoch, _} = corfurl_client:append_page(P1, Pg), P2 = P1#proj{epoch=LittleEpoch}, - [begin - {error_badepoch, _} = corfurl_client:append_page(P2, Pg) - end || {_LPN, Pg} <- LPN_Pgs], + {error_badepoch, _} = corfurl_client:append_page(P2, Pg), ok after From 0b031bcf0afc29e1806779b482b9eb7242e207a6 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 11:52:33 +0900 Subject: [PATCH 56/70] Change polling constants for to deal with PULSE's evil --- prototype/corfurl/src/corfurl_client.erl | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 37805e7..41b8942 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -103,7 +103,8 @@ restart_sequencer(#proj{seq={OldSequencer, _SeqHost, SeqName}, end. poll_for_new_epoch_projection(P) -> - poll_for_new_epoch_projection(P, 25). + put(silly_poll_counter, 0), + poll_for_new_epoch_projection(P, get_poll_retries()). poll_for_new_epoch_projection(P, 0) -> %% TODO: The client that caused the seal may have crashed before @@ -120,7 +121,11 @@ poll_for_new_epoch_projection(#proj{dir=Dir, epoch=Epoch} = P, Tries) -> NewEpoch when NewEpoch > Epoch -> corfurl:read_projection(Dir, NewEpoch); _ -> - timer:sleep(50), + timer:sleep(get_poll_sleep_time()), + case put(silly_poll_counter, get(silly_poll_counter) + 1) div 10*1000 of + 0 -> io:format(user, "P", []); + _ -> ok + end, poll_for_new_epoch_projection(P, Tries - 1) end. @@ -155,3 +160,19 @@ report_lost_race(LPN, Reason) -> error_logger:debug_msg("LPN ~p race lost: ~p\n", [LPN, Reason]). -endif. % TEST + +-ifdef(PULSE). +get_poll_retries() -> + 9999*1000. + +get_poll_sleep_time() -> + 1. + +-else. +get_poll_retries() -> + 25. + +get_poll_sleep_time() -> + 50. + +-endif. From 8df5326b0cfc10d48761a3f052f65ca927a83191 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 12:09:25 +0900 Subject: [PATCH 57/70] Try to restart the sequencer only if it looks like nobody else has --- prototype/corfurl/src/corfurl_client.erl | 31 +++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 41b8942..e3528de 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -74,7 +74,17 @@ append_page2(P, LPN, Page) -> %% Let it crash: error_unwritten end. -restart_sequencer(#proj{seq={OldSequencer, _SeqHost, SeqName}, +restart_sequencer(#proj{epoch=Epoch, dir=Dir} = P) -> + case corfurl:latest_projection_epoch_number(Dir) of + N when N > Epoch -> + %% Yay, someone else has intervened. Perhaps they've solved + %% our sequencer problem for us? + read_latest_projection(P); + _ -> + restart_sequencer2(P) + end. + +restart_sequencer2(#proj{seq={OldSequencer, _SeqHost, SeqName}, epoch=Epoch, r=Ranges} = P) -> spawn(fun() -> (catch corfurl_sequencer:stop(OldSequencer)) @@ -88,18 +98,10 @@ restart_sequencer(#proj{seq={OldSequencer, _SeqHost, SeqName}, [begin _Res = corfurl_flu:seal(FLU, Epoch) end || FLU <- lists:reverse(FLUs)], - case get(goo) of undefined -> put(goo, 0); _Q -> ok end, case corfurl_sequencer:start_link(FLUs, TODO_type, SeqName) of {ok, Pid} -> NewP = P#proj{seq={Pid, node(), SeqName}, epoch=Epoch+1}, save_projection_or_get_latest(NewP) - %% case put(goo, get(goo) + 1) of - %% N when N < 2 -> - %% io:format(user, "hiiiiiiiiiiiiiiiiiiiiiiiiiiiii", []), - %% P#proj{seq={Pid, node(), SeqName}, epoch=Epoch}; - %% _ -> - %% save_projection_or_get_latest(NewP) - %% end end. poll_for_new_epoch_projection(P) -> @@ -110,6 +112,8 @@ poll_for_new_epoch_projection(P, 0) -> %% TODO: The client that caused the seal may have crashed before %% writing a new projection. We should try to pick up here, %% write a new projection, and bully forward. + %% NOTE: When that new logic is added, the huge polling interval + %% that PULSE uses should be reduced to something tiny. case corfurl:latest_projection_epoch_number(P#proj.dir) of Neg when Neg < 0 -> error_badepoch; @@ -134,11 +138,14 @@ save_projection_or_get_latest(#proj{dir=Dir} = P) -> ok -> P; error_overwritten -> - NewEpoch = corfurl:latest_projection_epoch_number(Dir), - {ok, NewP} = corfurl:read_projection(Dir, NewEpoch), - NewP + read_latest_projection(P) end. +read_latest_projection(#proj{dir=Dir}) -> + NewEpoch = corfurl:latest_projection_epoch_number(Dir), + {ok, NewP} = corfurl:read_projection(Dir, NewEpoch), + NewP. + -ifdef(TEST). -ifdef(PULSE). report_lost_race(_LPN, _Reason) -> From 04f2105df08c32899d8dc927b1ffb4e84ad65ebe Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 12:29:49 +0900 Subject: [PATCH 58/70] Var renaming in corfurl_client:append_page() --- prototype/corfurl/src/corfurl_client.erl | 28 ++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index e3528de..78dafb3 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -20,7 +20,7 @@ -module(corfurl_client). --export([append_page/2]). +-export([append_page/2, read_page/2]). -export([restart_sequencer/1]). -include("corfurl.hrl"). @@ -28,21 +28,21 @@ -define(LONG_TIME, 5*1000). %% -define(LONG_TIME, 30*1000). -append_page(P, Page) -> - append_page(P, Page, 1). +append_page(Proj, Page) -> + append_page(Proj, Page, 1). -append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) +append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) when Retries < 50 -> try case corfurl_sequencer:get(Sequencer, 1) of {ok, LPN} -> - case append_page2(P, LPN, Page) of + case append_page2(Proj, LPN, Page) of lost_race -> - append_page(P, Page, Retries - 1); + append_page(Proj, Page, Retries - 1); error_badepoch -> case poll_for_new_epoch_projection(P) of {ok, NewP} -> - append_page(NewP, Page, Retries-1); + append_page(NewProj, Page, Retries-1); Else -> {Else, P} end; @@ -57,11 +57,11 @@ append_page(#proj{seq={Sequencer,_,_}} = P, Page, Retries) exit:Exit -> {failed, incomplete_code, Exit} end; -append_page(P, _Page, _Retries) -> - {error_badepoch, P}. +append_page(Proj, _Page, _Retries) -> + {error_badepoch, Proj}. -append_page2(P, LPN, Page) -> - case corfurl:write_page(P, LPN, Page) of +append_page2(Proj, LPN, Page) -> + case corfurl:write_page(Proj, LPN, Page) of ok -> {ok, LPN}; X when X == error_overwritten; X == error_trimmed -> @@ -74,6 +74,12 @@ append_page2(P, LPN, Page) -> %% Let it crash: error_unwritten end. +%% read_page(Proj, LPN) -> +%% case corfurl:read_page(Proj, + + +%%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% + restart_sequencer(#proj{epoch=Epoch, dir=Dir} = P) -> case corfurl:latest_projection_epoch_number(Dir) of N when N > Epoch -> From 1f0e43d33fb9f76c7d9fc0c6d306a0643a3b3f80 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 12:36:02 +0900 Subject: [PATCH 59/70] Fix dumb think-o in corfurl_client:append_page() retry counter --- prototype/corfurl/src/corfurl_client.erl | 38 +++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 78dafb3..2c5fc1d 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -20,7 +20,8 @@ -module(corfurl_client). --export([append_page/2, read_page/2]). +-export([append_page/2]). +%% -export([append_page/2, read_page/2]). -export([restart_sequencer/1]). -include("corfurl.hrl"). @@ -29,10 +30,11 @@ %% -define(LONG_TIME, 30*1000). append_page(Proj, Page) -> - append_page(Proj, Page, 1). + append_page(Proj, Page, 50). -append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) - when Retries < 50 -> +append_page(Proj, _Page, 0) -> + {error_failed, Proj}; +append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> try case corfurl_sequencer:get(Sequencer, 1) of {ok, LPN} -> @@ -40,25 +42,23 @@ append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) lost_race -> append_page(Proj, Page, Retries - 1); error_badepoch -> - case poll_for_new_epoch_projection(P) of - {ok, NewP} -> - append_page(NewProj, Page, Retries-1); + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + append_page(NewProj, Page, Retries - 1); Else -> - {Else, P} + {Else, Proj} end; Else -> - {Else, P} + {Else, Proj} end end catch exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} when Reason == noproc; Reason == normal -> - append_page(restart_sequencer(P), Page, Retries); + append_page(restart_sequencer(Proj), Page, Retries); exit:Exit -> {failed, incomplete_code, Exit} - end; -append_page(Proj, _Page, _Retries) -> - {error_badepoch, Proj}. + end. append_page2(Proj, LPN, Page) -> case corfurl:write_page(Proj, LPN, Page) of @@ -74,8 +74,18 @@ append_page2(Proj, LPN, Page) -> %% Let it crash: error_unwritten end. +%% read_page(Proj, Page) -> +%% read_page(Proj, Page, 10). + %% read_page(Proj, LPN) -> -%% case corfurl:read_page(Proj, +%% case corfurl:read_page(Proj, LPN) of +%% error_badepoch -> +%% case poll_for_new_epoch_projection(P) of +%% {ok, NewP} -> +%% read_page(NewProj, Page); +%% Else -> +%% {Else, P} + %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% From 7ac1e7f178c174fc1b2a722f95110e4b98fecca8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 12:51:24 +0900 Subject: [PATCH 60/70] Add retry loop for read_page/2, fill_page/2, trim_page/2 --- prototype/corfurl/src/corfurl_client.erl | 43 ++++++++++++++---------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 2c5fc1d..8ec895d 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -20,8 +20,7 @@ -module(corfurl_client). --export([append_page/2]). -%% -export([append_page/2, read_page/2]). +-export([append_page/2, read_page/2, fill_page/2, trim_page/2]). -export([restart_sequencer/1]). -include("corfurl.hrl"). @@ -30,10 +29,10 @@ %% -define(LONG_TIME, 30*1000). append_page(Proj, Page) -> - append_page(Proj, Page, 50). + append_page(Proj, Page, 5). append_page(Proj, _Page, 0) -> - {error_failed, Proj}; + {{error_failed, ?MODULE, ?LINE}, Proj}; append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> try case corfurl_sequencer:get(Sequencer, 1) of @@ -57,7 +56,7 @@ append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> when Reason == noproc; Reason == normal -> append_page(restart_sequencer(Proj), Page, Retries); exit:Exit -> - {failed, incomplete_code, Exit} + {{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit} end. append_page2(Proj, LPN, Page) -> @@ -74,22 +73,32 @@ append_page2(Proj, LPN, Page) -> %% Let it crash: error_unwritten end. -%% read_page(Proj, Page) -> -%% read_page(Proj, Page, 10). +read_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:read_page(P, LPN) end, 10). -%% read_page(Proj, LPN) -> -%% case corfurl:read_page(Proj, LPN) of -%% error_badepoch -> -%% case poll_for_new_epoch_projection(P) of -%% {ok, NewP} -> -%% read_page(NewProj, Page); -%% Else -> -%% {Else, P} - - +trim_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:trim_page(P, LPN) end, 10). + +fill_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:fill_page(P, LPN) end, 10). %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% +retry_loop(Proj, _Fun, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +retry_loop(Proj, Fun, Retries) -> + case Fun(Proj) of + error_badepoch -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + retry_loop(NewProj, Fun, Retries - 1); + _Else -> + {{error_failed, ?MODULE, ?LINE}, Proj} + end; + Else -> + {Else, Proj} + end. + restart_sequencer(#proj{epoch=Epoch, dir=Dir} = P) -> case corfurl:latest_projection_epoch_number(Dir) of N when N > Epoch -> From 40c28b79bbf9701a637161dd83933db8c060efc7 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 14:36:07 +0900 Subject: [PATCH 61/70] PULSE test now uses corfurl_client (retry logic) for all ops --- prototype/corfurl/src/corfurl_client.erl | 25 +++++++++-- prototype/corfurl/test/corfurl_pulse.erl | 54 +++++++++++++++--------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 8ec895d..2edac44 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -20,7 +20,7 @@ -module(corfurl_client). --export([append_page/2, read_page/2, fill_page/2, trim_page/2]). +-export([append_page/2, read_page/2, fill_page/2, trim_page/2, scan_forward/3]). -export([restart_sequencer/1]). -include("corfurl.hrl"). @@ -76,11 +76,30 @@ append_page2(Proj, LPN, Page) -> read_page(Proj, LPN) -> retry_loop(Proj, fun(P) -> corfurl:read_page(P, LPN) end, 10). +fill_page(Proj, LPN) -> + retry_loop(Proj, fun(P) -> corfurl:fill_page(P, LPN) end, 10). + trim_page(Proj, LPN) -> retry_loop(Proj, fun(P) -> corfurl:trim_page(P, LPN) end, 10). -fill_page(Proj, LPN) -> - retry_loop(Proj, fun(P) -> corfurl:fill_page(P, LPN) end, 10). +scan_forward(Proj, LPN, MaxPages) -> + %% This is fiddly stuff that I'll get 0.7% wrong if I try to be clever. + %% So, do something simple and (I hope) obviously correct. + %% TODO: do something "smarter". + case corfurl:scan_forward(Proj, LPN, MaxPages) of + {error_badepoch, _LPN2, _MoreP, _Pages} = Res -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + {Res, NewProj}; + _Else -> + %% TODO: What is the risk of getting caught in a situation + %% where we can never make any forward progress when pages + %% really are being written? + {Res, Proj} + end; + Res -> + {Res, Proj} + end. %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% %%%%% diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index f6ae41e..48491a7 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -118,14 +118,14 @@ command(#state{run=Run} = S) -> || not S#state.is_setup] ++ [{50, {call, ?MODULE, append, [Run, gen_page(PageSize)]}} || S#state.is_setup] ++ - %% [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} - %% || S#state.is_setup] ++ - %% [{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} - %% || S#state.is_setup] ++ - %% [{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}} - %% || S#state.is_setup] ++ - %% [{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}} - %% || S#state.is_setup] ++ + [{15, {call, ?MODULE, read_approx, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{15, {call, ?MODULE, scan_forward, [Run, gen_scan_forward_start(), nat()]}} + || S#state.is_setup] ++ + [{12, {call, ?MODULE, fill, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ + [{12, {call, ?MODULE, trim, [Run, gen_approx_page()]}} + || S#state.is_setup] ++ [{10, {call, ?MODULE, stop_sequencer, [Run, gen_stop_method()]}} || S#state.is_setup] ++ [])). @@ -740,8 +740,7 @@ append(#run{proj=OriginalProj}, Page) -> put_projection(Proj2), perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) catch X:Y -> - io:format("APPEND ~p\n", [{error, append, X, Y, erlang:get_stacktrace()}]), - {error, append, X, Y, erlang:get_stacktrace()} + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} end). read_result_mangle({ok, Page}) -> @@ -755,10 +754,13 @@ read_approx(#run{proj=OriginalProj}, SeedInt) -> Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({read, LPN}, - begin - Res = read_result_mangle(corfurl:read_page(Proj, LPN)), - put_projection(Proj), - perhaps_trip_read_approx(?TRIP_bad_read, Res, LPN) + try + {Res, Proj2} = corfurl_client:read_page(Proj, LPN), + put_projection(Proj2), + Res2 = read_result_mangle(Res), + perhaps_trip_read_approx(?TRIP_bad_read, Res2, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} end). scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) -> @@ -773,10 +775,12 @@ scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) -> %% it appear as if each LPN result that scan_forward() gives us came %% instead from a single-page read_page() call. ?LOG({scan_forward, StartLPN, NumPages}, - begin + try TS1 = lamport_clock:get(), - case corfurl:scan_forward(Proj, StartLPN, NumPages) of - {ok, EndLPN, MoreP, Pages} -> + case corfurl_client:scan_forward(Proj, StartLPN, NumPages) of + {{Res, EndLPN, MoreP, Pages}, Proj2} + when Res == ok; Res == error_badepoch -> + put_projection(Proj2), PageIs = lists:zip(Pages, lists:seq(1, length(Pages))), TS2 = lamport_clock:get(), [begin @@ -792,6 +796,8 @@ scan_forward(#run{proj=OriginalProj}, SeedInt, NumPages) -> {LPN, P} <- Pages], {ok, EndLPN, MoreP, Ps} end + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} end). fill(#run{proj=OriginalProj}, SeedInt) -> @@ -800,9 +806,12 @@ fill(#run{proj=OriginalProj}, SeedInt) -> Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({fill, LPN}, - begin - Res = corfurl:fill_page(Proj, LPN), + try + {Res, Proj2} = corfurl_client:fill_page(Proj, LPN), + put_projection(Proj2), perhaps_trip_fill_page(?TRIP_bad_fill, Res, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} end). trim(#run{proj=OriginalProj}, SeedInt) -> @@ -811,9 +820,12 @@ trim(#run{proj=OriginalProj}, SeedInt) -> Proj = get_projection(OriginalProj), LPN = pick_an_LPN(Proj, SeedInt), ?LOG({trim, LPN}, - begin - Res = corfurl:trim_page(Proj, LPN), + try + {Res, Proj2} = corfurl_client:trim_page(Proj, LPN), + put_projection(Proj2), perhaps_trip_trim_page(?TRIP_bad_trim, Res, LPN) + catch X:Y -> + {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} end). stop_sequencer(#run{proj=OriginalProj}, Method) -> From 6858041c7dda5e7fac505e2c54bc07138d138c10 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 14:59:26 +0900 Subject: [PATCH 62/70] See comments added by this commit for append_page() bug found, racing with epoch change --- prototype/corfurl/src/corfurl_client.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index 2edac44..c10d691 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -18,6 +18,11 @@ %% %% ------------------------------------------------------------------- +%% TODO: fix this failure case with append_page(Bin) -> {ok, 2}, +%% but read_page(LPN=1) can read Bin, and model believes that +%% LPN=1 was never written +%% Ca = [{2,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,stop_sequencer,[{var,1},kill]}},{set,{var,4},{call,corfurl_pulse,trim,[{var,1},386862782]}},{set,{var,5},{call,corfurl_pulse,scan_forward,[{var,1},5412106233,1]}}]]},{24672,25300,90107}},[{events,[[{no_bad_reads,[]}]]}]]. + -module(corfurl_client). -export([append_page/2, read_page/2, fill_page/2, trim_page/2, scan_forward/3]). From b7b9255f5f7a855d7ef867347bf004108a212491 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 27 Feb 2014 15:18:22 +0900 Subject: [PATCH 63/70] Partial fix for bug in last commit, but not good enough --- prototype/corfurl/src/corfurl_client.erl | 40 +++++++++++------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index c10d691..d15ce9b 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -18,11 +18,6 @@ %% %% ------------------------------------------------------------------- -%% TODO: fix this failure case with append_page(Bin) -> {ok, 2}, -%% but read_page(LPN=1) can read Bin, and model believes that -%% LPN=1 was never written -%% Ca = [{2,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[2,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,stop_sequencer,[{var,1},kill]}},{set,{var,4},{call,corfurl_pulse,trim,[{var,1},386862782]}},{set,{var,5},{call,corfurl_pulse,scan_forward,[{var,1},5412106233,1]}}]]},{24672,25300,90107}},[{events,[[{no_bad_reads,[]}]]}]]. - -module(corfurl_client). -export([append_page/2, read_page/2, fill_page/2, trim_page/2, scan_forward/3]). @@ -40,22 +35,8 @@ append_page(Proj, _Page, 0) -> {{error_failed, ?MODULE, ?LINE}, Proj}; append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> try - case corfurl_sequencer:get(Sequencer, 1) of - {ok, LPN} -> - case append_page2(Proj, LPN, Page) of - lost_race -> - append_page(Proj, Page, Retries - 1); - error_badepoch -> - case poll_for_new_epoch_projection(Proj) of - {ok, NewProj} -> - append_page(NewProj, Page, Retries - 1); - Else -> - {Else, Proj} - end; - Else -> - {Else, Proj} - end - end + {ok, LPN} = corfurl_sequencer:get(Sequencer, 1), + append_page1(Proj, LPN, Page, 5) catch exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} when Reason == noproc; Reason == normal -> @@ -64,6 +45,23 @@ append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> {{error_failed, ?MODULE, ?LINE}, incomplete_code, Exit} end. +append_page1(Proj, _LPN, _Page, 0) -> + {{error_failed, ?MODULE, ?LINE}, Proj}; +append_page1(Proj, LPN, Page, Retries) -> + case append_page2(Proj, LPN, Page) of + lost_race -> + append_page(Proj, Page, Retries - 1); + error_badepoch -> + case poll_for_new_epoch_projection(Proj) of + {ok, NewProj} -> + append_page1(NewProj, LPN, Page, Retries - 1); + Else -> + {Else, Proj} + end; + Else -> + {Else, Proj} + end. + append_page2(Proj, LPN, Page) -> case corfurl:write_page(Proj, LPN, Page) of ok -> From 8b105672b1d03a216db5e95a18a1fdee15179885 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sat, 1 Mar 2014 20:26:11 +0900 Subject: [PATCH 64/70] Bugfix for read-repair (thanks PULSE), model change to handle handle aborted writes --- prototype/corfurl/src/corfurl.erl | 21 +++++- prototype/corfurl/src/corfurl_client.erl | 33 +++++++++- prototype/corfurl/test/corfurl_pulse.erl | 84 ++++++++++++++++++++---- 3 files changed, 120 insertions(+), 18 deletions(-) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index 71ae149..cfb8f82 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -129,7 +129,15 @@ ok_or_trim(error_trimmed) -> ok_or_trim(Else) -> Else. -read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> +read_repair_chain(Epoch, LPN, Chain) -> + try + read_repair_chain1(Epoch, LPN, Chain) + catch + throw:{i_give_up,Res} -> + Res + end. + +read_repair_chain1(Epoch, LPN, [Head|Rest] = Chain) -> ?EVENT_LOG({read_repair, LPN, Chain, i_am, self()}), case corfurl_flu:read(flu_pid(Head), Epoch, LPN) of {ok, Page} -> @@ -153,10 +161,17 @@ read_repair_chain(Epoch, LPN, [Head|Rest] = Chain) -> Res2 = ok_or_trim(corfurl_flu:trim( flu_pid(X), Epoch, LPN)), ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), trim, Res2}), - Res2; + case Res2 of ok -> ok; + _ -> throw({i_give_up,Res2}) + end; Else -> + %% We're too deeply nested for the current code + %% to deal with, and we're racing. Fine, let + %% our opponent continue. We'll give up, and if + %% the client wants to try again, we can try + %% again from the top. ?EVENT_LOG({read_repair, LPN, fill, flu_pid(X), Else}), - Else + throw({i_give_up,Else}) end end || X <- Rest], error_trimmed; diff --git a/prototype/corfurl/src/corfurl_client.erl b/prototype/corfurl/src/corfurl_client.erl index d15ce9b..b875f07 100644 --- a/prototype/corfurl/src/corfurl_client.erl +++ b/prototype/corfurl/src/corfurl_client.erl @@ -22,6 +22,8 @@ -export([append_page/2, read_page/2, fill_page/2, trim_page/2, scan_forward/3]). -export([restart_sequencer/1]). +%% For debugging/verification only +-export([pulse_tracing_start/1, pulse_tracing_add/2, pulse_tracing_get/1]). -include("corfurl.hrl"). @@ -36,6 +38,7 @@ append_page(Proj, _Page, 0) -> append_page(#proj{seq={Sequencer,_,_}} = Proj, Page, Retries) -> try {ok, LPN} = corfurl_sequencer:get(Sequencer, 1), + pulse_tracing_add(write, LPN), append_page1(Proj, LPN, Page, 5) catch exit:{Reason,{_gen_server_or_pulse_gen_server,call,[Sequencer|_]}} @@ -217,7 +220,7 @@ report_lost_race(LPN, Reason) -> -ifdef(PULSE). get_poll_retries() -> - 9999*1000. + 999*1000. get_poll_sleep_time() -> 1. @@ -230,3 +233,31 @@ get_poll_sleep_time() -> 50. -endif. + +-ifdef(PULSE). + +pulse_tracing_start(Type) -> + put({?MODULE, Type}, []). + +pulse_tracing_add(Type, Stuff) -> + List = case pulse_tracing_get(Type) of + undefined -> []; + L -> L + end, + put({?MODULE, Type}, [Stuff|List]). + +pulse_tracing_get(Type) -> + get({?MODULE, Type}). + +-else. + +pulse_tracing_start(_Type) -> + ok. + +pulse_tracing_add(_Type, _Stuff) -> + ok. + +pulse_tracing_get(_Type) -> + ok. + +-endif. diff --git a/prototype/corfurl/test/corfurl_pulse.erl b/prototype/corfurl/test/corfurl_pulse.erl index 48491a7..d00c2d7 100644 --- a/prototype/corfurl/test/corfurl_pulse.erl +++ b/prototype/corfurl/test/corfurl_pulse.erl @@ -337,7 +337,8 @@ check_trace(Trace0, _Cmds, _Seed) -> ({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN; ({call, _Pid, {read, LPN, _, _}}) -> LPN; ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN; - ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN + ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN; + ({call, _Pid, {goo_write, LPN, _Pg}}) -> LPN end, fun(x) -> [] end, Calls), @@ -405,8 +406,7 @@ check_trace(Trace0, _Cmds, _Seed) -> InitialValDict = orddict:from_list([{LPN, [error_unwritten]} || LPN <- AllLPNs]), - {ValuesR, _} = - lists:mapfoldl( + ValuesRFun = fun({TS1, TS2, StEnds}, Dict1) -> Dict2 = lists:foldl( fun({mod_start, w_1, LPN, Pg}, D) -> @@ -440,7 +440,8 @@ check_trace(Trace0, _Cmds, _Seed) -> orddict:store(LPN, [Pg,error_trimmed], D) end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]), {{TS1, TS2, [{values, Dict3}]}, Dict3} - end, InitialValDict, StartsDones), + end, + {ValuesR, _} = lists:mapfoldl(ValuesRFun, InitialValDict, StartsDones), InitialTtnDict = orddict:from_list([{LPN, [w_0]} || LPN <- AllLPNs]), {TransitionsR, _} = @@ -469,6 +470,14 @@ check_trace(Trace0, _Cmds, _Seed) -> %% Instead, we need to merge together all possible values from ValuesR %% that appear at any time during the read op's lifetime. + PerhapsR = eqc_temporal:stateful( + fun({call, _Pid, {goo_write, LPN, Pg}}) -> + {perhaps, LPN, Pg} + end, + fun(x)-> [] end, + Events), + {_, _, Perhaps} = lists:last(eqc_temporal:all_future(PerhapsR)), + %%?QC_FMT("*Perhaps: ~p\n", [Perhaps]), Reads = eqc_temporal:stateful( fun({call, Pid, {read, LPN, _, _}}) -> {read, Pid, LPN, []} @@ -483,10 +492,27 @@ check_trace(Trace0, _Cmds, _Seed) -> false = NewVs == V1s, {read, Pid, LPN, NewVs}; ({read, Pid, LPN, Vs}, {result, Pid, Pg}) -> + %% case lists:member(Pg, Vs) orelse + %% lists:member({perhaps, LPN, Pg}, Perhaps) of case lists:member(Pg, Vs) of - true -> []; - false -> [{bad, read, LPN, Pid, got, Pg, - possible, Vs}] + true -> + []; + false -> + case lists:member({perhaps, LPN, Pg}, Perhaps) of + true -> + %% The checking of the Perhaps list in + %% this manner is not strictly + %% temporally valid. It is possible + %% for the {perhaps,...} event to be + %% after the event we're checking here. + %% TODO work is to make this check 100% + %% temporally valid. + io:format(user, "Yo, found ~p ~p in Perhaps\n", [LPN, Pg]), + []; + false -> + [{bad, read, LPN, Pid, got, Pg, + possible, Vs}] + end end end, eqc_temporal:union(Events, ValuesR)), BadFilter = fun(bad) -> true; @@ -518,7 +544,8 @@ check_trace(Trace0, _Cmds, _Seed) -> ?QC_FMT("*InvalidTtns: ~p\n", [InvalidTransitions]), ?QC_FMT("*ValuesR: ~p\n", [eqc_temporal:unions([ValuesR, StartsDones])]), ?QC_FMT("*Calls: ~p\n", [Calls]), - ?QC_FMT("*BadReads: ~p\n", [BadReads]) + ?QC_FMT("*BadReads: ~p\n", [BadReads]), + ?QC_FMT("*Perhaps: ~p\n", [Perhaps]) end, conjunction( [ @@ -724,23 +751,52 @@ pick_an_LPN(#proj{seq={Seq,_,_}} = P, SeedInt) -> pick_an_LPN(corfurl_client:restart_sequencer(P), SeedInt) end. --define(LOG(Tag, MkCall), - event_logger:event(log_make_call(Tag), lamport_clock:get()), - LOG__Result = MkCall, - event_logger:event(log_make_result(LOG__Result), lamport_clock:get()), - LOG__Result). +-define(LOG3(Tag, MkCall, PostCall), + begin + LOG__Start = lamport_clock:get(), + event_logger:event(log_make_call(Tag), LOG__Start), + LOG__Result = MkCall, + LOG__End = lamport_clock:get(), + PostCall, + event_logger:event(log_make_result(LOG__Result), LOG__End), + LOG__Result + end). + +-define(LOG(Tag, MkCall), ?LOG3(Tag, MkCall, okqq)). append(#run{proj=OriginalProj}, Page) -> lamport_clock:init(), lamport_clock:incr(), Proj = get_projection(OriginalProj), - ?LOG({append, Page}, + ?LOG3({append, Page}, try + corfurl_client:pulse_tracing_start(write), {Res, Proj2} = corfurl_client:append_page(Proj, Page), put_projection(Proj2), + OtherPages0 = lists:usort(corfurl_client:pulse_tracing_get(write)), + OtherPages = case Res of + {ok, LPN} -> + OtherPages0 -- [LPN]; + _ -> + OtherPages0 + end, + put(zzzOtherPages, OtherPages), perhaps_trip_append_page(?TRIP_no_append_duplicates, Res, Page) catch X:Y -> {caught, ?MODULE, ?LINE, X, Y, erlang:get_stacktrace()} + end, + try + OPages = get(zzzOtherPages), + %%if OPages /= [] -> io:format("OPages = ~w\n", [OPages]); true -> ok end, + GooPid = {self(), goo, now()}, + [begin + event_logger:event(log_make_call(GooPid, {goo_write, OP, Page}), + LOG__Start), + event_logger:event(log_make_result(GooPid, who_knows), + LOG__End) + end || OP <- OPages] + catch XX:YY -> + exit({oops, ?MODULE, ?LINE, XX, YY, erlang:get_stacktrace()}) end). read_result_mangle({ok, Page}) -> From c9764bf5f6d6f5a250f1f1d35177d2cd911677bf Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Sat, 1 Mar 2014 20:33:13 +0900 Subject: [PATCH 65/70] Add new docs/corfurl/notes/README.md stuff and also: Add CORFU papers section Merge corfurl.md and CONCEPTS.md Add one more CORFU-related paper Delete prototype/corfurl/docs/CONCEPTS.md --- prototype/corfurl/README.md | 17 ++++ prototype/corfurl/docs/corfurl.md | 85 +++++++++++++++++++ ...02-27.chain-repair-need-write-twice.mscgen | 35 ++++++++ .../corfurl/docs/corfurl/notes/README.md | 71 +++++++++++++++- .../corfurl/notes/two-clients-race.1.mscgen | 33 +++++++ 5 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 prototype/corfurl/README.md create mode 100644 prototype/corfurl/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen create mode 100644 prototype/corfurl/docs/corfurl/notes/two-clients-race.1.mscgen diff --git a/prototype/corfurl/README.md b/prototype/corfurl/README.md new file mode 100644 index 0000000..95f10aa --- /dev/null +++ b/prototype/corfurl/README.md @@ -0,0 +1,17 @@ +This is a repo that has other stuff that Greg Burd was noodling +around with wrt distributed indexing. I haven't bothered weeding +any of it out, sorry! + +The corfurl code is in the 'src' and 'include' directories. In +addition, there are docs here: + +https://github.com/basho/corfurl/blob/master/docs/corfurl.md + +This is a README-style collection of CORFU-related papers, +building instructions, and testing instructions. + +https://github.com/basho/corfurl/tree/master/docs/corfurl/notes +https://github.com/basho/corfurl/tree/master/docs/corfurl/notes#two-clients-try-to-write-the-exact-same-data-at-the-same-time-to-the-same-lpn + +The above are some notes about testing problems & solutions that +I was/am/?? hoping might find their way into a paper someday. diff --git a/prototype/corfurl/docs/corfurl.md b/prototype/corfurl/docs/corfurl.md index fd02134..08960dc 100644 --- a/prototype/corfurl/docs/corfurl.md +++ b/prototype/corfurl/docs/corfurl.md @@ -1,3 +1,88 @@ +## CORFU papers + +I recommend the "5 pages" paper below first, to give a flavor of +what the CORFU is about. When Scott first read the CORFU paper +back in 2011 (and the Hyder paper), he thought it was insanity. +He recommends waiting before judging quite so hastily. :-) + +After that, then perhaps take a step back are skim over the +Hyder paper. Hyder started before CORFU, but since CORFU, the +Hyder folks at Microsoft have rewritten Hyder to use CORFU as +the shared log underneath it. But the Hyder paper has lots of +interesting bits about how you'd go about creating a distributed +DB where the transaction log *is* the DB. + +### "CORFU: A Distributed Shared LogCORFU: A Distributed Shared Log" + +MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN +PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI, +University of California, San Diego, TED WOBBER, Microsoft Research +Silicon Valley + +Long version of introduction to CORFU (~30 pages) +http://www.snookles.com/scottmp/corfu/corfu.a10-balakrishnan.pdf + +### "CORFU: A Shared Log Design for Flash Clusters" + +Same authors as above + +Short version of introduction to CORFU paper above (~12 pages) + +http://www.snookles.com/scottmp/corfu/corfu-shared-log-design.nsdi12-final30.pdf + +### "From Paxos to CORFU: A Flash-Speed Shared Log" + +Same authors as above + +5 pages, a short summary of CORFU basics and some trial applications +that have been implemented on top of it. + +http://www.snookles.com/scottmp/corfu/paxos-to-corfu.malki-acmstyle.pdf + +### "Beyond Block I/O: Implementing a Distributed Shared Log in Hardware" + +Wei, Davis, Wobber, Balakrishnan, Malkhi + +Summary report of implmementing the CORFU server-side in +FPGA-style hardware. (~11 pages) + +http://www.snookles.com/scottmp/corfu/beyond-block-io.CameraReady.pdf + +### "Tango: Distributed Data Structures over a Shared Log" + +Balakrishnan, Malkhi, Wobber, Wu, Brabhakaran, Wei, Davis, Rao, Zou, Zuck + +Describes a framework for developing data structures that reside +persistently within a CORFU log: the log *is* the database/data +structure store. + +http://www.snookles.com/scottmp/corfu/Tango.pdf + +### "Dynamically Scalable, Fault-Tolerant Coordination on a Shared Logging Service" + +Wei, Balakrishnan, Davis, Malkhi, Prabhakaran, Wobber + +The ZooKeeper inter-server communication is replaced with CORFU. +Faster, fewer lines of code than ZK, and more features than the +original ZK code base. + +http://www.snookles.com/scottmp/corfu/zookeeper-techreport.pdf + +### "Hyder – A Transactional Record Manager for Shared Flash" + +Bernstein, Reid, Das + +Describes a distributed log-based DB system where the txn log is +treated quite oddly: a "txn intent" record is written to a +shared common log All participants read the shared log in +parallel and make commit/abort decisions in parallel, based on +what conflicts (or not) that they see in the log. Scott's first +reading was "No way, wacky" ... and has since changed his mind. + +http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf +pages 9-20 + + ## Fiddling with PULSE diff --git a/prototype/corfurl/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen b/prototype/corfurl/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen new file mode 100644 index 0000000..3e01ac1 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/2014-02-27.chain-repair-need-write-twice.mscgen @@ -0,0 +1,35 @@ +msc { + client1, FLU1, FLU2, client2, client3; + + client1 box client3 [label="Epoch #1: chain = FLU1 -> FLU2"]; + client1 -> FLU1 [label="{write,epoch1,<>}"]; + client1 <- FLU1 [label="ok"]; + client1 box client1 [label="Client crash", textcolour="red"]; + + FLU1 box FLU1 [label="FLU crash", textcolour="red"]; + + client1 box client3 [label="Epoch #2: chain = FLU2"]; + + client2 -> FLU2 [label="{write,epoch2,<>}"]; + client2 <- FLU2 [label="ok"]; + + client3 box client3 [label="Read repair starts", textbgcolour="aqua"]; + + client3 -> FLU2 [label="{read,epoch2}"]; + client3 <- FLU2 [label="{ok,<>}"]; + client3 -> FLU1 [label="{write,epoch2,<>}"]; + FLU1 box FLU1 [label="What do we do here? Our current value is <>.", textcolour="red"] ; + FLU1 box FLU1 [label="If we do not accept the repair value, then we are effectively UNREPAIRABLE.", textcolour="red"] ; + FLU1 box FLU1 [label="If we do accept the repair value, then we are mutating an already-written value.", textcolour="red"] ; + FLU1 -> client3 [label="I'm sorry, Dave, I cannot do that."]; + + FLU1 box FLU1 [label = "In theory, while repair is still happening, nobody will ever ask FLU1 for its value.", textcolour="black"] ; + + client3 -> FLU1 [label="{write,epoch2,<>,repair,witnesses=[FLU2]}", textbgcolour="silver"]; + FLU1 box FLU1 [label="Start an async process to ask the witness list to corroborate this repair."]; + FLU1 -> FLU2 [label="{read,epoch2}", textbgcolour="aqua"]; + FLU1 <- FLU2 [label="{ok,<>}", textbgcolour="aqua"]; + FLU1 box FLU1 [label="Overwrite local storage with repair page.", textbgcolour="silver"]; + client3 <- FLU1 [label="Async proc replies: ok", textbgcolour="silver"]; + +} diff --git a/prototype/corfurl/docs/corfurl/notes/README.md b/prototype/corfurl/docs/corfurl/notes/README.md index 337a34b..b5757aa 100644 --- a/prototype/corfurl/docs/corfurl/notes/README.md +++ b/prototype/corfurl/docs/corfurl/notes/README.md @@ -20,4 +20,73 @@ substantially to make it clearer what is happening. Also for commit 087c2605ab. I believe that I have a fix for the silver-colored -`error-overwritten`, but the correctness of it remains to be seen. +`error-overwritten` ... and it was indeed added to the code soon +afterward, but it turns out that it doesn't solve the entire problem +of "two clients try to write the exact same data at the same time to +the same LPN". + + +## "Two Clients Try to Write the Exact Same Data at the Same Time to the Same LPN" + +This situation is something that CORFU cannot protect against, IMO. + +I have been struggling for a while, to try to find a way for CORFU +clients to know *always* when there is a conflict with another +writer. It usually works: the basic nature of write-once registers is +very powerful. However, in the case where two clients are trying to +write the same page data to the same LPN, it looks impossible to +resolve. + +How do you tell the difference between: + +1. A race between a client A writing page P at address LPN and + read-repair fixing P. P *is* A's data and no other's, so this race + doesn't confuse anyone. + +1. A race between a client A writing page P at address LPN and client + B writing the exact same page data P at the same LPN. + A's page P = B's page P, but clients A & B don't know that. + + If CORFU tells both A & B that they were successful, A & B assume + that the CORFU log has two new pages appended to it, but in truth + only one new page was appended. + +If we try to solve this by always avoiding the same LPN address +conflict, we are deluding ourselves. If we assume that the sequencer +is 100% correct in that it never assigns the same LPN twice, and if we +assume that a client must never write a block without an assignment +from the sequencer, then the problem is solved. But the problem has a +_heavy_ price: the log is only available when the sequencer is +available, and only when never more than one sequencer running at a +time. + +The CORFU base system promises correct operation, even if: + +* Zero sequencers are running, and clients might choose the same LPN + to write to. +* Two more more sequencers are running, and different sequencers + assign the same LPN to two different clients. + +But CORFU's "correct" behavior does not include detecting the same +page at the same LPN. The papers don't specifically say it, alas. +But IMO it's impossible to guarantee, so all docs ought to explicitly +say that it's impossible and that clients must not assume it. + +See also +* two-clients-race.1.png + +## A scenario of chain repair & write-once registers + +See: +* 2014-02-27.chain-repair-write-twice.png + +... for a scenario where write-once registers that are truly only +write-once-ever-for-the-rest-of-the-future are "inconvenient" when it +comes to chain repair. Client 3 is attempting to do chain repair ops, +bringing FLU1 back into sync with FLU2. + +The diagram proposes one possible idea for making overwriting a +read-once register a bit safer: ask another node in the chain to +verify that the page you've been asked to repair is exactly the same +as that other FLU's page. + diff --git a/prototype/corfurl/docs/corfurl/notes/two-clients-race.1.mscgen b/prototype/corfurl/docs/corfurl/notes/two-clients-race.1.mscgen new file mode 100644 index 0000000..ce8e614 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/two-clients-race.1.mscgen @@ -0,0 +1,33 @@ +msc { + client1, FLU1, FLU2, client2, client3; + + client1 -> FLU1 [label="{write,epoch1,<>}"]; + client1 <- FLU1 [label="ok"]; + + client3 -> FLU2 [label="{seal,epoch1}"]; + client3 <- FLU2 [label="{ok,...}"]; + client3 -> FLU1 [label="{seal,epoch1}"]; + client3 <- FLU1 [label="{ok,...}"]; + + client2 -> FLU1 [label="{write,epoch1,<>}"]; + client2 <- FLU1 [label="error_epoch"]; + client2 abox client2 [label="Ok, get the new epoch info....", textbgcolour="silver"]; + client2 -> FLU1 [label="{write,epoch2,<>}"]; + client2 <- FLU1 [label="error_overwritten"]; + + client1 -> FLU2 [label="{write,epoch1,<>}"]; + client1 <- FLU2 [label="error_epoch"]; + client1 abox client1 [label="Ok, hrm.", textbgcolour="silver"]; + + client3 abox client3 [ label = "Start read repair", textbgcolour="aqua"] ; + client3 -> FLU1 [label="{read,epoch2}"]; + client3 <- FLU1 [label="{ok,<>}"]; + client3 -> FLU2 [label="{write,epoch2,<>}"]; + client3 <- FLU2 [label="ok"]; + client3 abox client3 [ label = "End read repair", textbgcolour="aqua"] ; + client3 abox client3 [ label = "We saw <>", textbgcolour="silver"] ; + + client1 -> FLU2 [label="{write,epoch2,<>}"]; + client1 <- FLU2 [label="error_overwritten"]; + +} From 305cf34a2d95cd55e96a135b0605dea2225333a3 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 5 May 2014 22:09:03 +0900 Subject: [PATCH 66/70] Move old README.md -> README.old.md, create new README.md --- prototype/corfurl/README.old.md | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 prototype/corfurl/README.old.md diff --git a/prototype/corfurl/README.old.md b/prototype/corfurl/README.old.md new file mode 100644 index 0000000..0286e08 --- /dev/null +++ b/prototype/corfurl/README.old.md @@ -0,0 +1,52 @@ +Shiny prototype/sandbox of distributed DBMS concepts. + +Compile: + + ./rebar get-deps compile + +Starting: + + erl -pa ebin deps/*/ebin -s dbms + +Things to try: + +Prettyprint a record: + + dbms:print_record(). + +Start some load: + + dbms:loadgen(). + +Show all the debug messages: + + lager:set_loglevel(lager_console_backend, debug). + +Reset the loglevel because your console is flooding: + + lager:set_loglevel(lager_console_backend, info). + +Show all the read-repair operations: + + lager:trace_console([{operation, repair}]). + +Reset the traces: + + lager:clear_all_traces(). + +Show all the PUTs against vnode 13: + + lager:trace_console([{operation, put}, {id, 13}]). + +Show all the keys being read between 500 and 600: + + lager:trace_console([{operation, get}, {key, '>', 500}, {key, '<', 600}]). + +Trace specific GET/PUT operstions: + + dbms:traced_get(Key). + dbms:traced_put(Key). + +Manually erase a vnode: + + dbms_dynamo:wipe(VnodeID). From edd5b625631bf62125e6f325c86204540512bad9 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 2 Mar 2015 18:09:56 +0900 Subject: [PATCH 67/70] del prototype/corfurl/README.old.md --- prototype/corfurl/README.old.md | 52 --------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 prototype/corfurl/README.old.md diff --git a/prototype/corfurl/README.old.md b/prototype/corfurl/README.old.md deleted file mode 100644 index 0286e08..0000000 --- a/prototype/corfurl/README.old.md +++ /dev/null @@ -1,52 +0,0 @@ -Shiny prototype/sandbox of distributed DBMS concepts. - -Compile: - - ./rebar get-deps compile - -Starting: - - erl -pa ebin deps/*/ebin -s dbms - -Things to try: - -Prettyprint a record: - - dbms:print_record(). - -Start some load: - - dbms:loadgen(). - -Show all the debug messages: - - lager:set_loglevel(lager_console_backend, debug). - -Reset the loglevel because your console is flooding: - - lager:set_loglevel(lager_console_backend, info). - -Show all the read-repair operations: - - lager:trace_console([{operation, repair}]). - -Reset the traces: - - lager:clear_all_traces(). - -Show all the PUTs against vnode 13: - - lager:trace_console([{operation, put}, {id, 13}]). - -Show all the keys being read between 500 and 600: - - lager:trace_console([{operation, get}, {key, '>', 500}, {key, '<', 600}]). - -Trace specific GET/PUT operstions: - - dbms:traced_get(Key). - dbms:traced_put(Key). - -Manually erase a vnode: - - dbms_dynamo:wipe(VnodeID). From 1c5e8d372629de27d6b65718402f190552b8b5a0 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Tue, 6 May 2014 16:46:07 +0900 Subject: [PATCH 68/70] Change env var BITCASK_PULSE -> USE_PULSE --- prototype/corfurl/Makefile | 4 ++-- prototype/corfurl/rebar.config.script | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/prototype/corfurl/Makefile b/prototype/corfurl/Makefile index 5a67094..c3bdbe5 100644 --- a/prototype/corfurl/Makefile +++ b/prototype/corfurl/Makefile @@ -22,5 +22,5 @@ eunit: $(REBAR_BIN) -v skip_deps=true eunit pulse: compile - env BITCASK_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile - env BITCASK_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit + env USE_PULSE=1 $(REBAR_BIN) skip_deps=true clean compile + env USE_PULSE=1 $(REBAR_BIN) skip_deps=true -D PULSE eunit diff --git a/prototype/corfurl/rebar.config.script b/prototype/corfurl/rebar.config.script index 0eb68b7..b625bd2 100644 --- a/prototype/corfurl/rebar.config.script +++ b/prototype/corfurl/rebar.config.script @@ -1,4 +1,4 @@ -PulseBuild = case os:getenv("BITCASK_PULSE") of +PulseBuild = case os:getenv("USE_PULSE") of false -> false; _ -> From 22f46c329d6bab74f365d1290832a4f22934dce8 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 1 Oct 2014 18:58:50 +0900 Subject: [PATCH 69/70] Add annoying & verbose TODO reminder for FILL implementation fixing! --- prototype/corfurl/src/corfurl.erl | 1 + prototype/corfurl/src/corfurl_flu.erl | 1 + 2 files changed, 2 insertions(+) diff --git a/prototype/corfurl/src/corfurl.erl b/prototype/corfurl/src/corfurl.erl index cfb8f82..660e419 100644 --- a/prototype/corfurl/src/corfurl.erl +++ b/prototype/corfurl/src/corfurl.erl @@ -261,6 +261,7 @@ trim_page(#proj{epoch=Epoch} = P, LPN) -> fill_or_trim_page([], _Epoch, _LPN, _Func) -> ok; fill_or_trim_page([H|T], Epoch, LPN, Func) -> + %% io:format(user, "~s.erl line ~w: TODO: this 'fill or trim' logic is probably stupid, due to mis-remembering the CORFU paper, sorry! Commenting out this warning line is OK, if you wish to proceed with testing Corfurl. This code can change a fill into a trim. Those things are supposed to be separate, silly me, a fill should never automagically change to a trim.\n", [?MODULE, ?LINE]), case corfurl_flu:Func(flu_pid(H), Epoch, LPN) of Res when Res == ok; Res == error_trimmed -> %% Detecting a race here between fills and trims is too crazy, diff --git a/prototype/corfurl/src/corfurl_flu.erl b/prototype/corfurl/src/corfurl_flu.erl index 759afca..c092761 100644 --- a/prototype/corfurl/src/corfurl_flu.erl +++ b/prototype/corfurl/src/corfurl_flu.erl @@ -213,6 +213,7 @@ handle_call({{fill, ClientEpoch, _LogicalPN}, LC1}, _From, {reply, {error_badepoch, LC2}, State}; handle_call({{fill, _ClientEpoch, LogicalPN}, LC1}, _From, State) -> LC2 = lclock_update(LC1), + io:format(user, "~s.erl line ~w: TODO: this 'fill or trim' logic is probably stupid, due to mis-remembering the CORFU paper, sorry! Commenting out this warning line is OK, if you wish to proceed with testing Corfurl. This code can change a fill into a trim. Those things are supposed to be separate, silly me, a fill should never automagically change to a trim.\n", [?MODULE, ?LINE]), {Reply, NewState} = do_trim_or_fill(fill, LogicalPN, State), ?EVENT_LOG({flu, fill, self(), LogicalPN, Reply}), {reply, {Reply, LC2}, NewState}; From 2bf28122c103c8c812f20ea90e9a4470c80f61a4 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 20 Nov 2014 15:56:01 +0900 Subject: [PATCH 70/70] Fix typos in docs/corfurl.md --- prototype/corfurl/docs/corfurl.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype/corfurl/docs/corfurl.md b/prototype/corfurl/docs/corfurl.md index 08960dc..8801454 100644 --- a/prototype/corfurl/docs/corfurl.md +++ b/prototype/corfurl/docs/corfurl.md @@ -12,7 +12,7 @@ the shared log underneath it. But the Hyder paper has lots of interesting bits about how you'd go about creating a distributed DB where the transaction log *is* the DB. -### "CORFU: A Distributed Shared LogCORFU: A Distributed Shared Log" +### "CORFU: A Distributed Shared Log" MAHESH BALAKRISHNAN, DAHLIA MALKHI, JOHN D. DAVIS, and VIJAYAN PRABHAKARAN, Microsoft Research Silicon Valley, MICHAEL WEI, @@ -79,7 +79,7 @@ parallel and make commit/abort decisions in parallel, based on what conflicts (or not) that they see in the log. Scott's first reading was "No way, wacky" ... and has since changed his mind. -http://www.snookles.com/scottmp/corfu/CIDR2011Proceedings.pdf +http://www.snookles.com/scottmp/corfu/CIDR11Proceedings.pdf pages 9-20