From 13e15e0ecf8ae0e5e19ab98aca22c1f5f52dee09 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Mon, 24 Feb 2014 18:24:07 +0900 Subject: [PATCH] Add MSC charts to help explain BAD-looking trim race --- .../corfurl/docs/corfurl/notes/README.md | 23 +++++++ .../corfurl/notes/read-repair-race.1.mscgen | 49 +++++++++++++++ .../corfurl/notes/read-repair-race.2.mscgen | 60 +++++++++++++++++++ .../corfurl/notes/read-repair-race.2b.mscgen | 57 ++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 prototype/corfurl/docs/corfurl/notes/README.md create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen create mode 100644 prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen diff --git a/prototype/corfurl/docs/corfurl/notes/README.md b/prototype/corfurl/docs/corfurl/notes/README.md new file mode 100644 index 0000000..337a34b --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/README.md @@ -0,0 +1,23 @@ + +## read-repair-race.1. + +First attempt at using "mscgen" to make some Message Sequence +Chart (MSC) for a race found at commit 087c2605ab. + + +## read-repair-race.2. + +Second attempt. This is almost exactly the trace that is +generated by this failing test case at commit 087c2605ab: + + C2 = [{1,2,1},{{[{set,{var,1},{call,corfurl_pulse,setup,[1,2,1,standard]}}],[[{set,{var,3},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,2},{call,corfurl_pulse,read_approx,[{var,1},6201864198]}},{set,{var,5},{call,corfurl_pulse,append,[{var,1},<<0>>]}}],[{set,{var,4},{call,corfurl_pulse,append,[{var,1},<<0>>]}},{set,{var,6},{call,corfurl_pulse,trim,[{var,1},510442857]}}]]},{25152,1387,78241}},[{events,[[{no_bad_reads,[]}]]}]]. + eqc:check(corfurl_pulse:prop_pulse(), C2). + +## read-repair-race.2b.* + +Same basic condition as read-repair-race.2, but edited +substantially to make it clearer what is happening. +Also for commit 087c2605ab. + +I believe that I have a fix for the silver-colored +`error-overwritten`, but the correctness of it remains to be seen. diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen new file mode 100644 index 0000000..1cbec57 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.1.mscgen @@ -0,0 +1,49 @@ +msc { + "<0.12583.0>" [label="Client1"], "<0.12574.0>" [label="FLU1"], "<0.12575.0>" [label="FLU2"], "<0.12576.0>" [label="FLU3"], "<0.12584.0>" [label="Client2"], "<0.12585.0>" [label="Client3"]; + + "<0.12585.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ; + "<0.12583.0>" -> "<0.12574.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12576.0>" -> "<0.12585.0>" [ label = "error_unwritten" ] ; + "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair starts", textbgcolour="yellow"]; + "<0.12585.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ; + "<0.12574.0>" -> "<0.12583.0>" [ label = "ok" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12574.0>" -> "<0.12585.0>" [ label = "{ok,<<0>>}" ,textcolour="red"] ; + "<0.12585.0>" -> "<0.12575.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12575.0>" -> "<0.12585.0>" [ label = "ok" ] ; + "<0.12585.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{read,1,1}" ] ; + "<0.12576.0>" -> "<0.12585.0>" [ label = "ok" ] ; + "<0.12585.0>" abox "<0.12585.0>" [ label="Read Repair SUCCESS", textbgcolour="green"]; + "<0.12585.0>" abox "<0.12585.0>" [ label="Our problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.12584.0>" abox "<0.12584.0>" [ label = "Client2 decides to trim LPN 1", textbgcolour="orange" ] ; + "<0.12584.0>" -> "<0.12574.0>" [ label = "{trim,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "{ok,<<0>>}"] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value matches, yay!", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Continue writing", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.12574.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12575.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_overwritten" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Race with read repair? Read to double-check", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{read,1,1}" ] ; + "<0.12575.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12584.0>" -> "<0.12576.0>" [ label = "{trim,1,1}" ] ; + "<0.12576.0>" -> "<0.12584.0>" [ label = "ok" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Value MISMATCH!", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Read repair", textbgcolour="yellow" ] ; + "<0.12583.0>" -> "<0.12574.0>" [ label = "{read,1,1}" ] ; + "<0.12574.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12575.0>" [ label = "{fill,1,1}" ] ; + "<0.12575.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" -> "<0.12576.0>" [ label = "{fill,1,1}" ] ; + "<0.12576.0>" -> "<0.12583.0>" [ label = "error_trimmed" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "At this point, we give up on LPN 1.", textcolour="red" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 2", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "LPN 2 has been filled (not shown).", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "Sequencer gives us LPN 3", textbgcolour="yellow" ] ; + "<0.12583.0>" abox "<0.12583.0>" [ label = "We write LPN 3 successfully", textbgcolour="green" ] ; +} diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen new file mode 100644 index 0000000..9afffe2 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2.mscgen @@ -0,0 +1,60 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1"], "<0.32552.4>" [label="FLU2"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 3 is written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; + "<0.32555.4>" abox "<0.32557.4>" [ label="", textcolour="red"]; +} diff --git a/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen new file mode 100644 index 0000000..978dc72 --- /dev/null +++ b/prototype/corfurl/docs/corfurl/notes/read-repair-race.2b.mscgen @@ -0,0 +1,57 @@ +msc { + "<0.32555.4>" [label="Client1"], "<0.32551.4>" [label="FLU1=Head"], "<0.32552.4>" [label="FLU2=Tail"], "<0.32556.4>" [label="Client2"], "<0.32557.4>" [label="Client3"]; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Writer", textbgcolour="orange"], + "<0.32556.4>" abox "<0.32556.4>" [ label = "Reader", textbgcolour="orange"], + "<0.32557.4>" abox "<0.32557.4>" [ label = "Trimmer", textbgcolour="orange"]; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page()", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 1", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "error_unwritten" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32556.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "ok" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "See red stuff at bottom....", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "But PULSE thinks that LPN 1 was never written.", textcolour="red"] ; +# "<0.32556.4>" abox "<0.32556.4>" [ label = "Fixing this requires ... lots of pondering...", textcolour="red"] ; + "<0.32557.4>" -> "<0.32551.4>" [ label = "{trim,1,1}" ] ; + "<0.32551.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32557.4>" -> "<0.32552.4>" [ label = "{trim,1,1}" ] ; + "<0.32552.4>" -> "<0.32557.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,1,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_overwritten", textbgcolour="silver" ] ; + + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 is interrupted", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Check if an eager read-repair has written our data for us.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{read,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Wow, an eager trimmer got us, ouch.", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Start read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Read repair here is for sanity checking, not really necessary.", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{read,1,1}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{fill,1,1}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "error_trimmed" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "End read repair", textbgcolour="aqua"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Our attempt to write LPN 1 has failed. Must ask sequencer for a new LPN.", textbgcolour="yellow"] ; + "<0.32551.4>" abox "<0.32552.4>" [ label = "LPN 2 and 3 are written (race details omitted)", textbgcolour="orange"] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "Sequencer assigns LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" -> "<0.32551.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32551.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" -> "<0.32552.4>" [ label = "{write,1,4,<<0>>}" ] ; + "<0.32552.4>" -> "<0.32555.4>" [ label = "ok" ] ; + "<0.32555.4>" abox "<0.32555.4>" [ label = "append_page() -> LPN 4", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Small problem: the PULSE model never believes that append_page ever wrote LPN 1", textcolour="red"]; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 1)", textbgcolour="yellow"] ; + "<0.32556.4>" -> "<0.32552.4>" [ label = "{read,1,4}" ] ; + "<0.32552.4>" -> "<0.32556.4>" [ label = "{ok,<<0>>}" ] ; + "<0.32556.4>" abox "<0.32556.4>" [ label = "read_page(LPN 4) -> {ok, <<0>>}", textbgcolour="yellow"] ; + "<0.32555.4>" abox "<0.32557.4>" [ label="Big problem: Client2 has witnessed the same page written at LPN 1 and at LPN 4.", textcolour="red"]; +}