Fix PULSE model difficulty of how to handle races between write & trim.

This trim race is (as far as I can tell) fine -- I see no correctness problem with CORFU, on the client side or the server side. However, this race with a trim causes a model problem that I believe can be solved this way: 1. We must keep track of the fact that the page write is happening: someone can notice the write via read-repair or even a regular read by the tail. We do this in basically the way that all other writes are handled in the ValuesR relation. 2. Add new code to client-side writer: if there's a trim race, *and* if we're using PULSE, then return a special error code that says that the write was ok *and* that we raced with trim. 2b. If we aren't using pulse, just return {ok, LPN}. 3. For the transition check property, treat the new return code as if it is a w_tt. Actually, we use a special marker atom, w_special_trimmed for that purpose, but it is later treated the same way that w_tt is by the filter_transition_trimfill_suffixes() filter.
2014-02-24 21:34:09 +09:00 · 2014-02-24 21:34:09 +09:00 · eabebac6f2
commit eabebac6f2
parent 13e15e0ecf
4 changed files with 64 additions and 25 deletions
--- a/prototype/corfurl/src/corfurl.erl
+++ b/prototype/corfurl/src/corfurl.erl
@ -52,6 +52,8 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
                X when X == error_overwritten; X == error_trimmed ->
                    report_lost_race(LPN, X),
                    append_page(Sequencer, P, Page);
                {special_trimmed, LPN}=XX ->
                    XX;
                Else ->
                    exit({todo, ?MODULE, line, ?LINE, Else})
            end;
@ -62,14 +64,14 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
 write_single_page(#proj{epoch=Epoch} = P, LPN, Page) ->
    Chain = project_to_chain(LPN, P),
-    write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1).
+    write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1, ok).
-write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth) ->
+write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) ->
-    ok;
+    Reply;
-write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
+write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) ->
    case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of
        ok ->
-            write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
+            write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
        error_badepoch ->
            %% TODO: Interesting case: there may be cases where retrying with
            %%       a new epoch & that epoch's projection is just fine (and
@ -77,11 +79,19 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
            %%       Figure out what those cases are, then for the
            %%       destined-to-fail case, try to clean up (via trim?)?
            error_badepoch;
-        error_trimmed ->
+        error_trimmed when Nth == 1 ->
            %% Whoa, partner, you're movin' kinda fast for a trim.
            %% This might've been due to us being too slow and someone
            %% else junked us.
            error_trimmed;
        error_trimmed when Nth > 1 ->
            %% We're racing with a trimmer.  We won the race at head,
            %% but here in the middle or tail (Nth > 1), we lost.
            %% Our strategy is keep racing down to the tail.
            %% If we continue to lose the exact same race for the rest
            %% of the chain, the 1st clause of this func will return 'ok'.
            %% That is *exactly* our intent and purpose!
            write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
        error_overwritten when Nth == 1 ->
            %% The sequencer lied, or we didn't use the sequencer and
            %% guessed and guessed poorly, or someone is accidentally
@ -96,21 +106,16 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
                {ok, AlreadyThere} when AlreadyThere =:= Page ->
                    %% Alright, well, let's go continue the repair/writing,
                    %% since we agree on the page's value.
-                    write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
+                    write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
                error_badepoch ->
                    %% TODO: same TODO as the above error_badepoch case.
                    error_badepoch;
                error_trimmed ->
-                    %% PULSE can drive us to this case, excellent!
+                    %% This is the same as 'error_trimmed when Nth > 1' above.
-                    %% We had a race with read-repair and lost (the write).
+                    %% Do the same thing.
-                    %% Now this read failed with error_trimmed because we
+                    write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
                    %% lost a race with someone trimming this block.
                    %% Let's be paranoid and repair, just in case.
                    OurResult = error_trimmed,
                    error_trimmed = read_repair_chain(Epoch, LPN, Chain),
                    OurResult;
                Else ->
-                    %% Guess what?? PULSE can drive us to this case, excellent!
+                    %% Can PULSE can drive us to this case?
                    giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth})
            end
    end.
--- a/prototype/corfurl/src/corfurl_flu.erl
+++ b/prototype/corfurl/src/corfurl_flu.erl
@ -48,8 +48,7 @@
 %%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
 -define(EVENT_LOG(X), ok).
-%% -define(EVENT_LOG(X), erlang:display(X)).
+%% -define(EVENT_LOG(X), event_logger:event(X)).
 %%% -define(EVENT_LOG(X), event_logger(X)).
 -record(state, {
          dir :: string(),
@ -170,6 +169,7 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From,
            ?EVENT_LOG({flu, write, self(), LogicalPN, ok}),
            {reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}};
        Else ->
            ?EVENT_LOG({flu, write, self(), LogicalPN, Else}),
            {reply, {Else, LC2}, State}
    end;
@ -416,6 +416,18 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) ->
            badarg
    end.
 -ifdef(PULSE).
 %% We do *not* want to remove any special PULSE return code.
 undo_special_pulse_test_result(Res) ->
    Res.
 -else.  % PULSE
 undo_special_pulse_test_result({special_trimmed, LPN}) ->
    {ok, LPN};
 undo_special_pulse_test_result(Res) ->
    Res.
 -endif. % PULSE
 -ifdef(PULSE_HACKING).
 %% Create a trace file that can be formatted by "mscgen" utility.
 %% Lots of hand-editing is required after creating the file, sorry!
--- a/prototype/corfurl/test/corfurl_pulse.erl
+++ b/prototype/corfurl/test/corfurl_pulse.erl
@ -156,6 +156,9 @@ postcondition(_S, {call, _, setup, _}, #run{} = _V) ->
    true;
 postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) ->
    true;
 postcondition(_S, {call, _, append, _}, {special_trimmed, LPN})
  when is_integer(LPN) ->
    true;
 postcondition(_S, {call, _, append, _}, V) ->
    eqeq(V, todoTODO_fixit);
 postcondition(_S, {call, _, read_approx, _}, V) ->
@ -320,6 +323,7 @@ check_trace(Trace0, _Cmds, _Seed) ->
    AllLPNsR = eqc_temporal:stateful(
                fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
                   ({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN;
                   ({call, _Pid, {read, LPN, _, _}}) -> LPN;
                   ({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
                   ({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN
@ -341,6 +345,14 @@ check_trace(Trace0, _Cmds, _Seed) ->
    Mods = eqc_temporal:stateful(
               fun({call, Pid, {append, Pg, will_be, LPN}}) ->
                       {mod_working, w_1, LPN, Pg, Pid};
                  ({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) ->
                       %% This is a special case for the model.  We know that
                       %% a write raced with a trim and lost (at least some of
                       %% the time inside the chain).  But the transition that
                       %% we model in this case is a special w_ type that is
                       %% is trated specially by the dictionary-making
                       %% creation of the ValuesR relation.
                       {mod_working, w_special_trimmed, LPN, Pg, Pid};
                  ({call, Pid, {fill, LPN, will_be, ok}}) ->
                       {mod_working, w_ft, LPN, fill, Pid};
                  ({call, Pid, {trim, LPN, will_be, ok}}) ->
@ -396,13 +408,17 @@ check_trace(Trace0, _Cmds, _Seed) ->
                                            D;
                                        false ->
                                            orddict:append(LPN, error_trimmed,D)
-                                    end
+                                    end;
                               ({mod_start, w_special_trimmed, LPN, Pg}, D)->
                                       orddict:append(LPN, Pg, D)
                            end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]),
                  Dict3 = lists:foldl(
                            fun({mod_end, w_1, LPN, Pg}, D) ->
                                    orddict:store(LPN, [Pg], D);
                               ({mod_end, WType, LPN, _Pg}, D)
                                  when WType == w_ft; WType == w_tt ->
                                    orddict:store(LPN, [error_trimmed], D);
                               ({mod_end, w_special_trimmed, LPN, _Pg}, D) ->
                                    orddict:store(LPN, [error_trimmed], D)
                            end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]),
                  {{TS1, TS2, [{values, Dict3}]}, Dict3}
@ -635,13 +651,18 @@ filter_transition_trimfill_suffixes(Ttns) ->
 filter_1_transition_list([]) ->
    [];
 filter_1_transition_list(Old) ->
    %% Strategy: Chop off all of the w_* at the end, then look at **Old** to
    %% see if we chopped off any.  If we did chop off any, then add back a
    %% constant 'w_t+' as a suffix.
    New = lists:reverse(lists:dropwhile(fun(w_tt)              -> true;
                                           (w_ft)              -> true;
                                           (w_special_trimmed) -> true;
                                           (_)    -> false
                                        end, lists:reverse(Old))),
    Suffix = case lists:last(Old) of
                 w_ft ->              ['w_t+'];
                 w_tt ->              ['w_t+'];
                 w_special_trimmed -> ['w_t+'];
                 _ ->                 []
             end,
    New ++ Suffix.
--- a/prototype/corfurl/test/corfurl_test.erl
+++ b/prototype/corfurl/test/corfurl_test.erl
@ -33,7 +33,8 @@
 setup_flu_basedir() ->
-    "/tmp/" ++ atom_to_list(?MODULE) ++ ".".    
+    "./tmp." ++
        atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".".
 setup_flu_dir(N) ->
    setup_flu_basedir() ++ integer_to_list(N).