Fix PULSE model difficulty of how to handle races between write & trim.

This trim race is (as far as I can tell) fine -- I see no correctness
problem with CORFU, on the client side or the server side.  However,
this race with a trim causes a model problem that I believe can be
solved this way:

1. We must keep track of the fact that the page write is happening:
someone can notice the write via read-repair or even a regular read by
the tail.  We do this in basically the way that all other writes
are handled in the ValuesR relation.

2. Add new code to client-side writer: if there's a trim race, *and*
if we're using PULSE, then return a special error code that says that
the write was ok *and* that we raced with trim.

2b. If we aren't using pulse, just return {ok, LPN}.

3. For the transition check property, treat the new return code as if
it is a w_tt.  Actually, we use a special marker atom, w_special_trimmed
for that purpose, but it is later treated the same way that w_tt is by the
filter_transition_trimfill_suffixes() filter.
This commit is contained in:
Scott Lystig Fritchie 2014-02-24 21:34:09 +09:00
parent 13e15e0ecf
commit eabebac6f2
4 changed files with 64 additions and 25 deletions

View file

@ -52,6 +52,8 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
X when X == error_overwritten; X == error_trimmed ->
report_lost_race(LPN, X),
append_page(Sequencer, P, Page);
{special_trimmed, LPN}=XX ->
XX;
Else ->
exit({todo, ?MODULE, line, ?LINE, Else})
end;
@ -62,14 +64,14 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
write_single_page(#proj{epoch=Epoch} = P, LPN, Page) ->
Chain = project_to_chain(LPN, P),
write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1).
write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1, ok).
write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth) ->
ok;
write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) ->
Reply;
write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) ->
case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of
ok ->
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
error_badepoch ->
%% TODO: Interesting case: there may be cases where retrying with
%% a new epoch & that epoch's projection is just fine (and
@ -77,11 +79,19 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
%% Figure out what those cases are, then for the
%% destined-to-fail case, try to clean up (via trim?)?
error_badepoch;
error_trimmed ->
error_trimmed when Nth == 1 ->
%% Whoa, partner, you're movin' kinda fast for a trim.
%% This might've been due to us being too slow and someone
%% else junked us.
error_trimmed;
error_trimmed when Nth > 1 ->
%% We're racing with a trimmer. We won the race at head,
%% but here in the middle or tail (Nth > 1), we lost.
%% Our strategy is keep racing down to the tail.
%% If we continue to lose the exact same race for the rest
%% of the chain, the 1st clause of this func will return 'ok'.
%% That is *exactly* our intent and purpose!
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
error_overwritten when Nth == 1 ->
%% The sequencer lied, or we didn't use the sequencer and
%% guessed and guessed poorly, or someone is accidentally
@ -96,21 +106,16 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
{ok, AlreadyThere} when AlreadyThere =:= Page ->
%% Alright, well, let's go continue the repair/writing,
%% since we agree on the page's value.
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
error_badepoch ->
%% TODO: same TODO as the above error_badepoch case.
error_badepoch;
error_trimmed ->
%% PULSE can drive us to this case, excellent!
%% We had a race with read-repair and lost (the write).
%% Now this read failed with error_trimmed because we
%% lost a race with someone trimming this block.
%% Let's be paranoid and repair, just in case.
OurResult = error_trimmed,
error_trimmed = read_repair_chain(Epoch, LPN, Chain),
OurResult;
%% This is the same as 'error_trimmed when Nth > 1' above.
%% Do the same thing.
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
Else ->
%% Guess what?? PULSE can drive us to this case, excellent!
%% Can PULSE can drive us to this case?
giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth})
end
end.

View file

@ -48,8 +48,7 @@
%%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
-define(EVENT_LOG(X), ok).
%% -define(EVENT_LOG(X), erlang:display(X)).
%%% -define(EVENT_LOG(X), event_logger(X)).
%% -define(EVENT_LOG(X), event_logger:event(X)).
-record(state, {
dir :: string(),
@ -170,6 +169,7 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From,
?EVENT_LOG({flu, write, self(), LogicalPN, ok}),
{reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}};
Else ->
?EVENT_LOG({flu, write, self(), LogicalPN, Else}),
{reply, {Else, LC2}, State}
end;
@ -416,6 +416,18 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) ->
badarg
end.
-ifdef(PULSE).
%% We do *not* want to remove any special PULSE return code.
undo_special_pulse_test_result(Res) ->
Res.
-else. % PULSE
undo_special_pulse_test_result({special_trimmed, LPN}) ->
{ok, LPN};
undo_special_pulse_test_result(Res) ->
Res.
-endif. % PULSE
-ifdef(PULSE_HACKING).
%% Create a trace file that can be formatted by "mscgen" utility.
%% Lots of hand-editing is required after creating the file, sorry!

View file

@ -156,6 +156,9 @@ postcondition(_S, {call, _, setup, _}, #run{} = _V) ->
true;
postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) ->
true;
postcondition(_S, {call, _, append, _}, {special_trimmed, LPN})
when is_integer(LPN) ->
true;
postcondition(_S, {call, _, append, _}, V) ->
eqeq(V, todoTODO_fixit);
postcondition(_S, {call, _, read_approx, _}, V) ->
@ -320,6 +323,7 @@ check_trace(Trace0, _Cmds, _Seed) ->
AllLPNsR = eqc_temporal:stateful(
fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN;
({call, _Pid, {read, LPN, _, _}}) -> LPN;
({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN
@ -341,6 +345,14 @@ check_trace(Trace0, _Cmds, _Seed) ->
Mods = eqc_temporal:stateful(
fun({call, Pid, {append, Pg, will_be, LPN}}) ->
{mod_working, w_1, LPN, Pg, Pid};
({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) ->
%% This is a special case for the model. We know that
%% a write raced with a trim and lost (at least some of
%% the time inside the chain). But the transition that
%% we model in this case is a special w_ type that is
%% is trated specially by the dictionary-making
%% creation of the ValuesR relation.
{mod_working, w_special_trimmed, LPN, Pg, Pid};
({call, Pid, {fill, LPN, will_be, ok}}) ->
{mod_working, w_ft, LPN, fill, Pid};
({call, Pid, {trim, LPN, will_be, ok}}) ->
@ -396,13 +408,17 @@ check_trace(Trace0, _Cmds, _Seed) ->
D;
false ->
orddict:append(LPN, error_trimmed,D)
end
end;
({mod_start, w_special_trimmed, LPN, Pg}, D)->
orddict:append(LPN, Pg, D)
end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]),
Dict3 = lists:foldl(
fun({mod_end, w_1, LPN, Pg}, D) ->
orddict:store(LPN, [Pg], D);
({mod_end, WType, LPN, _Pg}, D)
when WType == w_ft; WType == w_tt ->
orddict:store(LPN, [error_trimmed], D);
({mod_end, w_special_trimmed, LPN, _Pg}, D) ->
orddict:store(LPN, [error_trimmed], D)
end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]),
{{TS1, TS2, [{values, Dict3}]}, Dict3}
@ -635,14 +651,19 @@ filter_transition_trimfill_suffixes(Ttns) ->
filter_1_transition_list([]) ->
[];
filter_1_transition_list(Old) ->
New = lists:reverse(lists:dropwhile(fun(w_tt) -> true;
(w_ft) -> true;
%% Strategy: Chop off all of the w_* at the end, then look at **Old** to
%% see if we chopped off any. If we did chop off any, then add back a
%% constant 'w_t+' as a suffix.
New = lists:reverse(lists:dropwhile(fun(w_tt) -> true;
(w_ft) -> true;
(w_special_trimmed) -> true;
(_) -> false
end, lists:reverse(Old))),
Suffix = case lists:last(Old) of
w_ft -> ['w_t+'];
w_tt -> ['w_t+'];
_ -> []
w_ft -> ['w_t+'];
w_tt -> ['w_t+'];
w_special_trimmed -> ['w_t+'];
_ -> []
end,
New ++ Suffix.

View file

@ -33,7 +33,8 @@
setup_flu_basedir() ->
"/tmp/" ++ atom_to_list(?MODULE) ++ ".".
"./tmp." ++
atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".".
setup_flu_dir(N) ->
setup_flu_basedir() ++ integer_to_list(N).