Fix PULSE model difficulty of how to handle races between write & trim.
This trim race is (as far as I can tell) fine -- I see no correctness problem with CORFU, on the client side or the server side. However, this race with a trim causes a model problem that I believe can be solved this way: 1. We must keep track of the fact that the page write is happening: someone can notice the write via read-repair or even a regular read by the tail. We do this in basically the way that all other writes are handled in the ValuesR relation. 2. Add new code to client-side writer: if there's a trim race, *and* if we're using PULSE, then return a special error code that says that the write was ok *and* that we raced with trim. 2b. If we aren't using pulse, just return {ok, LPN}. 3. For the transition check property, treat the new return code as if it is a w_tt. Actually, we use a special marker atom, w_special_trimmed for that purpose, but it is later treated the same way that w_tt is by the filter_transition_trimfill_suffixes() filter.
This commit is contained in:
parent
13e15e0ecf
commit
eabebac6f2
4 changed files with 64 additions and 25 deletions
|
@ -52,6 +52,8 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
|
||||||
X when X == error_overwritten; X == error_trimmed ->
|
X when X == error_overwritten; X == error_trimmed ->
|
||||||
report_lost_race(LPN, X),
|
report_lost_race(LPN, X),
|
||||||
append_page(Sequencer, P, Page);
|
append_page(Sequencer, P, Page);
|
||||||
|
{special_trimmed, LPN}=XX ->
|
||||||
|
XX;
|
||||||
Else ->
|
Else ->
|
||||||
exit({todo, ?MODULE, line, ?LINE, Else})
|
exit({todo, ?MODULE, line, ?LINE, Else})
|
||||||
end;
|
end;
|
||||||
|
@ -62,14 +64,14 @@ append_page(Sequencer, P, Page, Retries) when Retries < 50 ->
|
||||||
|
|
||||||
write_single_page(#proj{epoch=Epoch} = P, LPN, Page) ->
|
write_single_page(#proj{epoch=Epoch} = P, LPN, Page) ->
|
||||||
Chain = project_to_chain(LPN, P),
|
Chain = project_to_chain(LPN, P),
|
||||||
write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1).
|
write_single_page_to_chain(Chain, Chain, Epoch, LPN, Page, 1, ok).
|
||||||
|
|
||||||
write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth) ->
|
write_single_page_to_chain([], _Chain, _Epoch, _LPN, _Page, _Nth, Reply) ->
|
||||||
ok;
|
Reply;
|
||||||
write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
|
write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth, Reply) ->
|
||||||
case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of
|
case corfurl_flu:write(flu_pid(FLU), Epoch, LPN, Page) of
|
||||||
ok ->
|
ok ->
|
||||||
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
|
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
|
||||||
error_badepoch ->
|
error_badepoch ->
|
||||||
%% TODO: Interesting case: there may be cases where retrying with
|
%% TODO: Interesting case: there may be cases where retrying with
|
||||||
%% a new epoch & that epoch's projection is just fine (and
|
%% a new epoch & that epoch's projection is just fine (and
|
||||||
|
@ -77,11 +79,19 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
|
||||||
%% Figure out what those cases are, then for the
|
%% Figure out what those cases are, then for the
|
||||||
%% destined-to-fail case, try to clean up (via trim?)?
|
%% destined-to-fail case, try to clean up (via trim?)?
|
||||||
error_badepoch;
|
error_badepoch;
|
||||||
error_trimmed ->
|
error_trimmed when Nth == 1 ->
|
||||||
%% Whoa, partner, you're movin' kinda fast for a trim.
|
%% Whoa, partner, you're movin' kinda fast for a trim.
|
||||||
%% This might've been due to us being too slow and someone
|
%% This might've been due to us being too slow and someone
|
||||||
%% else junked us.
|
%% else junked us.
|
||||||
error_trimmed;
|
error_trimmed;
|
||||||
|
error_trimmed when Nth > 1 ->
|
||||||
|
%% We're racing with a trimmer. We won the race at head,
|
||||||
|
%% but here in the middle or tail (Nth > 1), we lost.
|
||||||
|
%% Our strategy is keep racing down to the tail.
|
||||||
|
%% If we continue to lose the exact same race for the rest
|
||||||
|
%% of the chain, the 1st clause of this func will return 'ok'.
|
||||||
|
%% That is *exactly* our intent and purpose!
|
||||||
|
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
|
||||||
error_overwritten when Nth == 1 ->
|
error_overwritten when Nth == 1 ->
|
||||||
%% The sequencer lied, or we didn't use the sequencer and
|
%% The sequencer lied, or we didn't use the sequencer and
|
||||||
%% guessed and guessed poorly, or someone is accidentally
|
%% guessed and guessed poorly, or someone is accidentally
|
||||||
|
@ -96,21 +106,16 @@ write_single_page_to_chain([FLU|Rest], Chain, Epoch, LPN, Page, Nth) ->
|
||||||
{ok, AlreadyThere} when AlreadyThere =:= Page ->
|
{ok, AlreadyThere} when AlreadyThere =:= Page ->
|
||||||
%% Alright, well, let's go continue the repair/writing,
|
%% Alright, well, let's go continue the repair/writing,
|
||||||
%% since we agree on the page's value.
|
%% since we agree on the page's value.
|
||||||
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1);
|
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, Reply);
|
||||||
error_badepoch ->
|
error_badepoch ->
|
||||||
%% TODO: same TODO as the above error_badepoch case.
|
%% TODO: same TODO as the above error_badepoch case.
|
||||||
error_badepoch;
|
error_badepoch;
|
||||||
error_trimmed ->
|
error_trimmed ->
|
||||||
%% PULSE can drive us to this case, excellent!
|
%% This is the same as 'error_trimmed when Nth > 1' above.
|
||||||
%% We had a race with read-repair and lost (the write).
|
%% Do the same thing.
|
||||||
%% Now this read failed with error_trimmed because we
|
write_single_page_to_chain(Rest, Chain, Epoch, LPN, Page, Nth+1, {special_trimmed, LPN});
|
||||||
%% lost a race with someone trimming this block.
|
|
||||||
%% Let's be paranoid and repair, just in case.
|
|
||||||
OurResult = error_trimmed,
|
|
||||||
error_trimmed = read_repair_chain(Epoch, LPN, Chain),
|
|
||||||
OurResult;
|
|
||||||
Else ->
|
Else ->
|
||||||
%% Guess what?? PULSE can drive us to this case, excellent!
|
%% Can PULSE can drive us to this case?
|
||||||
giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth})
|
giant_error({left_off_here, ?MODULE, ?LINE, Else, nth, Nth})
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
|
@ -48,8 +48,7 @@
|
||||||
|
|
||||||
%%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
|
%%% Debugging: for extra events in the PULSE event log, use the 2nd statement.
|
||||||
-define(EVENT_LOG(X), ok).
|
-define(EVENT_LOG(X), ok).
|
||||||
%% -define(EVENT_LOG(X), erlang:display(X)).
|
%% -define(EVENT_LOG(X), event_logger:event(X)).
|
||||||
%%% -define(EVENT_LOG(X), event_logger(X)).
|
|
||||||
|
|
||||||
-record(state, {
|
-record(state, {
|
||||||
dir :: string(),
|
dir :: string(),
|
||||||
|
@ -170,6 +169,7 @@ handle_call({{write, _ClientEpoch, LogicalPN, PageBin}, LC1}, _From,
|
||||||
?EVENT_LOG({flu, write, self(), LogicalPN, ok}),
|
?EVENT_LOG({flu, write, self(), LogicalPN, ok}),
|
||||||
{reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}};
|
{reply, {ok, LC2}, State#state{max_logical_page=NewMLPN}};
|
||||||
Else ->
|
Else ->
|
||||||
|
?EVENT_LOG({flu, write, self(), LogicalPN, Else}),
|
||||||
{reply, {Else, LC2}, State}
|
{reply, {Else, LC2}, State}
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
@ -416,6 +416,18 @@ trim_page(Op, LogicalPN, #state{max_mem=MaxMem, mem_fh=FH} = S) ->
|
||||||
badarg
|
badarg
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
-ifdef(PULSE).
|
||||||
|
%% We do *not* want to remove any special PULSE return code.
|
||||||
|
undo_special_pulse_test_result(Res) ->
|
||||||
|
Res.
|
||||||
|
-else. % PULSE
|
||||||
|
undo_special_pulse_test_result({special_trimmed, LPN}) ->
|
||||||
|
{ok, LPN};
|
||||||
|
undo_special_pulse_test_result(Res) ->
|
||||||
|
Res.
|
||||||
|
-endif. % PULSE
|
||||||
|
|
||||||
|
|
||||||
-ifdef(PULSE_HACKING).
|
-ifdef(PULSE_HACKING).
|
||||||
%% Create a trace file that can be formatted by "mscgen" utility.
|
%% Create a trace file that can be formatted by "mscgen" utility.
|
||||||
%% Lots of hand-editing is required after creating the file, sorry!
|
%% Lots of hand-editing is required after creating the file, sorry!
|
||||||
|
|
|
@ -156,6 +156,9 @@ postcondition(_S, {call, _, setup, _}, #run{} = _V) ->
|
||||||
true;
|
true;
|
||||||
postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) ->
|
postcondition(_S, {call, _, append, _}, {ok, LPN}) when is_integer(LPN) ->
|
||||||
true;
|
true;
|
||||||
|
postcondition(_S, {call, _, append, _}, {special_trimmed, LPN})
|
||||||
|
when is_integer(LPN) ->
|
||||||
|
true;
|
||||||
postcondition(_S, {call, _, append, _}, V) ->
|
postcondition(_S, {call, _, append, _}, V) ->
|
||||||
eqeq(V, todoTODO_fixit);
|
eqeq(V, todoTODO_fixit);
|
||||||
postcondition(_S, {call, _, read_approx, _}, V) ->
|
postcondition(_S, {call, _, read_approx, _}, V) ->
|
||||||
|
@ -320,6 +323,7 @@ check_trace(Trace0, _Cmds, _Seed) ->
|
||||||
|
|
||||||
AllLPNsR = eqc_temporal:stateful(
|
AllLPNsR = eqc_temporal:stateful(
|
||||||
fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
|
fun({call, _Pid, {append, _Pg, will_be, LPN}}) -> LPN;
|
||||||
|
({call, _Pid, {append, _Pg, will_fail, {special_trimmed, LPN}}}) -> LPN;
|
||||||
({call, _Pid, {read, LPN, _, _}}) -> LPN;
|
({call, _Pid, {read, LPN, _, _}}) -> LPN;
|
||||||
({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
|
({call, _Pid, {fill, LPN, will_be, ok}}) -> LPN;
|
||||||
({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN
|
({call, _Pid, {trim, LPN, will_be, ok}}) -> LPN
|
||||||
|
@ -341,6 +345,14 @@ check_trace(Trace0, _Cmds, _Seed) ->
|
||||||
Mods = eqc_temporal:stateful(
|
Mods = eqc_temporal:stateful(
|
||||||
fun({call, Pid, {append, Pg, will_be, LPN}}) ->
|
fun({call, Pid, {append, Pg, will_be, LPN}}) ->
|
||||||
{mod_working, w_1, LPN, Pg, Pid};
|
{mod_working, w_1, LPN, Pg, Pid};
|
||||||
|
({call, Pid, {append, Pg, will_fail, {special_trimmed, LPN}}}) ->
|
||||||
|
%% This is a special case for the model. We know that
|
||||||
|
%% a write raced with a trim and lost (at least some of
|
||||||
|
%% the time inside the chain). But the transition that
|
||||||
|
%% we model in this case is a special w_ type that is
|
||||||
|
%% is trated specially by the dictionary-making
|
||||||
|
%% creation of the ValuesR relation.
|
||||||
|
{mod_working, w_special_trimmed, LPN, Pg, Pid};
|
||||||
({call, Pid, {fill, LPN, will_be, ok}}) ->
|
({call, Pid, {fill, LPN, will_be, ok}}) ->
|
||||||
{mod_working, w_ft, LPN, fill, Pid};
|
{mod_working, w_ft, LPN, fill, Pid};
|
||||||
({call, Pid, {trim, LPN, will_be, ok}}) ->
|
({call, Pid, {trim, LPN, will_be, ok}}) ->
|
||||||
|
@ -396,13 +408,17 @@ check_trace(Trace0, _Cmds, _Seed) ->
|
||||||
D;
|
D;
|
||||||
false ->
|
false ->
|
||||||
orddict:append(LPN, error_trimmed,D)
|
orddict:append(LPN, error_trimmed,D)
|
||||||
end
|
end;
|
||||||
|
({mod_start, w_special_trimmed, LPN, Pg}, D)->
|
||||||
|
orddict:append(LPN, Pg, D)
|
||||||
end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]),
|
end, Dict1, [X || X={mod_start,_,_,_} <- StEnds]),
|
||||||
Dict3 = lists:foldl(
|
Dict3 = lists:foldl(
|
||||||
fun({mod_end, w_1, LPN, Pg}, D) ->
|
fun({mod_end, w_1, LPN, Pg}, D) ->
|
||||||
orddict:store(LPN, [Pg], D);
|
orddict:store(LPN, [Pg], D);
|
||||||
({mod_end, WType, LPN, _Pg}, D)
|
({mod_end, WType, LPN, _Pg}, D)
|
||||||
when WType == w_ft; WType == w_tt ->
|
when WType == w_ft; WType == w_tt ->
|
||||||
|
orddict:store(LPN, [error_trimmed], D);
|
||||||
|
({mod_end, w_special_trimmed, LPN, _Pg}, D) ->
|
||||||
orddict:store(LPN, [error_trimmed], D)
|
orddict:store(LPN, [error_trimmed], D)
|
||||||
end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]),
|
end, Dict2, [X || X={mod_end,_,_,_} <- StEnds]),
|
||||||
{{TS1, TS2, [{values, Dict3}]}, Dict3}
|
{{TS1, TS2, [{values, Dict3}]}, Dict3}
|
||||||
|
@ -635,13 +651,18 @@ filter_transition_trimfill_suffixes(Ttns) ->
|
||||||
filter_1_transition_list([]) ->
|
filter_1_transition_list([]) ->
|
||||||
[];
|
[];
|
||||||
filter_1_transition_list(Old) ->
|
filter_1_transition_list(Old) ->
|
||||||
|
%% Strategy: Chop off all of the w_* at the end, then look at **Old** to
|
||||||
|
%% see if we chopped off any. If we did chop off any, then add back a
|
||||||
|
%% constant 'w_t+' as a suffix.
|
||||||
New = lists:reverse(lists:dropwhile(fun(w_tt) -> true;
|
New = lists:reverse(lists:dropwhile(fun(w_tt) -> true;
|
||||||
(w_ft) -> true;
|
(w_ft) -> true;
|
||||||
|
(w_special_trimmed) -> true;
|
||||||
(_) -> false
|
(_) -> false
|
||||||
end, lists:reverse(Old))),
|
end, lists:reverse(Old))),
|
||||||
Suffix = case lists:last(Old) of
|
Suffix = case lists:last(Old) of
|
||||||
w_ft -> ['w_t+'];
|
w_ft -> ['w_t+'];
|
||||||
w_tt -> ['w_t+'];
|
w_tt -> ['w_t+'];
|
||||||
|
w_special_trimmed -> ['w_t+'];
|
||||||
_ -> []
|
_ -> []
|
||||||
end,
|
end,
|
||||||
New ++ Suffix.
|
New ++ Suffix.
|
||||||
|
|
|
@ -33,7 +33,8 @@
|
||||||
|
|
||||||
|
|
||||||
setup_flu_basedir() ->
|
setup_flu_basedir() ->
|
||||||
"/tmp/" ++ atom_to_list(?MODULE) ++ ".".
|
"./tmp." ++
|
||||||
|
atom_to_list(?MODULE) ++ "." ++ os:getpid() ++ ".".
|
||||||
|
|
||||||
setup_flu_dir(N) ->
|
setup_flu_dir(N) ->
|
||||||
setup_flu_basedir() ++ integer_to_list(N).
|
setup_flu_basedir() ++ integer_to_list(N).
|
||||||
|
|
Loading…
Reference in a new issue