WIP: narrowing in on repair problems due to double-write errors
This commit is contained in:
parent
fbb0203f67
commit
a7f42d636e
5 changed files with 32 additions and 11 deletions
|
@ -2967,7 +2967,8 @@ zerf_find_last_annotated(FLU, MajoritySize, S) ->
|
|||
end.
|
||||
|
||||
perhaps_verbose_c111(P_latest2, S) ->
|
||||
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
||||
case true of
|
||||
%%TODO put me back: case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
||||
true ->
|
||||
Dbg2X = lists:keydelete(react, 1,
|
||||
P_latest2#projection_v1.dbg2) ++
|
||||
|
@ -2975,16 +2976,18 @@ perhaps_verbose_c111(P_latest2, S) ->
|
|||
P_latest2x = P_latest2#projection_v1{dbg2=Dbg2X}, % limit verbose len.
|
||||
Last2 = get(last_verbose),
|
||||
Summ2 = machi_projection:make_summary(P_latest2x),
|
||||
if P_latest2#projection_v1.upi == [],
|
||||
(S#ch_mgr.proj)#projection_v1.upi /= [] ->
|
||||
%% if P_latest2#projection_v1.upi == [],
|
||||
%% (S#ch_mgr.proj)#projection_v1.upi /= [] ->
|
||||
if true ->
|
||||
<<CSumRep:4/binary,_/binary>> =
|
||||
P_latest2#projection_v1.epoch_csum,
|
||||
io:format(user, "\n~s CONFIRM epoch ~w ~w upi ~w rep ~w by ~w\n", [machi_util:pretty_time(), (S#ch_mgr.proj)#projection_v1.epoch_number, CSumRep, P_latest2#projection_v1.upi, P_latest2#projection_v1.repairing, S#ch_mgr.name]);
|
||||
io:format(user, "~s CONFIRM epoch ~w ~w upi ~w rep ~w by ~w\n", [machi_util:pretty_time(), (S#ch_mgr.proj)#projection_v1.epoch_number, CSumRep, P_latest2#projection_v1.upi, P_latest2#projection_v1.repairing, S#ch_mgr.name]);
|
||||
true ->
|
||||
ok
|
||||
end,
|
||||
case proplists:get_value(private_write_verbose,
|
||||
S#ch_mgr.opts) of
|
||||
%% TODO put me back: case proplists:get_value(private_write_verbose,
|
||||
%% S#ch_mgr.opts) of
|
||||
case true of
|
||||
true when Summ2 /= Last2 ->
|
||||
put(last_verbose, Summ2),
|
||||
?V("\n~s ~p uses plain: ~w \n",
|
||||
|
|
|
@ -274,7 +274,19 @@ make_repair_directives3([{Offset, Size, CSum, _FLU}=A|Rest0],
|
|||
%% byte range from all FLUs
|
||||
%% 3b. Log big warning about data loss.
|
||||
%% 4. Log any other checksum discrepancies as they are found.
|
||||
exit({todo_repair_sanity_check, ?LINE, File, Offset, As})
|
||||
QQ = [begin
|
||||
Pxy = orddict:fetch(FLU, ProxiesDict),
|
||||
{ok, EpochID} = machi_proxy_flu1_client:get_epoch_id(
|
||||
Pxy, ?SHORT_TIMEOUT),
|
||||
NSInfo = undefined,
|
||||
XX = machi_proxy_flu1_client:read_chunk(
|
||||
Pxy, NSInfo, EpochID, File, Offset, Size, undefined,
|
||||
?SHORT_TIMEOUT),
|
||||
{FLU, XX}
|
||||
end || {__Offset, __Size, __CSum, FLU} <- As],
|
||||
|
||||
exit({todo_repair_sanity_check, ?LINE, File, Offset, {as,As}, {qq,QQ}})
|
||||
%% exit({todo_repair_sanity_check, ?LINE, File, Offset, As})
|
||||
end,
|
||||
%% List construction guarantees us that at least one ?MAX_OFFSET
|
||||
%% item remains. Sort order + our "taking" of all exact Offset+Size
|
||||
|
|
|
@ -786,9 +786,9 @@ do_repair_chunk2([], ReturnMode, Chunk, _CSum, _Repaired, _NSInfo, File, Offset,
|
|||
%% TODO: add stats for # of repairs, length(_Repaired)-1, etc etc?
|
||||
case ReturnMode of
|
||||
read ->
|
||||
{ok, Chunk, S};
|
||||
{reply, {ok, {[Chunk], []}}, S};
|
||||
{append, Offset, Size, File} ->
|
||||
{ok, {Offset, Size, File}, S}
|
||||
{reply, {ok, {[{Offset, Size, File}], []}}, S}
|
||||
end;
|
||||
do_repair_chunk2([First|Rest]=ToRepair, ReturnMode, Chunk, CSum, Repaired, NSInfo, File, Offset,
|
||||
Size, Depth, STime, #state{epoch_id=EpochID, proxies_dict=PD}=S) ->
|
||||
|
|
|
@ -231,10 +231,14 @@ find_or_make_filename(Tid, DataDir, NS, NSLocator, Prefix, N) ->
|
|||
end.
|
||||
|
||||
generate_filename(DataDir, NS, NSLocator, Prefix, N) ->
|
||||
{A,B,C} = erlang:now(),
|
||||
TODO = lists:flatten(filename:basename(DataDir) ++ "," ++ io_lib:format("~w,~w,~w", [A,B,C])),
|
||||
{F, _} = machi_util:make_data_filename(
|
||||
DataDir,
|
||||
NS, NSLocator, Prefix,
|
||||
generate_uuid_v4_str(),
|
||||
TODO,
|
||||
%% TODO put me back!!
|
||||
%% generate_uuid_v4_str(),
|
||||
N),
|
||||
binary_to_list(F).
|
||||
|
||||
|
|
|
@ -121,7 +121,9 @@ append(CRIndex, Bin, #state{verbose=V}=S) ->
|
|||
NSInfo = #ns_info{},
|
||||
NoCSum = <<>>,
|
||||
Opts1 = #append_opts{},
|
||||
io:format(user, "append_chunk ~p ~P ->\n", [Prefix, Bin, 6]),
|
||||
Res = (catch machi_cr_client:append_chunk(C, NSInfo, Prefix, Bin, NoCSum, Opts1, sec(1))),
|
||||
io:format(user, "append_chunk ~p ~P ->\n ~p\n", [Prefix, Bin, 6, Res]),
|
||||
case Res of
|
||||
{ok, {_Off, Len, _FileName}=Key} ->
|
||||
case ets:insert_new(?WRITTEN_TAB, {Key, Bin}) of
|
||||
|
@ -188,6 +190,7 @@ change_partition(Partition,
|
|||
[] -> ?V("## Turn OFF partition: ~w~n", [Partition]);
|
||||
_ -> ?V("## Turn ON partition: ~w~n", [Partition])
|
||||
end || Verbose],
|
||||
io:format(user, "partition ~p\n", [Partition]),
|
||||
machi_partition_simulator:always_these_partitions(Partition),
|
||||
_ = machi_partition_simulator:get(FLUNames),
|
||||
%% Don't wait for stable chain, tick will be executed on demand
|
||||
|
@ -456,7 +459,6 @@ assert_chunk(C, {Off, Len, FileName}=Key, Bin) ->
|
|||
FileNameStr = binary_to_list(FileName),
|
||||
%% TODO : Use CSum instead of binary (after discussion about CSum has calmed down?)
|
||||
NSInfo = undefined,
|
||||
io:format(user, "TODO fix broken read_chunk mod ~s line ~w\n", [?MODULE, ?LINE]),
|
||||
case (catch machi_cr_client:read_chunk(C, NSInfo, FileName, Off, Len, undefined, sec(3))) of
|
||||
{ok, {[{FileNameStr, Off, Bin, _}], []}} ->
|
||||
ok;
|
||||
|
|
Loading…
Reference in a new issue