WIP: narrowing in on repair problems due to double-write errors
This commit is contained in:
parent
fbb0203f67
commit
a7f42d636e
5 changed files with 32 additions and 11 deletions
|
@ -2967,7 +2967,8 @@ zerf_find_last_annotated(FLU, MajoritySize, S) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
perhaps_verbose_c111(P_latest2, S) ->
|
perhaps_verbose_c111(P_latest2, S) ->
|
||||||
case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
case true of
|
||||||
|
%%TODO put me back: case proplists:get_value(private_write_verbose, S#ch_mgr.opts) of
|
||||||
true ->
|
true ->
|
||||||
Dbg2X = lists:keydelete(react, 1,
|
Dbg2X = lists:keydelete(react, 1,
|
||||||
P_latest2#projection_v1.dbg2) ++
|
P_latest2#projection_v1.dbg2) ++
|
||||||
|
@ -2975,16 +2976,18 @@ perhaps_verbose_c111(P_latest2, S) ->
|
||||||
P_latest2x = P_latest2#projection_v1{dbg2=Dbg2X}, % limit verbose len.
|
P_latest2x = P_latest2#projection_v1{dbg2=Dbg2X}, % limit verbose len.
|
||||||
Last2 = get(last_verbose),
|
Last2 = get(last_verbose),
|
||||||
Summ2 = machi_projection:make_summary(P_latest2x),
|
Summ2 = machi_projection:make_summary(P_latest2x),
|
||||||
if P_latest2#projection_v1.upi == [],
|
%% if P_latest2#projection_v1.upi == [],
|
||||||
(S#ch_mgr.proj)#projection_v1.upi /= [] ->
|
%% (S#ch_mgr.proj)#projection_v1.upi /= [] ->
|
||||||
|
if true ->
|
||||||
<<CSumRep:4/binary,_/binary>> =
|
<<CSumRep:4/binary,_/binary>> =
|
||||||
P_latest2#projection_v1.epoch_csum,
|
P_latest2#projection_v1.epoch_csum,
|
||||||
io:format(user, "\n~s CONFIRM epoch ~w ~w upi ~w rep ~w by ~w\n", [machi_util:pretty_time(), (S#ch_mgr.proj)#projection_v1.epoch_number, CSumRep, P_latest2#projection_v1.upi, P_latest2#projection_v1.repairing, S#ch_mgr.name]);
|
io:format(user, "~s CONFIRM epoch ~w ~w upi ~w rep ~w by ~w\n", [machi_util:pretty_time(), (S#ch_mgr.proj)#projection_v1.epoch_number, CSumRep, P_latest2#projection_v1.upi, P_latest2#projection_v1.repairing, S#ch_mgr.name]);
|
||||||
true ->
|
true ->
|
||||||
ok
|
ok
|
||||||
end,
|
end,
|
||||||
case proplists:get_value(private_write_verbose,
|
%% TODO put me back: case proplists:get_value(private_write_verbose,
|
||||||
S#ch_mgr.opts) of
|
%% S#ch_mgr.opts) of
|
||||||
|
case true of
|
||||||
true when Summ2 /= Last2 ->
|
true when Summ2 /= Last2 ->
|
||||||
put(last_verbose, Summ2),
|
put(last_verbose, Summ2),
|
||||||
?V("\n~s ~p uses plain: ~w \n",
|
?V("\n~s ~p uses plain: ~w \n",
|
||||||
|
|
|
@ -274,7 +274,19 @@ make_repair_directives3([{Offset, Size, CSum, _FLU}=A|Rest0],
|
||||||
%% byte range from all FLUs
|
%% byte range from all FLUs
|
||||||
%% 3b. Log big warning about data loss.
|
%% 3b. Log big warning about data loss.
|
||||||
%% 4. Log any other checksum discrepancies as they are found.
|
%% 4. Log any other checksum discrepancies as they are found.
|
||||||
exit({todo_repair_sanity_check, ?LINE, File, Offset, As})
|
QQ = [begin
|
||||||
|
Pxy = orddict:fetch(FLU, ProxiesDict),
|
||||||
|
{ok, EpochID} = machi_proxy_flu1_client:get_epoch_id(
|
||||||
|
Pxy, ?SHORT_TIMEOUT),
|
||||||
|
NSInfo = undefined,
|
||||||
|
XX = machi_proxy_flu1_client:read_chunk(
|
||||||
|
Pxy, NSInfo, EpochID, File, Offset, Size, undefined,
|
||||||
|
?SHORT_TIMEOUT),
|
||||||
|
{FLU, XX}
|
||||||
|
end || {__Offset, __Size, __CSum, FLU} <- As],
|
||||||
|
|
||||||
|
exit({todo_repair_sanity_check, ?LINE, File, Offset, {as,As}, {qq,QQ}})
|
||||||
|
%% exit({todo_repair_sanity_check, ?LINE, File, Offset, As})
|
||||||
end,
|
end,
|
||||||
%% List construction guarantees us that there's at least one ?MAX_OFFSET
|
%% List construction guarantees us that there's at least one ?MAX_OFFSET
|
||||||
%% item remains. Sort order + our "taking" of all exact Offset+Size
|
%% item remains. Sort order + our "taking" of all exact Offset+Size
|
||||||
|
|
|
@ -786,9 +786,9 @@ do_repair_chunk2([], ReturnMode, Chunk, _CSum, _Repaired, _NSInfo, File, Offset,
|
||||||
%% TODO: add stats for # of repairs, length(_Repaired)-1, etc etc?
|
%% TODO: add stats for # of repairs, length(_Repaired)-1, etc etc?
|
||||||
case ReturnMode of
|
case ReturnMode of
|
||||||
read ->
|
read ->
|
||||||
{ok, Chunk, S};
|
{reply, {ok, {[Chunk], []}}, S};
|
||||||
{append, Offset, Size, File} ->
|
{append, Offset, Size, File} ->
|
||||||
{ok, {Offset, Size, File}, S}
|
{reply, {ok, {[{Offset, Size, File}], []}}, S}
|
||||||
end;
|
end;
|
||||||
do_repair_chunk2([First|Rest]=ToRepair, ReturnMode, Chunk, CSum, Repaired, NSInfo, File, Offset,
|
do_repair_chunk2([First|Rest]=ToRepair, ReturnMode, Chunk, CSum, Repaired, NSInfo, File, Offset,
|
||||||
Size, Depth, STime, #state{epoch_id=EpochID, proxies_dict=PD}=S) ->
|
Size, Depth, STime, #state{epoch_id=EpochID, proxies_dict=PD}=S) ->
|
||||||
|
|
|
@ -231,10 +231,14 @@ find_or_make_filename(Tid, DataDir, NS, NSLocator, Prefix, N) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
generate_filename(DataDir, NS, NSLocator, Prefix, N) ->
|
generate_filename(DataDir, NS, NSLocator, Prefix, N) ->
|
||||||
|
{A,B,C} = erlang:now(),
|
||||||
|
TODO = lists:flatten(filename:basename(DataDir) ++ "," ++ io_lib:format("~w,~w,~w", [A,B,C])),
|
||||||
{F, _} = machi_util:make_data_filename(
|
{F, _} = machi_util:make_data_filename(
|
||||||
DataDir,
|
DataDir,
|
||||||
NS, NSLocator, Prefix,
|
NS, NSLocator, Prefix,
|
||||||
generate_uuid_v4_str(),
|
TODO,
|
||||||
|
%% TODO put me back!!
|
||||||
|
%% generate_uuid_v4_str(),
|
||||||
N),
|
N),
|
||||||
binary_to_list(F).
|
binary_to_list(F).
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,9 @@ append(CRIndex, Bin, #state{verbose=V}=S) ->
|
||||||
NSInfo = #ns_info{},
|
NSInfo = #ns_info{},
|
||||||
NoCSum = <<>>,
|
NoCSum = <<>>,
|
||||||
Opts1 = #append_opts{},
|
Opts1 = #append_opts{},
|
||||||
|
io:format(user, "append_chunk ~p ~P ->\n", [Prefix, Bin, 6]),
|
||||||
Res = (catch machi_cr_client:append_chunk(C, NSInfo, Prefix, Bin, NoCSum, Opts1, sec(1))),
|
Res = (catch machi_cr_client:append_chunk(C, NSInfo, Prefix, Bin, NoCSum, Opts1, sec(1))),
|
||||||
|
io:format(user, "append_chunk ~p ~P ->\n ~p\n", [Prefix, Bin, 6, Res]),
|
||||||
case Res of
|
case Res of
|
||||||
{ok, {_Off, Len, _FileName}=Key} ->
|
{ok, {_Off, Len, _FileName}=Key} ->
|
||||||
case ets:insert_new(?WRITTEN_TAB, {Key, Bin}) of
|
case ets:insert_new(?WRITTEN_TAB, {Key, Bin}) of
|
||||||
|
@ -188,6 +190,7 @@ change_partition(Partition,
|
||||||
[] -> ?V("## Turn OFF partition: ~w~n", [Partition]);
|
[] -> ?V("## Turn OFF partition: ~w~n", [Partition]);
|
||||||
_ -> ?V("## Turn ON partition: ~w~n", [Partition])
|
_ -> ?V("## Turn ON partition: ~w~n", [Partition])
|
||||||
end || Verbose],
|
end || Verbose],
|
||||||
|
io:format(user, "partition ~p\n", [Partition]),
|
||||||
machi_partition_simulator:always_these_partitions(Partition),
|
machi_partition_simulator:always_these_partitions(Partition),
|
||||||
_ = machi_partition_simulator:get(FLUNames),
|
_ = machi_partition_simulator:get(FLUNames),
|
||||||
%% Don't wait for stable chain, tick will be executed on demand
|
%% Don't wait for stable chain, tick will be executed on demand
|
||||||
|
@ -456,7 +459,6 @@ assert_chunk(C, {Off, Len, FileName}=Key, Bin) ->
|
||||||
FileNameStr = binary_to_list(FileName),
|
FileNameStr = binary_to_list(FileName),
|
||||||
%% TODO: Use CSum instead of binary (after the discussion about CSum has calmed down?)
|
%% TODO: Use CSum instead of binary (after the discussion about CSum has calmed down?)
|
||||||
NSInfo = undefined,
|
NSInfo = undefined,
|
||||||
io:format(user, "TODO fix broken read_chunk mod ~s line ~w\n", [?MODULE, ?LINE]),
|
|
||||||
case (catch machi_cr_client:read_chunk(C, NSInfo, FileName, Off, Len, undefined, sec(3))) of
|
case (catch machi_cr_client:read_chunk(C, NSInfo, FileName, Off, Len, undefined, sec(3))) of
|
||||||
{ok, {[{FileNameStr, Off, Bin, _}], []}} ->
|
{ok, {[{FileNameStr, Off, Bin, _}], []}} ->
|
||||||
ok;
|
ok;
|
||||||
|
|
Loading…
Reference in a new issue