Trim command and GC prototype implementation #32

Merged
kuenishi merged 4 commits from ku/trim-and-gc into master 2015-10-29 06:07:10 +00:00
18 changed files with 687 additions and 133 deletions
Showing only changes of commit f7358424e4 - Show all commits

View file

@ -48,9 +48,10 @@ enum Mpb_GeneralStatusCode {
PARTITION = 4; PARTITION = 4;
NOT_WRITTEN = 5; NOT_WRITTEN = 5;
WRITTEN = 6; WRITTEN = 6;
NO_SUCH_FILE = 7; TRIMMED = 7; // The whole file was trimmed
PARTIAL_READ = 8; NO_SUCH_FILE = 8;
BAD_EPOCH = 9; PARTIAL_READ = 9;
BAD_EPOCH = 10;
BAD_JOSS = 255; // Only for testing by the Taipan BAD_JOSS = 255; // Only for testing by the Taipan
} }
@ -355,6 +356,7 @@ message Mpb_ProjectionV1 {
// append_chunk() // append_chunk()
// write_chunk() // write_chunk()
// read_chunk() // read_chunk()
// trim_chunk()
// checksum_list() // checksum_list()
// list_files() // list_files()
// wedge_status() // wedge_status()
@ -424,6 +426,20 @@ message Mpb_LL_ReadChunkResp {
repeated Mpb_ChunkPos trimmed = 3; repeated Mpb_ChunkPos trimmed = 3;
} }
// Low level API: trim_chunk()
message Mpb_LL_TrimChunkReq {
required Mpb_EpochID epoch_id = 1;
required string file = 2;
required uint64 offset = 3;
required uint32 size = 4;
optional uint32 trigger_gc = 5 [default=1];
}
message Mpb_LL_TrimChunkResp {
required Mpb_GeneralStatusCode status = 1;
}
// Low level API: checksum_list() // Low level API: checksum_list()
message Mpb_LL_ChecksumListReq { message Mpb_LL_ChecksumListReq {
@ -588,11 +604,12 @@ message Mpb_LL_Request {
optional Mpb_LL_AppendChunkReq append_chunk = 30; optional Mpb_LL_AppendChunkReq append_chunk = 30;
optional Mpb_LL_WriteChunkReq write_chunk = 31; optional Mpb_LL_WriteChunkReq write_chunk = 31;
optional Mpb_LL_ReadChunkReq read_chunk = 32; optional Mpb_LL_ReadChunkReq read_chunk = 32;
optional Mpb_LL_ChecksumListReq checksum_list = 33; optional Mpb_LL_TrimChunkReq trim_chunk = 33;
optional Mpb_LL_ListFilesReq list_files = 34; optional Mpb_LL_ChecksumListReq checksum_list = 34;
optional Mpb_LL_WedgeStatusReq wedge_status = 35; optional Mpb_LL_ListFilesReq list_files = 35;
optional Mpb_LL_DeleteMigrationReq delete_migration = 36; optional Mpb_LL_WedgeStatusReq wedge_status = 36;
optional Mpb_LL_TruncHackReq trunc_hack = 37; optional Mpb_LL_DeleteMigrationReq delete_migration = 37;
optional Mpb_LL_TruncHackReq trunc_hack = 38;
} }
message Mpb_LL_Response { message Mpb_LL_Response {
@ -622,9 +639,10 @@ message Mpb_LL_Response {
optional Mpb_LL_AppendChunkResp append_chunk = 30; optional Mpb_LL_AppendChunkResp append_chunk = 30;
optional Mpb_LL_WriteChunkResp write_chunk = 31; optional Mpb_LL_WriteChunkResp write_chunk = 31;
optional Mpb_LL_ReadChunkResp read_chunk = 32; optional Mpb_LL_ReadChunkResp read_chunk = 32;
optional Mpb_LL_ChecksumListResp checksum_list = 33; optional Mpb_LL_TrimChunkResp trim_chunk = 33;
optional Mpb_LL_ListFilesResp list_files = 34; optional Mpb_LL_ChecksumListResp checksum_list = 34;
optional Mpb_LL_WedgeStatusResp wedge_status = 35; optional Mpb_LL_ListFilesResp list_files = 35;
optional Mpb_LL_DeleteMigrationResp delete_migration = 36; optional Mpb_LL_WedgeStatusResp wedge_status = 36;
optional Mpb_LL_TruncHackResp trunc_hack = 37; optional Mpb_LL_DeleteMigrationResp delete_migration = 37;
optional Mpb_LL_TruncHackResp trunc_hack = 38;
} }

View file

@ -360,6 +360,9 @@ do_append_head3(Prefix, Chunk, ChunkExtra, Depth, STime, TO,
%% written block is. But we lost a race. Repeat, with a new %% written block is. But we lost a race. Repeat, with a new
%% sequencer assignment. %% sequencer assignment.
do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S); do_append_head(Prefix, Chunk, ChunkExtra, Depth, STime, TO, S);
{error, trimmed} = Err ->
%% TODO: behaviour
{reply, Err, S};
{error, not_written} -> {error, not_written} ->
exit({todo_should_never_happen,?MODULE,?LINE, exit({todo_should_never_happen,?MODULE,?LINE,
Prefix,iolist_size(Chunk)}) Prefix,iolist_size(Chunk)})
@ -434,6 +437,9 @@ do_append_midtail2([FLU|RestFLUs]=FLUs, Prefix, File, Offset, Chunk,
Resume = {append, Offset, iolist_size(Chunk), File}, Resume = {append, Offset, iolist_size(Chunk), File},
do_repair_chunk(FLUs, Resume, Chunk, [], File, Offset, do_repair_chunk(FLUs, Resume, Chunk, [], File, Offset,
iolist_size(Chunk), Depth, STime, S); iolist_size(Chunk), Depth, STime, S);
{error, trimmed} = Err ->
%% TODO: nothing can be done
{reply, Err, S};
{error, not_written} -> {error, not_written} ->
exit({todo_should_never_happen,?MODULE,?LINE,File,Offset}) exit({todo_should_never_happen,?MODULE,?LINE,File,Offset})
end. end.
@ -497,6 +503,8 @@ do_write_head2(File, Offset, Chunk, Depth, STime, TO,
do_write_head(File, Offset, Chunk, Depth, STime, TO, S); do_write_head(File, Offset, Chunk, Depth, STime, TO, S);
{error, written}=Err -> {error, written}=Err ->
{reply, Err, S}; {reply, Err, S};
{error, trimmed}=Err ->
{reply, Err, S};
{error, not_written} -> {error, not_written} ->
exit({todo_should_never_happen,?MODULE,?LINE, exit({todo_should_never_happen,?MODULE,?LINE,
iolist_size(Chunk)}) iolist_size(Chunk)})
@ -528,8 +536,14 @@ do_read_chunk2(File, Offset, Size, Opts, Depth, STime, TO,
ConsistencyMode = P#projection_v1.mode, ConsistencyMode = P#projection_v1.mode,
case ?FLU_PC:read_chunk(orddict:fetch(Tail, PD), EpochID, case ?FLU_PC:read_chunk(orddict:fetch(Tail, PD), EpochID,
File, Offset, Size, Opts, ?TIMEOUT) of File, Offset, Size, Opts, ?TIMEOUT) of
{ok, {Chunks, []}} when is_list(Chunks) -> {ok, {Chunks, Trimmed}} when is_list(Chunks), is_list(Trimmed) ->
{reply, {ok, {Chunks, []}}, S}; %% After partition heal, there could happen that heads may
%% have chunk trimmed but tails may have chunk written -
%% such repair couldn't be triggered in read time (because
%% there's data!). In this case, repair should happen by
%% partition heal event or some background
%% hashtree-n-repair service. TODO. FIXME.
{reply, {ok, {Chunks, Trimmed}}, S};
%% {ok, BadChunk} -> %% {ok, BadChunk} ->
%% %% TODO cleaner handling of bad chunks %% %% TODO cleaner handling of bad chunks
%% exit({todo, bad_chunk_size, ?MODULE, ?LINE, File, Offset, Size, %% exit({todo, bad_chunk_size, ?MODULE, ?LINE, File, Offset, Size,
@ -537,9 +551,11 @@ do_read_chunk2(File, Offset, Size, Opts, Depth, STime, TO,
{error, bad_arg} = BadArg -> {error, bad_arg} = BadArg ->
{reply, BadArg, S}; {reply, BadArg, S};
{error, partial_read}=Err -> {error, partial_read}=Err ->
%% TODO: maybe this case we might need another repair?
{reply, Err, S}; {reply, Err, S};
{error, bad_checksum}=BadCS -> {error, bad_checksum}=BadCS ->
%% TODO: alternate strategy? %% TODO: alternate strategy?
%% Maybe we need read repair here, too?
{reply, BadCS, S}; {reply, BadCS, S};
{error, Retry} {error, Retry}
when Retry == partition; Retry == bad_epoch; Retry == wedged -> when Retry == partition; Retry == bad_epoch; Retry == wedged ->
@ -548,12 +564,125 @@ do_read_chunk2(File, Offset, Size, Opts, Depth, STime, TO,
read_repair(ConsistencyMode, read, File, Offset, Size, Depth, STime, S); read_repair(ConsistencyMode, read, File, Offset, Size, Depth, STime, S);
%% {reply, {error, not_written}, S}; %% {reply, {error, not_written}, S};
{error, written} -> {error, written} ->
exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size}) exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size});
{error, trimmed}=Err ->
{reply, Err, S}
end.
do_trim_chunk(File, Offset, Size, 0=Depth, STime, TO, S) ->
do_trim_chunk(File, Offset, Size, Depth+1, STime, TO, S);
do_trim_chunk(File, Offset, Size, Depth, STime, TO, #state{proj=P}=S) ->
sleep_a_while(Depth),
DiffMs = timer:now_diff(os:timestamp(), STime) div 1000,
if DiffMs > TO ->
{reply, {error, partition}, S};
true ->
%% This is suboptimal for performance: there are some paths
%% through this point where our current projection is good
%% enough. But we're going to try to keep the code as simple
%% as we can for now.
S2 = update_proj(S#state{proj=undefined, bad_proj=P}),
case S2#state.proj of
P2 when P2 == undefined orelse
P2#projection_v1.upi == [] ->
do_trim_chunk(File, Offset, Size, Depth + 1,
STime, TO, S2);
_ ->
do_trim_chunk2(File, Offset, Size, Depth + 1,
STime, TO, S2)
end
end.
do_trim_chunk2(File, Offset, Size, Depth, STime, TO,
#state{epoch_id=EpochID, proj=P, proxies_dict=PD}=S) ->
[HeadFLU|RestFLUs] = mutation_flus(P),
Proxy = orddict:fetch(HeadFLU, PD),
case ?FLU_PC:trim_chunk(Proxy, EpochID, File, Offset, Size, ?TIMEOUT) of
ok ->
%% From this point onward, we use the same code & logic path as
%% append does.
do_trim_midtail(RestFLUs, undefined, File, Offset, Size,
[HeadFLU], 0, STime, TO, S);
{error, trimmed} ->
%% Maybe the trim had failed in the middle of the tail so re-run
%% trim accross the whole chain.
do_trim_midtail(RestFLUs, undefined, File, Offset, Size,
[HeadFLU], 0, STime, TO, S);
{error, bad_checksum}=BadCS ->
{reply, BadCS, S};
{error, Retry}
when Retry == partition; Retry == bad_epoch; Retry == wedged ->
do_trim_chunk(File, Offset, Size, Depth, STime, TO, S)
end.
do_trim_midtail(RestFLUs, Prefix, File, Offset, Size,
Ws, Depth, STime, TO, S)
when RestFLUs == [] orelse Depth == 0 ->
do_trim_midtail2(RestFLUs, Prefix, File, Offset, Size,
Ws, Depth + 1, STime, TO, S);
do_trim_midtail(_RestFLUs, Prefix, File, Offset, Size,
Ws, Depth, STime, TO, #state{proj=P}=S) ->
%% io:format(user, "midtail sleep2,", []),
sleep_a_while(Depth),
DiffMs = timer:now_diff(os:timestamp(), STime) div 1000,
if DiffMs > TO ->
{reply, {error, partition}, S};
true ->
S2 = update_proj(S#state{proj=undefined, bad_proj=P}),
case S2#state.proj of
undefined ->
{reply, {error, partition}, S};
P2 ->
RestFLUs2 = mutation_flus(P2),
case RestFLUs2 -- Ws of
RestFLUs2 ->
%% None of the writes that we have done so far
%% are to FLUs that are in the RestFLUs2 list.
%% We are pessimistic here and assume that
%% those FLUs are permanently dead. Start
%% over with a new sequencer assignment, at
%% the 2nd have of the impl (we have already
%% slept & refreshed the projection).
if Prefix == undefined -> % atom! not binary()!!
{error, partition};
true ->
do_trim_chunk(Prefix, Offset, Size,
Depth, STime, TO, S2)
end;
RestFLUs3 ->
do_trim_midtail2(RestFLUs3, Prefix, File, Offset, Size,
Ws, Depth + 1, STime, TO, S2)
end
end
end.
do_trim_midtail2([], _Prefix, _File, _Offset, _Size,
_Ws, _Depth, _STime, _TO, S) ->
%% io:format(user, "ok!\n", []),
{reply, ok, S};
do_trim_midtail2([FLU|RestFLUs]=FLUs, Prefix, File, Offset, Size,
Ws, Depth, STime, TO,
#state{epoch_id=EpochID, proxies_dict=PD}=S) ->
Proxy = orddict:fetch(FLU, PD),
case ?FLU_PC:trim_chunk(Proxy, EpochID, File, Offset, Size, ?TIMEOUT) of
ok ->
%% io:format(user, "write ~w,", [FLU]),
do_trim_midtail2(RestFLUs, Prefix, File, Offset, Size,
[FLU|Ws], Depth, STime, TO, S);
{error, trimmed} ->
do_trim_midtail2(RestFLUs, Prefix, File, Offset, Size,
[FLU|Ws], Depth, STime, TO, S);
{error, bad_checksum}=BadCS ->
%% TODO: alternate strategy?
{reply, BadCS, S};
{error, Retry}
when Retry == partition; Retry == bad_epoch; Retry == wedged ->
do_trim_midtail(FLUs, Prefix, File, Offset, Size,
Ws, Depth, STime, TO, S)
end. end.
do_trim_chunk(_File, _Offset, _Size, _Depth, _STime, _TO, S) ->
%% This is just a stub to reach CR client from high level client
{reply, {error, bad_joss}, S}.
%% Read repair: depends on the consistency mode that we're in: %% Read repair: depends on the consistency mode that we're in:
%% %%
@ -597,6 +726,7 @@ read_repair2(cp_mode=ConsistencyMode,
case ?FLU_PC:read_chunk(orddict:fetch(Tail, PD), EpochID, case ?FLU_PC:read_chunk(orddict:fetch(Tail, PD), EpochID,
File, Offset, Size, [], ?TIMEOUT) of File, Offset, Size, [], ?TIMEOUT) of
{ok, Chunks} when is_list(Chunks) -> {ok, Chunks} when is_list(Chunks) ->
%% TODO: change to {Chunks, Trimmed} and have them repaired
ToRepair = mutation_flus(P) -- [Tail], ToRepair = mutation_flus(P) -- [Tail],
{Reply, S1} = do_repair_chunks(Chunks, ToRepair, ReturnMode, {Reply, S1} = do_repair_chunks(Chunks, ToRepair, ReturnMode,
[Tail], File, Depth, STime, S, {ok, Chunks}), [Tail], File, Depth, STime, S, {ok, Chunks}),
@ -614,7 +744,12 @@ read_repair2(cp_mode=ConsistencyMode,
{error, not_written} -> {error, not_written} ->
{reply, {error, not_written}, S}; {reply, {error, not_written}, S};
{error, written} -> {error, written} ->
exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size}) exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size});
{error, trimmed} ->
%% TODO: Again, whole file was trimmed. Needs repair. How
%% do we repair trimmed file (which was already unlinked)
%% across the flu servers?
exit({todo_should_repair_unlinked_files, ?MODULE, ?LINE, File})
end; end;
read_repair2(ap_mode=ConsistencyMode, read_repair2(ap_mode=ConsistencyMode,
ReturnMode, File, Offset, Size, Depth, STime, ReturnMode, File, Offset, Size, Depth, STime,
@ -622,6 +757,7 @@ read_repair2(ap_mode=ConsistencyMode,
Eligible = mutation_flus(P), Eligible = mutation_flus(P),
case try_to_find_chunk(Eligible, File, Offset, Size, S) of case try_to_find_chunk(Eligible, File, Offset, Size, S) of
{ok, {Chunks, _Trimmed}, GotItFrom} when is_list(Chunks) -> {ok, {Chunks, _Trimmed}, GotItFrom} when is_list(Chunks) ->
%% TODO: Repair trimmed chunks
ToRepair = mutation_flus(P) -- [GotItFrom], ToRepair = mutation_flus(P) -- [GotItFrom],
{Reply0, S1} = do_repair_chunks(Chunks, ToRepair, ReturnMode, [GotItFrom], {Reply0, S1} = do_repair_chunks(Chunks, ToRepair, ReturnMode, [GotItFrom],
File, Depth, STime, S, {ok, Chunks}), File, Depth, STime, S, {ok, Chunks}),
@ -638,7 +774,11 @@ read_repair2(ap_mode=ConsistencyMode,
{error, not_written} -> {error, not_written} ->
{reply, {error, not_written}, S}; {reply, {error, not_written}, S};
{error, written} -> {error, written} ->
exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size}) exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size});
{error, trimmed} ->
%% TODO: Again, whole file was trimmed. Needs repair. How
%% do we repair trimmed file across the flu servers?
exit({todo_should_repair_unlinked_files, ?MODULE, ?LINE, File})
end. end.
do_repair_chunks([], _, _, _, _, _, _, S, Reply) -> do_repair_chunks([], _, _, _, _, _, _, S, Reply) ->
@ -703,6 +843,9 @@ do_repair_chunk2([First|Rest]=ToRepair, ReturnMode, Chunk, Repaired, File, Offse
%% that it is exactly our Chunk. %% that it is exactly our Chunk.
do_repair_chunk2(Rest, ReturnMode, Chunk, Repaired, File, do_repair_chunk2(Rest, ReturnMode, Chunk, Repaired, File,
Offset, Size, Depth, STime, S); Offset, Size, Depth, STime, S);
{error, trimmed} = _Error ->
%% TODO
exit(todo_should_repair_trimmed);
{error, not_written} -> {error, not_written} ->
exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size}) exit({todo_should_never_happen,?MODULE,?LINE,File,Offset,Size})
end. end.
@ -872,7 +1015,9 @@ try_to_find_chunk(Eligible, File, Offset, Size,
[{FoundFLU, {ok, ChunkAndTrimmed}}|_] -> [{FoundFLU, {ok, ChunkAndTrimmed}}|_] ->
{ok, ChunkAndTrimmed, FoundFLU}; {ok, ChunkAndTrimmed, FoundFLU};
[] -> [] ->
RetryErrs = [partition, bad_epoch, wedged], RetryErrs = [partition, bad_epoch, wedged, trimmed],
%% Adding 'trimmed' to return so as to trigger repair,
%% once all other retry errors fixed
case [Err || {error, Err} <- Rs, lists:member(Err, RetryErrs)] of case [Err || {error, Err} <- Rs, lists:member(Err, RetryErrs)] of
[SomeErr|_] -> [SomeErr|_] ->
{error, SomeErr}; {error, SomeErr};

View file

@ -177,7 +177,14 @@ all_trimmed(#machi_csum_table{table=T}, Left, Right) ->
-spec all_trimmed(table(), machi_dt:chunk_pos()) -> boolean(). -spec all_trimmed(table(), machi_dt:chunk_pos()) -> boolean().
all_trimmed(#machi_csum_table{table=T}, Pos) -> all_trimmed(#machi_csum_table{table=T}, Pos) ->
runthru(ets:tab2list(T), 0, Pos). case ets:tab2list(T) of
[{0, ?MINIMUM_OFFSET, _}|L] ->
%% tl/1 to remove header space {0, 1024, <<0>>}
runthru(L, ?MINIMUM_OFFSET, Pos);
List ->
%% In case a header is removed;
runthru(List, 0, Pos)
end.
-spec any_trimmed(table(), -spec any_trimmed(table(),
pos_integer(), pos_integer(),

View file

@ -47,7 +47,7 @@
%% public API %% public API
-export([ -export([
start_link/2, start_link/3,
stop/1, stop/1,
sync/1, sync/1,
sync/2, sync/2,
@ -55,6 +55,7 @@
read/4, read/4,
write/3, write/3,
write/4, write/4,
trim/4,
append/2, append/2,
append/4 append/4
]). ]).
@ -78,6 +79,7 @@
Errors :: non_neg_integer() }. Errors :: non_neg_integer() }.
-record(state, { -record(state, {
fluname :: atom(),
data_dir :: string() | undefined, data_dir :: string() | undefined,
filename :: string() | undefined, filename :: string() | undefined,
data_path :: string() | undefined, data_path :: string() | undefined,
@ -93,7 +95,8 @@
ops = 0 :: non_neg_integer(), %% sum of all ops ops = 0 :: non_neg_integer(), %% sum of all ops
reads = {0, 0} :: op_stats(), reads = {0, 0} :: op_stats(),
writes = {0, 0} :: op_stats(), writes = {0, 0} :: op_stats(),
appends = {0, 0} :: op_stats() appends = {0, 0} :: op_stats(),
trims = {0, 0} :: op_stats()
}). }).
%% Public API %% Public API
@ -101,9 +104,9 @@
% @doc Start a new instance of the file proxy service. Takes the filename % @doc Start a new instance of the file proxy service. Takes the filename
% and data directory as arguments. This function is typically called by the % and data directory as arguments. This function is typically called by the
% `machi_file_proxy_sup:start_proxy/2' function. % `machi_file_proxy_sup:start_proxy/2' function.
-spec start_link(Filename :: string(), DataDir :: string()) -> any(). -spec start_link(FluName :: atom(), Filename :: string(), DataDir :: string()) -> any().
start_link(Filename, DataDir) -> start_link(FluName, Filename, DataDir) ->
gen_server:start_link(?MODULE, {Filename, DataDir}, []). gen_server:start_link(?MODULE, {FluName, Filename, DataDir}, []).
% @doc Request to stop an instance of the file proxy service. % @doc Request to stop an instance of the file proxy service.
-spec stop(Pid :: pid()) -> ok. -spec stop(Pid :: pid()) -> ok.
@ -179,6 +182,12 @@ write(_Pid, Offset, ClientMeta, _Data) ->
lager:warning("Bad arg to write: Offset ~p, ClientMeta: ~p", [Offset, ClientMeta]), lager:warning("Bad arg to write: Offset ~p, ClientMeta: ~p", [Offset, ClientMeta]),
{error, bad_arg}. {error, bad_arg}.
trim(Pid, Offset, Size, TriggerGC) when is_pid(Pid),
is_integer(Offset) andalso Offset >= 0,
is_integer(Size) andalso Size > 0,
is_boolean(TriggerGC) ->
gen_server:call(Pid, {trim ,Offset, Size, TriggerGC}, ?TIMEOUT).
% @doc Append data % @doc Append data
-spec append(Pid :: pid(), Data :: binary()) -> {ok, File :: string(), Offset :: non_neg_integer()} -spec append(Pid :: pid(), Data :: binary()) -> {ok, File :: string(), Offset :: non_neg_integer()}
|{error, term()}. |{error, term()}.
@ -205,7 +214,7 @@ append(_Pid, ClientMeta, Extra, _Data) ->
%% gen_server callbacks %% gen_server callbacks
% @private % @private
init({Filename, DataDir}) -> init({FluName, Filename, DataDir}) ->
CsumFile = machi_util:make_checksum_filename(DataDir, Filename), CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
{_, DPath} = machi_util:make_data_filename(DataDir, Filename), {_, DPath} = machi_util:make_data_filename(DataDir, Filename),
ok = filelib:ensure_dir(CsumFile), ok = filelib:ensure_dir(CsumFile),
@ -216,6 +225,7 @@ init({Filename, DataDir}) ->
{ok, FHd} = file:open(DPath, [read, write, binary, raw]), {ok, FHd} = file:open(DPath, [read, write, binary, raw]),
Tref = schedule_tick(), Tref = schedule_tick(),
St = #state{ St = #state{
fluname = FluName,
filename = Filename, filename = Filename,
data_dir = DataDir, data_dir = DataDir,
data_path = DPath, data_path = DPath,
@ -285,16 +295,21 @@ handle_call({read, Offset, Length, Opts}, _From,
csum_table = CsumTable, csum_table = CsumTable,
reads = {T, Err} reads = {T, Err}
}) -> }) ->
%% TODO: use these options - NoChunk prevents reading from disks
%% NoChecksum doesn't check checksums
NoChecksum = proplists:get_value(no_checksum, Opts, false), NoChecksum = proplists:get_value(no_checksum, Opts, false),
NoChunk = proplists:get_value(no_chunk, Opts, false), NoChunk = proplists:get_value(no_chunk, Opts, false),
NeedsMerge = proplists:get_value(needs_trimmed, Opts, false),
{Resp, NewErr} = {Resp, NewErr} =
case do_read(FH, F, CsumTable, Offset, Length, NoChecksum, NoChunk, NeedsMerge) of case do_read(FH, F, CsumTable, Offset, Length, NoChunk, NoChecksum) of
{ok, {[], []}} -> {ok, {[], []}} ->
{{error, not_written}, Err + 1}; {{error, not_written}, Err + 1};
{ok, {Chunks0, Trimmed0}} -> {ok, {Chunks0, Trimmed0}} ->
Chunks = slice_both_side(Chunks0, Offset, Offset+Length), Chunks = slice_both_side(Chunks0, Offset, Offset+Length),
{{ok, {Chunks, Trimmed0}}, Err}; Trimmed = case proplists:get_value(needs_trimmed, Opts, false) of
true -> Trimmed0;
false -> []
end,
{{ok, {Chunks, Trimmed}}, Err};
Error -> Error ->
lager:error("Can't read ~p, ~p at File ~p", [Offset, Length, F]), lager:error("Can't read ~p, ~p at File ~p", [Offset, Length, F]),
{Error, Err + 1} {Error, Err + 1}
@ -338,6 +353,45 @@ handle_call({write, Offset, ClientMeta, Data}, _From,
{reply, Resp, State#state{writes = {T+1, NewErr}, {reply, Resp, State#state{writes = {T+1, NewErr},
eof_position = NewEof}}; eof_position = NewEof}};
%%% TRIMS
handle_call({trim, _Offset, _ClientMeta, _Data}, _From,
State = #state{wedged = true,
writes = {T, Err}
}) ->
{reply, {error, wedged}, State#state{writes = {T + 1, Err + 1}}};
handle_call({trim, Offset, Size, _TriggerGC}, _From,
State = #state{data_filehandle=FHd,
ops = Ops,
trims = {T, Err},
csum_table = CsumTable}) ->
case machi_csum_table:all_trimmed(CsumTable, Offset, Size) of
true ->
NewState = State#state{ops=Ops+1, trims={T, Err+1}},
%% All bytes of that range was already trimmed returns ok
%% here, not {error, trimmed}, which means the whole file
%% was trimmed
maybe_gc(ok, NewState);
false ->
LUpdate = maybe_regenerate_checksum(
FHd,
machi_csum_table:find_leftneighbor(CsumTable, Offset)),
RUpdate = maybe_regenerate_checksum(
FHd,
machi_csum_table:find_rightneighbor(CsumTable, Offset+Size)),
case machi_csum_table:trim(CsumTable, Offset, Size, LUpdate, RUpdate) of
ok ->
NewState = State#state{ops=Ops+1, trims={T+1, Err}},
maybe_gc(ok, NewState);
Error ->
{reply, Error, State#state{ops=Ops+1, trims={T, Err+1}}}
end
end;
%% APPENDS %% APPENDS
handle_call({append, _ClientMeta, _Extra, _Data}, _From, handle_call({append, _ClientMeta, _Extra, _Data}, _From,
@ -476,10 +530,20 @@ terminate(Reason, #state{filename = F,
lager:info(" Reads: ~p/~p", [RT, RE]), lager:info(" Reads: ~p/~p", [RT, RE]),
lager:info(" Writes: ~p/~p", [WT, WE]), lager:info(" Writes: ~p/~p", [WT, WE]),
lager:info("Appends: ~p/~p", [AT, AE]), lager:info("Appends: ~p/~p", [AT, AE]),
case FHd of
undefined ->
noop; %% file deleted
_ ->
ok = file:sync(FHd), ok = file:sync(FHd),
ok = file:close(FHd), ok = file:close(FHd)
end,
case T of
undefined ->
noop; %% file deleted
_ ->
ok = machi_csum_table:sync(T), ok = machi_csum_table:sync(T),
ok = machi_csum_table:close(T), ok = machi_csum_table:close(T)
end,
ok. ok.
% @private % @private
@ -518,9 +582,8 @@ check_or_make_tagged_csum(OtherTag, _ClientCsum, _Data) ->
CsumTable :: machi_csum_table:table(), CsumTable :: machi_csum_table:table(),
Offset :: non_neg_integer(), Offset :: non_neg_integer(),
Size :: non_neg_integer(), Size :: non_neg_integer(),
NoChecksum :: boolean(),
NoChunk :: boolean(), NoChunk :: boolean(),
NeedsTrimmed :: boolean() NoChecksum :: boolean()
) -> {ok, Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum :: binary()}]} | ) -> {ok, Chunks :: [{string(), Offset::non_neg_integer(), binary(), Csum :: binary()}]} |
{error, bad_checksum} | {error, bad_checksum} |
{error, partial_read} | {error, partial_read} |
@ -539,23 +602,23 @@ check_or_make_tagged_csum(OtherTag, _ClientCsum, _Data) ->
% tuple is returned.</ul> % tuple is returned.</ul>
% </li> % </li>
% %
do_read(FHd, Filename, CsumTable, Offset, Size, _, _, _) -> do_read(FHd, Filename, CsumTable, Offset, Size, _, _) ->
do_read(FHd, Filename, CsumTable, Offset, Size).
do_read(FHd, Filename, CsumTable, Offset, Size) ->
%% Note that find/3 only returns overlapping chunks, both borders %% Note that find/3 only returns overlapping chunks, both borders
%% are not aligned to original Offset and Size. %% are not aligned to original Offset and Size.
ChunkCsums = machi_csum_table:find(CsumTable, Offset, Size), ChunkCsums = machi_csum_table:find(CsumTable, Offset, Size),
read_all_ranges(FHd, Filename, ChunkCsums, []). read_all_ranges(FHd, Filename, ChunkCsums, [], []).
read_all_ranges(_, _, [], ReadChunks) -> read_all_ranges(_, _, [], ReadChunks, TrimmedChunks) ->
%% TODO: currently returns empty list of trimmed chunks %% TODO: currently returns empty list of trimmed chunks
{ok, {lists:reverse(ReadChunks), []}}; {ok, {lists:reverse(ReadChunks), lists:reverse(TrimmedChunks)}};
read_all_ranges(FHd, Filename, [{Offset, Size, TaggedCsum}|T], ReadChunks) -> read_all_ranges(FHd, Filename, [{Offset, Size, trimmed}|T], ReadChunks, TrimmedChunks) ->
read_all_ranges(FHd, Filename, T, ReadChunks, [{Filename, Offset, Size}|TrimmedChunks]);
read_all_ranges(FHd, Filename, [{Offset, Size, TaggedCsum}|T], ReadChunks, TrimmedChunks) ->
case file:pread(FHd, Offset, Size) of case file:pread(FHd, Offset, Size) of
eof -> eof ->
read_all_ranges(FHd, Filename, T, ReadChunks); read_all_ranges(FHd, Filename, T, ReadChunks, TrimmedChunks);
{ok, Bytes} when byte_size(Bytes) == Size -> {ok, Bytes} when byte_size(Bytes) == Size ->
{Tag, Ck} = machi_util:unmake_tagged_csum(TaggedCsum), {Tag, Ck} = machi_util:unmake_tagged_csum(TaggedCsum),
case check_or_make_tagged_csum(Tag, Ck, Bytes) of case check_or_make_tagged_csum(Tag, Ck, Bytes) of
@ -565,12 +628,14 @@ read_all_ranges(FHd, Filename, [{Offset, Size, TaggedCsum}|T], ReadChunks) ->
{error, bad_checksum}; {error, bad_checksum};
TaggedCsum -> TaggedCsum ->
read_all_ranges(FHd, Filename, T, read_all_ranges(FHd, Filename, T,
[{Filename, Offset, Bytes, TaggedCsum}|ReadChunks]); [{Filename, Offset, Bytes, TaggedCsum}|ReadChunks],
TrimmedChunks);
OtherCsum when Tag =:= ?CSUM_TAG_NONE -> OtherCsum when Tag =:= ?CSUM_TAG_NONE ->
%% XXX FIXME: Should we return something other than %% XXX FIXME: Should we return something other than
%% {ok, ....} in this case? %% {ok, ....} in this case?
read_all_ranges(FHd, Filename, T, read_all_ranges(FHd, Filename, T,
[{Filename, Offset, Bytes, OtherCsum}|ReadChunks]) [{Filename, Offset, Bytes, OtherCsum}|ReadChunks],
TrimmedChunks)
end; end;
{ok, Partial} -> {ok, Partial} ->
lager:error("In file ~p, offset ~p, wanted to read ~p bytes, but got ~p", lager:error("In file ~p, offset ~p, wanted to read ~p bytes, but got ~p",
@ -616,7 +681,7 @@ handle_write(FHd, CsumTable, Filename, TaggedCsum, Offset, Data) ->
{error, Reason} {error, Reason}
end; end;
[{Offset, Size, TaggedCsum}] -> [{Offset, Size, TaggedCsum}] ->
case do_read(FHd, Filename, CsumTable, Offset, Size, false, false, false) of case do_read(FHd, Filename, CsumTable, Offset, Size, false, false) of
{error, _} = E -> {error, _} = E ->
lager:warning("This should never happen: got ~p while reading" lager:warning("This should never happen: got ~p while reading"
" at offset ~p in file ~p that's supposedly written", " at offset ~p in file ~p that's supposedly written",
@ -693,6 +758,8 @@ do_write(FHd, CsumTable, Filename, TaggedCsum, Offset, Size, Data) ->
%% Dialyzer 'can never match': slice_both_side([], _, _) -> %% Dialyzer 'can never match': slice_both_side([], _, _) ->
%% []; %% [];
slice_both_side([], _, _) ->
[];
slice_both_side([{F, Offset, Chunk, _Csum}|L], LeftPos, RightPos) slice_both_side([{F, Offset, Chunk, _Csum}|L], LeftPos, RightPos)
when Offset < LeftPos andalso LeftPos < RightPos -> when Offset < LeftPos andalso LeftPos < RightPos ->
TrashLen = 8 * (LeftPos - Offset), TrashLen = 8 * (LeftPos - Offset),
@ -729,3 +796,43 @@ maybe_regenerate_checksum(FHd, {Offset, Size, _Csum}) ->
Error -> Error ->
throw(Error) throw(Error)
end. end.
%% GC: make sure unwritte bytes = [{Eof, infinity}] and Eof is > max file size
%% walk through the checksum table and make sure all chunks trimmed
%% Then unlink the file
-spec maybe_gc(term(), #state{}) ->
{reply, term(), #state{}} | {stop, normal, term(), #state{}}.
maybe_gc(Reply, S = #state{eof_position = Eof,
max_file_size = MaxFileSize}) when Eof < MaxFileSize ->
lager:debug("The file is still small; not trying GC (Eof, MaxFileSize) = (~p, ~p)~n",
[Eof, MaxFileSize]),
{reply, Reply, S};
maybe_gc(Reply, S = #state{fluname=FluName,
data_filehandle = FHd,
data_dir = DataDir,
filename = Filename,
eof_position = Eof,
csum_table=CsumTable}) ->
case machi_csum_table:all_trimmed(CsumTable, Eof) of
true ->
lager:debug("GC? Let's do it: ~p.~n", [Filename]),
%% Before unlinking a file, it should inform
%% machi_flu_filename_mgr that this file is
%% deleted and mark it as "trimmed" to avoid
%% filename reuse and resurrection. Maybe garbage
%% will remain if a process crashed but it also
%% should be recovered at filename_mgr startup.
%% Also, this should be informed *before* file proxy
%% deletes files.
ok = machi_flu_metadata_mgr:trim_file(FluName, {file, Filename}),
ok = file:close(FHd),
{_, DPath} = machi_util:make_data_filename(DataDir, Filename),
ok = file:delete(DPath),
machi_csum_table:delete(CsumTable),
{stop, normal, Reply,
S#state{data_filehandle=undefined,
csum_table=undefined}};
false ->
{reply, Reply, S}
end.

View file

@ -44,7 +44,8 @@ start_link(FluName) ->
supervisor:start_link({local, make_proxy_name(FluName)}, ?MODULE, []). supervisor:start_link({local, make_proxy_name(FluName)}, ?MODULE, []).
start_proxy(FluName, DataDir, Filename) -> start_proxy(FluName, DataDir, Filename) ->
supervisor:start_child(make_proxy_name(FluName), [Filename, DataDir]). supervisor:start_child(make_proxy_name(FluName),
[FluName, Filename, DataDir]).
init([]) -> init([]) ->
SupFlags = {simple_one_for_one, 1000, 10}, SupFlags = {simple_one_for_one, 1000, 10},

View file

@ -75,6 +75,7 @@
epoch_id :: 'undefined' | machi_dt:epoch_id(), epoch_id :: 'undefined' | machi_dt:epoch_id(),
pb_mode = undefined :: 'undefined' | 'high' | 'low', pb_mode = undefined :: 'undefined' | 'high' | 'low',
high_clnt :: 'undefined' | pid(), high_clnt :: 'undefined' | pid(),
trim_table :: ets:tid(),
props = [] :: list() % proplist props = [] :: list() % proplist
}). }).
@ -148,6 +149,7 @@ main2(FluName, TcpPort, DataDir, Props) ->
{true, undefined} {true, undefined}
end, end,
Witness_p = proplists:get_value(witness_mode, Props, false), Witness_p = proplists:get_value(witness_mode, Props, false),
S0 = #state{flu_name=FluName, S0 = #state{flu_name=FluName,
proj_store=ProjectionPid, proj_store=ProjectionPid,
tcp_port=TcpPort, tcp_port=TcpPort,
@ -411,6 +413,9 @@ do_pb_ll_request3({low_write_chunk, _EpochID, File, Offset, Chunk, CSum_tag,
do_pb_ll_request3({low_read_chunk, _EpochID, File, Offset, Size, Opts}, do_pb_ll_request3({low_read_chunk, _EpochID, File, Offset, Size, Opts},
#state{witness=false} = S) -> #state{witness=false} = S) ->
{do_server_read_chunk(File, Offset, Size, Opts, S), S}; {do_server_read_chunk(File, Offset, Size, Opts, S), S};
do_pb_ll_request3({low_trim_chunk, _EpochID, File, Offset, Size, TriggerGC},
#state{witness=false}=S) ->
{do_server_trim_chunk(File, Offset, Size, TriggerGC, S), S};
do_pb_ll_request3({low_checksum_list, _EpochID, File}, do_pb_ll_request3({low_checksum_list, _EpochID, File},
#state{witness=false}=S) -> #state{witness=false}=S) ->
{do_server_checksum_listing(File, S), S}; {do_server_checksum_listing(File, S), S};
@ -541,22 +546,48 @@ do_server_append_chunk2(_PKey, Prefix, Chunk, CSum_tag, Client_CSum,
do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, #state{flu_name=FluName}) -> do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum, #state{flu_name=FluName}) ->
case sanitize_file_string(File) of case sanitize_file_string(File) of
ok -> ok ->
{ok, Pid} = machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}), case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
{ok, Pid} ->
Meta = [{client_csum_tag, CSum_tag}, {client_csum, CSum}], Meta = [{client_csum_tag, CSum_tag}, {client_csum, CSum}],
machi_file_proxy:write(Pid, Offset, Meta, Chunk); machi_file_proxy:write(Pid, Offset, Meta, Chunk);
{error, trimmed} = Error ->
Error
end;
_ -> _ ->
{error, bad_arg} {error, bad_arg}
end. end.
do_server_read_chunk(File, Offset, Size, Opts, #state{flu_name=FluName})-> do_server_read_chunk(File, Offset, Size, Opts, #state{flu_name=FluName})->
%% TODO: Look inside Opts someday.
case sanitize_file_string(File) of case sanitize_file_string(File) of
ok -> ok ->
{ok, Pid} = machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}), case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
{ok, Pid} ->
case machi_file_proxy:read(Pid, Offset, Size, Opts) of case machi_file_proxy:read(Pid, Offset, Size, Opts) of
%% XXX FIXME
%% For now we are omiting the checksum data because it blows up
%% protobufs.
{ok, ChunksAndTrimmed} -> {ok, ChunksAndTrimmed}; {ok, ChunksAndTrimmed} -> {ok, ChunksAndTrimmed};
Other -> Other Other -> Other
end; end;
{error, trimmed} = Error ->
Error
end;
_ ->
{error, bad_arg}
end.
do_server_trim_chunk(File, Offset, Size, TriggerGC, #state{flu_name=FluName}) ->
lager:debug("Hi there! I'm trimming this: ~s, (~p, ~p), ~p~n",
[File, Offset, Size, TriggerGC]),
case sanitize_file_string(File) of
ok ->
case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, File}) of
{ok, Pid} ->
machi_file_proxy:trim(Pid, Offset, Size, TriggerGC);
{error, trimmed} = Trimmed ->
%% Should be returned back to (maybe) trigger repair
Trimmed
end;
_ -> _ ->
{error, bad_arg} {error, bad_arg}
end. end.
@ -662,10 +693,14 @@ handle_append(Prefix, Chunk, Csum, Extra, FluName, EpochId) ->
Res = machi_flu_filename_mgr:find_or_make_filename_from_prefix(FluName, EpochId, {prefix, Prefix}), Res = machi_flu_filename_mgr:find_or_make_filename_from_prefix(FluName, EpochId, {prefix, Prefix}),
case Res of case Res of
{file, F} -> {file, F} ->
{ok, Pid} = machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, F}), case machi_flu_metadata_mgr:start_proxy_pid(FluName, {file, F}) of
{ok, Pid} ->
{Tag, CS} = machi_util:unmake_tagged_csum(Csum), {Tag, CS} = machi_util:unmake_tagged_csum(Csum),
Meta = [{client_csum_tag, Tag}, {client_csum, CS}], Meta = [{client_csum_tag, Tag}, {client_csum, CS}],
machi_file_proxy:append(Pid, Meta, Extra, Chunk); machi_file_proxy:append(Pid, Meta, Extra, Chunk);
{error, trimmed} = E ->
E
end;
Error -> Error ->
Error Error
end. end.

View file

@ -80,6 +80,7 @@
%% For "internal" replication only. %% For "internal" replication only.
-export([ -export([
write_chunk/5, write_chunk/6, write_chunk/5, write_chunk/6,
trim_chunk/5, trim_chunk/6,
delete_migration/3, delete_migration/4, delete_migration/3, delete_migration/4,
trunc_hack/3, trunc_hack/4 trunc_hack/3, trunc_hack/4
]). ]).
@ -474,6 +475,31 @@ write_chunk(Host, TcpPort, EpochID, File, Offset, Chunk)
disconnect(Sock) disconnect(Sock)
end. end.
%% @doc Restricted API: Write a chunk of already-sequenced data to
%% `File' at `Offset'.
-spec trim_chunk(port_wrap(), machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size()) ->
ok | {error, machi_dt:error_general()} | {error, term()}.
trim_chunk(Sock, EpochID, File0, Offset, Size)
when Offset >= ?MINIMUM_OFFSET ->
ReqID = <<"id">>,
File = machi_util:make_binary(File0),
true = (Offset >= ?MINIMUM_OFFSET),
Req = machi_pb_translate:to_pb_request(
ReqID,
{low_trim_chunk, EpochID, File, Offset, Size, 0}),
do_pb_request_common(Sock, ReqID, Req).
%% @doc Restricted API: Write a chunk of already-sequenced data to
%% `File' at `Offset'.
-spec trim_chunk(machi_dt:inet_host(), machi_dt:inet_port(),
machi_dt:epoch_id(), machi_dt:file_name(), machi_dt:file_offset(), machi_dt:chunk_size()) ->
ok | {error, machi_dt:error_general()} | {error, term()}.
trim_chunk(_Host, _TcpPort, _EpochID, _File, _Offset, _Size) ->
not_used.
%% @doc Restricted API: Delete a file after it has been successfully %% @doc Restricted API: Delete a file after it has been successfully
%% migrated. %% migrated.

View file

@ -125,7 +125,10 @@ list_files_by_prefix(_FluName, Other) ->
%% gen_server API %% gen_server API
init([FluName, DataDir]) -> init([FluName, DataDir]) ->
Tid = ets:new(make_filename_mgr_name(FluName), [named_table, {read_concurrency, true}]), Tid = ets:new(make_filename_mgr_name(FluName), [named_table, {read_concurrency, true}]),
{ok, #state{ fluname = FluName, epoch = 0, datadir = DataDir, tid = Tid }}. {ok, #state{fluname = FluName,
epoch = 0,
datadir = DataDir,
tid = Tid}}.
handle_cast(Req, State) -> handle_cast(Req, State) ->
lager:warning("Got unknown cast ~p", [Req]), lager:warning("Got unknown cast ~p", [Req]),
@ -245,4 +248,3 @@ increment_and_cache_filename(Tid, DataDir, Prefix) ->
-ifdef(TEST). -ifdef(TEST).
-endif. -endif.

View file

@ -39,10 +39,13 @@
-define(HASH(X), erlang:phash2(X)). %% hash algorithm to use -define(HASH(X), erlang:phash2(X)). %% hash algorithm to use
-define(TIMEOUT, 10 * 1000). %% 10 second timeout -define(TIMEOUT, 10 * 1000). %% 10 second timeout
-define(KNOWN_FILES_LIST_PREFIX, "known_files_").
-record(state, {fluname :: atom(), -record(state, {fluname :: atom(),
datadir :: string(), datadir :: string(),
tid :: ets:tid(), tid :: ets:tid(),
cnt :: non_neg_integer() cnt :: non_neg_integer(),
trimmed_files :: machi_plist:plist()
}). }).
%% This record goes in the ets table where filename is the key %% This record goes in the ets table where filename is the key
@ -59,7 +62,8 @@
lookup_proxy_pid/2, lookup_proxy_pid/2,
start_proxy_pid/2, start_proxy_pid/2,
stop_proxy_pid/2, stop_proxy_pid/2,
build_metadata_mgr_name/2 build_metadata_mgr_name/2,
trim_file/2
]). ]).
%% gen_server callbacks %% gen_server callbacks
@ -97,10 +101,24 @@ start_proxy_pid(FluName, {file, Filename}) ->
stop_proxy_pid(FluName, {file, Filename}) -> stop_proxy_pid(FluName, {file, Filename}) ->
gen_server:call(get_manager_atom(FluName, Filename), {stop_proxy_pid, Filename}, ?TIMEOUT). gen_server:call(get_manager_atom(FluName, Filename), {stop_proxy_pid, Filename}, ?TIMEOUT).
trim_file(FluName, {file, Filename}) ->
gen_server:call(get_manager_atom(FluName, Filename), {trim_file, Filename}, ?TIMEOUT).
%% gen_server callbacks %% gen_server callbacks
init([FluName, Name, DataDir, Num]) -> init([FluName, Name, DataDir, Num]) ->
%% important: we'll need another persistent storage to
%% remember deleted (trimmed) file, to prevent resurrection after
%% flu restart and append.
FileListFileName =
filename:join([DataDir, ?KNOWN_FILES_LIST_PREFIX ++ atom_to_list(FluName)]),
{ok, PList} = machi_plist:open(FileListFileName, []),
%% TODO make sure all files non-existent, if any remaining files
%% here, just delete it. They're in the list *because* they're all
%% trimmed.
Tid = ets:new(Name, [{keypos, 2}, {read_concurrency, true}, {write_concurrency, true}]), Tid = ets:new(Name, [{keypos, 2}, {read_concurrency, true}, {write_concurrency, true}]),
{ok, #state{ fluname = FluName, datadir = DataDir, tid = Tid, cnt = Num}}. {ok, #state{fluname = FluName, datadir = DataDir, tid = Tid, cnt = Num,
trimmed_files=PList}}.
handle_cast(Req, State) -> handle_cast(Req, State) ->
lager:warning("Got unknown cast ~p", [Req]), lager:warning("Got unknown cast ~p", [Req]),
@ -113,7 +131,11 @@ handle_call({proxy_pid, Filename}, _From, State = #state{ tid = Tid }) ->
end, end,
{reply, Reply, State}; {reply, Reply, State};
handle_call({start_proxy_pid, Filename}, _From, State = #state{ fluname = N, tid = Tid, datadir = D }) -> handle_call({start_proxy_pid, Filename}, _From,
State = #state{ fluname = N, tid = Tid, datadir = D,
trimmed_files=TrimmedFiles}) ->
case machi_plist:find(TrimmedFiles, Filename) of
false ->
NewR = case lookup_md(Tid, Filename) of NewR = case lookup_md(Tid, Filename) of
not_found -> not_found ->
start_file_proxy(N, D, Filename); start_file_proxy(N, D, Filename);
@ -124,6 +146,10 @@ handle_call({start_proxy_pid, Filename}, _From, State = #state{ fluname = N, tid
end, end,
update_ets(Tid, NewR), update_ets(Tid, NewR),
{reply, {ok, NewR#md.proxy_pid}, State}; {reply, {ok, NewR#md.proxy_pid}, State};
true ->
{reply, {error, trimmed}, State}
end;
handle_call({stop_proxy_pid, Filename}, _From, State = #state{ tid = Tid }) -> handle_call({stop_proxy_pid, Filename}, _From, State = #state{ tid = Tid }) ->
case lookup_md(Tid, Filename) of case lookup_md(Tid, Filename) of
not_found -> not_found ->
@ -137,6 +163,15 @@ handle_call({stop_proxy_pid, Filename}, _From, State = #state{ tid = Tid }) ->
end, end,
{reply, ok, State}; {reply, ok, State};
handle_call({trim_file, Filename}, _,
S = #state{trimmed_files = TrimmedFiles }) ->
case machi_plist:add(TrimmedFiles, Filename) of
{ok, TrimmedFiles2} ->
{reply, ok, S#state{trimmed_files=TrimmedFiles2}};
Error ->
{reply, Error, S}
end;
handle_call(Req, From, State) -> handle_call(Req, From, State) ->
lager:warning("Got unknown call ~p from ~p", [Req, From]), lager:warning("Got unknown call ~p from ~p", [Req, From]),
{reply, hoge, State}. {reply, hoge, State}.
@ -169,18 +204,21 @@ handle_info({'DOWN', Mref, process, Pid, wedged}, State = #state{ tid = Tid }) -
lager:error("file proxy ~p shutdown because it's wedged", [Pid]), lager:error("file proxy ~p shutdown because it's wedged", [Pid]),
clear_ets(Tid, Mref), clear_ets(Tid, Mref),
{noreply, State}; {noreply, State};
handle_info({'DOWN', _Mref, process, Pid, trimmed}, State = #state{ tid = _Tid }) ->
lager:debug("file proxy ~p shutdown because the file was trimmed", [Pid]),
{noreply, State};
handle_info({'DOWN', Mref, process, Pid, Error}, State = #state{ tid = Tid }) -> handle_info({'DOWN', Mref, process, Pid, Error}, State = #state{ tid = Tid }) ->
lager:error("file proxy ~p shutdown because ~p", [Pid, Error]), lager:error("file proxy ~p shutdown because ~p", [Pid, Error]),
clear_ets(Tid, Mref), clear_ets(Tid, Mref),
{noreply, State}; {noreply, State};
handle_info(Info, State) -> handle_info(Info, State) ->
lager:warning("Got unknown info ~p", [Info]), lager:warning("Got unknown info ~p", [Info]),
{noreply, State}. {noreply, State}.
terminate(Reason, _State) -> terminate(Reason, _State = #state{trimmed_files=TrimmedFiles}) ->
lager:info("Shutting down because ~p", [Reason]), lager:info("Shutting down because ~p", [Reason]),
machi_plist:close(TrimmedFiles),
ok. ok.
code_change(_OldVsn, State, _Extra) -> code_change(_OldVsn, State, _Extra) ->

View file

@ -94,6 +94,9 @@ write_chunk(PidSpec, File, Offset, Chunk, CSum) ->
write_chunk(PidSpec, File, Offset, Chunk, CSum, Timeout) -> write_chunk(PidSpec, File, Offset, Chunk, CSum, Timeout) ->
send_sync(PidSpec, {write_chunk, File, Offset, Chunk, CSum}, Timeout). send_sync(PidSpec, {write_chunk, File, Offset, Chunk, CSum}, Timeout).
%% @doc Tries to read a chunk of a specified file. It returns `{ok,
%% {Chunks, TrimmedChunks}}' for live file while it returns `{error,
%% trimmed}' if all bytes of the file was trimmed.
-spec read_chunk(pid(), string(), pos_integer(), pos_integer(), -spec read_chunk(pid(), string(), pos_integer(), pos_integer(),
[{flag_no_checksum | flag_no_chunk | needs_trimmed, boolean()}]) -> [{flag_no_checksum | flag_no_chunk | needs_trimmed, boolean()}]) ->
{ok, {list(), list()}} | {error, term()}. {ok, {list(), list()}} | {error, term()}.
@ -107,6 +110,10 @@ read_chunk(PidSpec, File, Offset, Size, Options) ->
read_chunk(PidSpec, File, Offset, Size, Options, Timeout) -> read_chunk(PidSpec, File, Offset, Size, Options, Timeout) ->
send_sync(PidSpec, {read_chunk, File, Offset, Size, Options}, Timeout). send_sync(PidSpec, {read_chunk, File, Offset, Size, Options}, Timeout).
%% @doc Trims arbitrary binary range of any file. TODO: Add option
%% specifying whether to trigger GC.
-spec trim_chunk(pid(), string(), non_neg_integer(), machi_dt:chunk_size()) ->
ok | {error, term()}.
trim_chunk(PidSpec, File, Offset, Size) -> trim_chunk(PidSpec, File, Offset, Size) ->
trim_chunk(PidSpec, File, Offset, Size, ?DEFAULT_TIMEOUT). trim_chunk(PidSpec, File, Offset, Size, ?DEFAULT_TIMEOUT).
@ -415,6 +422,8 @@ convert_general_status_code('NOT_WRITTEN') ->
{error, not_written}; {error, not_written};
convert_general_status_code('WRITTEN') -> convert_general_status_code('WRITTEN') ->
{error, written}; {error, written};
convert_general_status_code('TRIMMED') ->
{error, trimmed};
convert_general_status_code('NO_SUCH_FILE') -> convert_general_status_code('NO_SUCH_FILE') ->
{error, no_such_file}; {error, no_such_file};
convert_general_status_code('PARTIAL_READ') -> convert_general_status_code('PARTIAL_READ') ->

View file

@ -88,6 +88,17 @@ from_pb_request(#mpb_ll_request{
offset=Offset, offset=Offset,
chunk_size=Size} = ChunkPos, chunk_size=Size} = ChunkPos,
{ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}}; {ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}};
from_pb_request(#mpb_ll_request{
req_id=ReqID,
trim_chunk=#mpb_ll_trimchunkreq{
epoch_id=PB_EpochID,
file=File,
offset=Offset,
size=Size,
trigger_gc=PB_TriggerGC}}) ->
EpochID = conv_to_epoch_id(PB_EpochID),
TriggerGC = conv_to_boolean(PB_TriggerGC),
{ReqID, {low_trim_chunk, EpochID, File, Offset, Size, TriggerGC}};
from_pb_request(#mpb_ll_request{ from_pb_request(#mpb_ll_request{
req_id=ReqID, req_id=ReqID,
checksum_list=#mpb_ll_checksumlistreq{ checksum_list=#mpb_ll_checksumlistreq{
@ -262,6 +273,10 @@ from_pb_response(#mpb_ll_response{
_ -> _ ->
{ReqID, machi_pb_high_client:convert_general_status_code(Status)} {ReqID, machi_pb_high_client:convert_general_status_code(Status)}
end; end;
from_pb_response(#mpb_ll_response{
req_id=ReqID,
trim_chunk=#mpb_ll_trimchunkresp{status=Status}}) ->
{ReqID, machi_pb_high_client:convert_general_status_code(Status)};
from_pb_response(#mpb_ll_response{ from_pb_response(#mpb_ll_response{
req_id=ReqID, req_id=ReqID,
checksum_list=#mpb_ll_checksumlistresp{ checksum_list=#mpb_ll_checksumlistresp{
@ -398,11 +413,10 @@ to_pb_request(ReqID, {low_write_chunk, EpochID, File, Offset, Chunk, CSum_tag, C
chunk=Chunk, chunk=Chunk,
csum=PB_CSum}}}; csum=PB_CSum}}};
to_pb_request(ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}) -> to_pb_request(ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}) ->
%% TODO: stop ignoring Opts ^_^
PB_EpochID = conv_from_epoch_id(EpochID), PB_EpochID = conv_from_epoch_id(EpochID),
FNChecksum = proplists:get_value(no_checksum, Opts, false), FNChecksum = proplists:get_value(no_checksum, Opts, false),
FNChunk = proplists:get_value(no_chunk, Opts, false), FNChunk = proplists:get_value(no_chunk, Opts, false),
NeedsTrimmed = proplists:get_value(needs_merge, Opts, false), NeedsTrimmed = proplists:get_value(needs_trimmed, Opts, false),
#mpb_ll_request{ #mpb_ll_request{
req_id=ReqID, do_not_alter=2, req_id=ReqID, do_not_alter=2,
read_chunk=#mpb_ll_readchunkreq{ read_chunk=#mpb_ll_readchunkreq{
@ -414,6 +428,15 @@ to_pb_request(ReqID, {low_read_chunk, EpochID, File, Offset, Size, Opts}) ->
flag_no_checksum=machi_util:bool2int(FNChecksum), flag_no_checksum=machi_util:bool2int(FNChecksum),
flag_no_chunk=machi_util:bool2int(FNChunk), flag_no_chunk=machi_util:bool2int(FNChunk),
flag_needs_trimmed=machi_util:bool2int(NeedsTrimmed)}}; flag_needs_trimmed=machi_util:bool2int(NeedsTrimmed)}};
to_pb_request(ReqID, {low_trim_chunk, EpochID, File, Offset, Size, TriggerGC}) ->
PB_EpochID = conv_from_epoch_id(EpochID),
#mpb_ll_request{req_id=ReqID, do_not_alter=2,
trim_chunk=#mpb_ll_trimchunkreq{
epoch_id=PB_EpochID,
file=File,
offset=Offset,
size=Size,
trigger_gc=TriggerGC}};
to_pb_request(ReqID, {low_checksum_list, EpochID, File}) -> to_pb_request(ReqID, {low_checksum_list, EpochID, File}) ->
PB_EpochID = conv_from_epoch_id(EpochID), PB_EpochID = conv_from_epoch_id(EpochID),
#mpb_ll_request{req_id=ReqID, do_not_alter=2, #mpb_ll_request{req_id=ReqID, do_not_alter=2,
@ -524,6 +547,18 @@ to_pb_response(ReqID, {low_read_chunk, _EID, _Fl, _Off, _Sz, _Opts}, Resp)->
_Else -> _Else ->
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else])) make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
end; end;
to_pb_response(ReqID, {low_trim_chunk, _, _, _, _, _}, Resp) ->
case Resp of
ok ->
#mpb_ll_response{req_id=ReqID,
trim_chunk=#mpb_ll_trimchunkresp{status='OK'}};
{error, _}=Error ->
Status = conv_from_status(Error),
#mpb_ll_response{req_id=ReqID,
read_chunk=#mpb_ll_trimchunkresp{status=Status}};
_Else ->
make_ll_error_resp(ReqID, 66, io_lib:format("err ~p", [_Else]))
end;
to_pb_response(ReqID, {low_checksum_list, _EpochID, _File}, Resp) -> to_pb_response(ReqID, {low_checksum_list, _EpochID, _File}, Resp) ->
case Resp of case Resp of
{ok, Chunk} -> {ok, Chunk} ->
@ -909,6 +944,8 @@ conv_from_status({error, not_written}) ->
'NOT_WRITTEN'; 'NOT_WRITTEN';
conv_from_status({error, written}) -> conv_from_status({error, written}) ->
'WRITTEN'; 'WRITTEN';
conv_from_status({error, trimmed}) ->
'TRIMMED';
conv_from_status({error, no_such_file}) -> conv_from_status({error, no_such_file}) ->
'NO_SUCH_FILE'; 'NO_SUCH_FILE';
conv_from_status({error, partial_read}) -> conv_from_status({error, partial_read}) ->

63
src/machi_plist.erl Normal file
View file

@ -0,0 +1,63 @@
-module(machi_plist).
%%% @doc persistent list of binaries that support mutual exclusion
-export([open/2, close/1, find/2, add/2]).
-ifdef(TEST).
-export([all/1]).
-endif.
-record(machi_plist,
{filename :: string(),
fd :: file:descriptor(),
list}).
-type plist() :: #machi_plist{}.
-export_type([plist/0]).
-spec open(filename:filename(), proplists:proplist()) ->
{ok, plist()} | {error, file:posix()}.
open(Filename, _Opt) ->
List = case file:read_file(Filename) of
{ok, <<>>} -> [];
{ok, Bin} -> binary_to_term(Bin);
{error, enoent} -> []
end,
case file:open(Filename, [read, write, raw, binary, sync]) of
{ok, Fd} ->
{ok, #machi_plist{filename=Filename,
fd=Fd,
list=List}};
Error ->
Error
end.
-spec close(plist()) -> ok.
close(#machi_plist{fd=Fd}) ->
_ = file:close(Fd).
-spec find(plist(), string()) -> boolean().
find(#machi_plist{list=List}, Name) ->
lists:member(Name, List).
-spec add(plist(), string()) -> {ok, plist()} | {error, file:posix()}.
add(Plist = #machi_plist{list=List0, fd=Fd}, Name) ->
case find(Plist, Name) of
true ->
{ok, Plist};
false ->
List = lists:append(List0, [Name]),
case file:pwrite(Fd, 0, term_to_binary(List)) of
ok ->
{ok, Plist#machi_plist{list=List}};
Error ->
Error
end
end.
-ifdef(TEST).
-spec all(plist()) -> [file:filename()].
all(#machi_plist{list=List}) ->
List.
-endif.

View file

@ -79,6 +79,7 @@
%% Internal API %% Internal API
write_chunk/5, write_chunk/6, write_chunk/5, write_chunk/6,
trim_chunk/5, trim_chunk/6,
%% Helpers %% Helpers
stop_proxies/1, start_proxies/1 stop_proxies/1, start_proxies/1
@ -310,6 +311,18 @@ write_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) ->
Else Else
end. end.
trim_chunk(PidSpec, EpochID, File, Offset, Size) ->
trim_chunk(PidSpec, EpochID, File, Offset, Size, infinity).
%% @doc Write a chunk (binary- or iolist-style) of data to a file
%% with `Prefix' at `Offset'.
trim_chunk(PidSpec, EpochID, File, Offset, Chunk, Timeout) ->
gen_server:call(PidSpec,
{req, {trim_chunk, EpochID, File, Offset, Chunk}},
Timeout).
%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%
init([I]) -> init([I]) ->
@ -383,6 +396,9 @@ make_req_fun({read_chunk, EpochID, File, Offset, Size, Opts},
make_req_fun({write_chunk, EpochID, File, Offset, Chunk}, make_req_fun({write_chunk, EpochID, File, Offset, Chunk},
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) -> #state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
fun() -> Mod:write_chunk(Sock, EpochID, File, Offset, Chunk) end; fun() -> Mod:write_chunk(Sock, EpochID, File, Offset, Chunk) end;
make_req_fun({trim_chunk, EpochID, File, Offset, Size},
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
fun() -> Mod:trim_chunk(Sock, EpochID, File, Offset, Size) end;
make_req_fun({checksum_list, EpochID, File}, make_req_fun({checksum_list, EpochID, File},
#state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) -> #state{sock=Sock,i=#p_srvr{proto_mod=Mod}}) ->
fun() -> Mod:checksum_list(Sock, EpochID, File) end; fun() -> Mod:checksum_list(Sock, EpochID, File) end;

View file

@ -76,13 +76,14 @@ smoke3_test() ->
{?LINE, trim, {0, 1024, <<>>}, undefined, undefined} {?LINE, trim, {0, 1024, <<>>}, undefined, undefined}
], ],
[ begin [ begin
%% ?debugVal({Line, Chunk}), %% ?debugVal({_Line, Chunk}),
{Offset, Size, Csum} = Chunk, {Offset, Size, Csum} = Chunk,
?assertEqual(LeftN0, ?assertEqual(LeftN0,
machi_csum_table:find_leftneighbor(MC, Offset)), machi_csum_table:find_leftneighbor(MC, Offset)),
?assertEqual(RightN0, ?assertEqual(RightN0,
machi_csum_table:find_rightneighbor(MC, Offset+Size)), machi_csum_table:find_rightneighbor(MC, Offset+Size)),
LeftN = case LeftN0 of LeftN = case LeftN0 of
{OffsL, SizeL, trimmed} -> {OffsL, SizeL, trimmed};
{OffsL, SizeL, _} -> {OffsL, SizeL, <<"boom">>}; {OffsL, SizeL, _} -> {OffsL, SizeL, <<"boom">>};
OtherL -> OtherL OtherL -> OtherL
end, end,

View file

@ -170,7 +170,7 @@ start_command(S) ->
start(_S) -> start(_S) ->
{_, _, MS} = os:timestamp(), {_, _, MS} = os:timestamp(),
File = test_server:temp_name("eqc_data") ++ "." ++ integer_to_list(MS), File = test_server:temp_name("eqc_data") ++ "." ++ integer_to_list(MS),
{ok, Pid} = machi_file_proxy:start_link(File, ?TESTDIR), {ok, Pid} = machi_file_proxy:start_link(some_flu, File, ?TESTDIR),
unlink(Pid), unlink(Pid),
Pid. Pid.

View file

@ -78,7 +78,7 @@ random_binary(Start, End) ->
machi_file_proxy_test_() -> machi_file_proxy_test_() ->
clean_up_data_dir(?TESTDIR), clean_up_data_dir(?TESTDIR),
{ok, Pid} = machi_file_proxy:start_link("test", ?TESTDIR), {ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR),
[ [
?_assertEqual({error, bad_arg}, machi_file_proxy:read(Pid, -1, -1)), ?_assertEqual({error, bad_arg}, machi_file_proxy:read(Pid, -1, -1)),
?_assertEqual({error, bad_arg}, machi_file_proxy:write(Pid, -1, <<"yo">>)), ?_assertEqual({error, bad_arg}, machi_file_proxy:write(Pid, -1, <<"yo">>)),
@ -100,7 +100,7 @@ machi_file_proxy_test_() ->
multiple_chunks_read_test_() -> multiple_chunks_read_test_() ->
clean_up_data_dir(?TESTDIR), clean_up_data_dir(?TESTDIR),
{ok, Pid} = machi_file_proxy:start_link("test", ?TESTDIR), {ok, Pid} = machi_file_proxy:start_link(fluname, "test", ?TESTDIR),
[ [
?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))), ?_assertMatch({ok, "test", _}, machi_file_proxy:append(Pid, random_binary(0, 1024))),
?_assertEqual(ok, machi_file_proxy:write(Pid, 10000, <<"fail">>)), ?_assertEqual(ok, machi_file_proxy:write(Pid, 10000, <<"fail">>)),

View file

@ -38,6 +38,7 @@ smoke_test2() ->
Ps = [#p_srvr{name=a, address="localhost", port=Port, props="./data.a"} Ps = [#p_srvr{name=a, address="localhost", port=Port, props="./data.a"}
], ],
D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]), D = orddict:from_list([{P#p_srvr.name, P} || P <- Ps]),
ok = application:set_env(machi, max_file_size, 1024*1024),
[os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps], [os:cmd("rm -rf " ++ P#p_srvr.props) || P <- Ps],
{ok, SupPid} = machi_flu_sup:start_link(), {ok, SupPid} = machi_flu_sup:start_link(),
@ -90,16 +91,47 @@ smoke_test2() ->
{ok, [{File1Size,File1}]} = ?C:list_files(Clnt), {ok, [{File1Size,File1}]} = ?C:list_files(Clnt),
true = is_integer(File1Size), true = is_integer(File1Size),
File1Bin = binary_to_list(File1),
[begin [begin
%% ok = ?C:trim_chunk(Clnt, Fl, Off, Sz) #p_srvr{name=Name, port=Port, props=Dir} = P,
%% This gets an error as trim API is still a stub ?assertEqual({ok, [File1Bin]},
?assertMatch({bummer, file:list_dir(filename:join([Dir, "data"]))),
{throw, FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]),
{error, bad_joss_taipan_fixme}, {ok, Plist} = machi_plist:open(FileListFileName, []),
_Boring_stack_trace}}, ?assertEqual([], machi_plist:all(Plist))
?C:trim_chunk(Clnt, Fl, Off, Sz)) end || P <- Ps],
end || {Ch, Fl, Off, Sz} <- Reads],
[begin
ok = ?C:trim_chunk(Clnt, Fl, Off, Sz)
end || {_Ch, Fl, Off, Sz} <- Reads],
[begin
{ok, {[], Trimmed}} =
?C:read_chunk(Clnt, Fl, Off, Sz, [{needs_trimmed, true}]),
Filename = binary_to_list(Fl),
?assertEqual([{Filename, Off, Sz}], Trimmed)
end || {_Ch, Fl, Off, Sz} <- Reads],
LargeBytes = binary:copy(<<"x">>, 1024*1024),
LBCsum = {client_sha, machi_util:checksum_chunk(LargeBytes)},
{ok, {Offx, Sizex, Filex}} =
?C:append_chunk(Clnt, PK, Prefix, LargeBytes, LBCsum, 0),
ok = ?C:trim_chunk(Clnt, Filex, Offx, Sizex),
%% Make sure everything was trimmed
File = binary_to_list(Filex),
[begin
#p_srvr{name=Name, port=_Port, props=Dir} = P,
?assertEqual({ok, []},
file:list_dir(filename:join([Dir, "data"]))),
FileListFileName = filename:join([Dir, "known_files_" ++ atom_to_list(Name)]),
{ok, Plist} = machi_plist:open(FileListFileName, []),
?assertEqual([File], machi_plist:all(Plist))
end || P <- Ps],
[begin
{error, trimmed} =
?C:read_chunk(Clnt, Fl, Off, Sz, [])
end || {_Ch, Fl, Off, Sz} <- Reads],
ok ok
after after
(catch ?C:quit(Clnt)) (catch ?C:quit(Clnt))

17
test/machi_plist_test.erl Normal file
View file

@ -0,0 +1,17 @@
-module(machi_plist_test).
-include_lib("eunit/include/eunit.hrl").
open_close_test() ->
FileName = "bark-bark-one",
file:delete(FileName),
{ok, PList0} = machi_plist:open(FileName, []),
{ok, PList1} = machi_plist:add(PList0, "boomar"),
?assertEqual(["boomar"], machi_plist:all(PList1)),
ok = machi_plist:close(PList1),
{ok, PList2} = machi_plist:open(FileName, []),
?assertEqual(["boomar"], machi_plist:all(PList2)),
ok = machi_plist:close(PList2),
file:delete(FileName),
ok.