Minor refactoring + add demo/exploratory time measurement code

%% Demo/exploratory hackery to check relative speeds of dealing with
%% checksum data in different ways.
%%
%% Summary:
%%
%% * Use compact binary encoding, with 1 byte header for entry length.
%%     * Because the hex-style code is *far* slower just for enc & dec ops.
%%     * For 1M entries of enc+dec: 0.215 sec vs. 15.5 sec.
%% * File sorter when sorting binaries as-is is only 30-40% slower
%%   than an in-memory split (of huge binary emulated by file:read_file()
%%   "big slurp") and sort of the same as-is sortable binaries.
%% * File sorter slows by a factor of about 2.5 if {order, fun compare/2}
%%   function must be used, i.e. because the checksum entry lengths differ.
%% * File sorter + {order, fun compare/2} is still *far* faster than external
%%   sort by OS X's sort(1) of sortable ASCII hex-style:
%%   4.5 sec vs. 21 sec.
%% * File sorter {order, fun compare/2} is faster than in-memory sort
%%   of order-friendly 3-tuple-style: 4.5 sec vs. 15 sec.
This commit is contained in:
Scott Lystig Fritchie 2015-06-30 14:08:46 +09:00
parent 2a4ae1ba52
commit 2d070bf1e3

View file

@ -78,6 +78,10 @@
-include("machi_pb.hrl"). -include("machi_pb.hrl").
-include("machi_projection.hrl"). -include("machi_projection.hrl").
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif. % TEST
-define(SERVER_CMD_READ_TIMEOUT, 600*1000). -define(SERVER_CMD_READ_TIMEOUT, 600*1000).
-export([start_link/1, stop/1, -export([start_link/1, stop/1,
@ -263,9 +267,6 @@ append_server_loop(FluPid, #state{data_dir=DataDir,wedged=Wedged_p}=S) ->
append_server_loop(FluPid, S) append_server_loop(FluPid, S)
end. end.
-define(EpochIDSpace, ((4*2)+(20*2))). % hexencodingwhee!
-define(CSumSpace, ((1*2)+(20*2))). % hexencodingwhee!
net_server_loop(Sock, S) -> net_server_loop(Sock, S) ->
case gen_tcp:recv(Sock, 0, ?SERVER_CMD_READ_TIMEOUT) of case gen_tcp:recv(Sock, 0, ?SERVER_CMD_READ_TIMEOUT) of
{ok, Bin} -> {ok, Bin} ->
@ -444,29 +445,8 @@ do_server_append_chunk2(_PKey, Prefix, Chunk, CSum_tag, Client_CSum,
ChunkExtra, #state{flu_name=FluName}=_S) -> ChunkExtra, #state{flu_name=FluName}=_S) ->
%% TODO: Do anything with PKey? %% TODO: Do anything with PKey?
try try
CSum = case CSum_tag of TaggedCSum = check_or_make_tagged_checksum(CSum_tag, Client_CSum,Chunk),
?CSUM_TAG_NONE -> FluName ! {seq_append, self(), Prefix, Chunk, TaggedCSum, ChunkExtra},
%% TODO: If the client was foolish enough to use
%% this type of non-checksum, then the client gets
%% what it deserves wrt data integrity, alas. In
%% the client-side Chain Replication method, each
%% server will calculated this independently, which
%% isn't exactly what ought to happen for best data
%% integrity checking. In server-side CR, the csum
%% should be calculated by the head and passed down
%% the chain together with the value.
CS = machi_util:checksum_chunk(Chunk),
machi_util:make_tagged_csum(server_sha, CS);
?CSUM_TAG_CLIENT_SHA ->
CS = machi_util:checksum_chunk(Chunk),
if CS == Client_CSum ->
machi_util:make_tagged_csum(server_sha,
Client_CSum);
true ->
throw({bad_csum, CS})
end
end,
FluName ! {seq_append, self(), Prefix, Chunk, CSum, ChunkExtra},
receive receive
{assignment, Offset, File} -> {assignment, Offset, File} ->
Size = iolist_size(Chunk), Size = iolist_size(Chunk),
@ -514,36 +494,11 @@ do_server_write_chunk(File, Offset, Chunk, CSum_tag, CSum,
do_server_write_chunk2(_File, Offset, Chunk, CSum_tag, do_server_write_chunk2(_File, Offset, Chunk, CSum_tag,
Client_CSum, _DataDir, FHc, FHd) -> Client_CSum, _DataDir, FHc, FHd) ->
try try
CSum = case CSum_tag of TaggedCSum = check_or_make_tagged_checksum(CSum_tag, Client_CSum,Chunk),
?CSUM_TAG_NONE ->
%% TODO: If the client was foolish enough to use
%% this type of non-checksum, then the client gets
%% what it deserves wrt data integrity, alas. In
%% the client-side Chain Replication method, each
%% server will calculated this independently, which
%% isn't exactly what ought to happen for best data
%% integrity checking. In server-side CR, the csum
%% should be calculated by the head and passed down
%% the chain together with the value.
CS = machi_util:checksum_chunk(Chunk),
machi_util:make_tagged_csum(server_sha,CS);
?CSUM_TAG_CLIENT_SHA ->
CS = machi_util:checksum_chunk(Chunk),
if CS == Client_CSum ->
machi_util:make_tagged_csum(server_sha,
Client_CSum);
true ->
throw({bad_csum, CS})
end
end,
Size = iolist_size(Chunk), Size = iolist_size(Chunk),
case file:pwrite(FHd, Offset, Chunk) of case file:pwrite(FHd, Offset, Chunk) of
ok -> ok ->
OffsetHex = machi_util:bin_to_hexstr(<<Offset:64/big>>), CSum_info = encode_csum_file_entry_nothex(Offset, Size, TaggedCSum),
LenHex = machi_util:bin_to_hexstr(<<Size:32/big>>),
CSumHex2 = machi_util:bin_to_hexstr(CSum),
CSum_info = [OffsetHex, 32, LenHex, 32,
CSumHex2, 10],
ok = file:write(FHc, CSum_info), ok = file:write(FHc, CSum_info),
ok; ok;
_Else3 -> _Else3 ->
@ -780,21 +735,18 @@ seq_append_server_loop(DataDir, Prefix, _File, {FHd,FHc}, FileNum, Offset)
run_seq_append_server2(Prefix, DataDir); run_seq_append_server2(Prefix, DataDir);
seq_append_server_loop(DataDir, Prefix, File, {FHd,FHc}=FH_, FileNum, Offset) -> seq_append_server_loop(DataDir, Prefix, File, {FHd,FHc}=FH_, FileNum, Offset) ->
receive receive
{seq_append, From, Prefix, Chunk, CSum, Extra} -> {seq_append, From, Prefix, Chunk, TaggedCSum, Extra} ->
if Chunk /= <<>> -> if Chunk /= <<>> ->
ok = file:pwrite(FHd, Offset, Chunk); ok = file:pwrite(FHd, Offset, Chunk);
true -> true ->
ok ok
end, end,
From ! {assignment, Offset, File}, From ! {assignment, Offset, File},
Len = byte_size(Chunk), Size = iolist_size(Chunk),
OffsetHex = machi_util:bin_to_hexstr(<<Offset:64/big>>), CSum_info = encode_csum_file_entry_nothex(Offset, Size, TaggedCSum),
LenHex = machi_util:bin_to_hexstr(<<Len:32/big>>),
CSumHex = machi_util:bin_to_hexstr(CSum),
CSum_info = [OffsetHex, 32, LenHex, 32, CSumHex, 10],
ok = file:write(FHc, CSum_info), ok = file:write(FHc, CSum_info),
seq_append_server_loop(DataDir, Prefix, File, FH_, seq_append_server_loop(DataDir, Prefix, File, FH_,
FileNum, Offset + Len + Extra); FileNum, Offset + Size + Extra);
{sync_stuff, FromPid, Ref} -> {sync_stuff, FromPid, Ref} ->
file:sync(FHc), file:sync(FHc),
FromPid ! {sync_finished, Ref}, FromPid ! {sync_finished, Ref},
@ -913,3 +865,235 @@ split_uri_options(OpsBin) ->
[<<"size">>, Bin] -> [<<"size">>, Bin] ->
{size, binary_to_integer(Bin)} {size, binary_to_integer(Bin)}
end || X <- L]. end || X <- L].
encode_csum_file_entry_nothex(Offset, Size, TaggedCSum) ->
Len = 8 + 4 + byte_size(TaggedCSum),
[<<Len:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big>>,
TaggedCSum].
decode_csum_file_entry_nothex(<<_:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big, TaggedCSum/binary>>) ->
{Offset, Size, TaggedCSum}.
split_1byte_len_tag_decode(Bin) ->
split_1byte_len_tag_decode(Bin, []).
split_1byte_len_tag_decode(<<Len:8/unsigned-big, Part:Len/binary, Rest/binary>>, Acc)->
split_1byte_len_tag_decode(Rest, [decode_csum_file_entry_nothex(Part)|Acc]);
split_1byte_len_tag_decode(Other, Acc) ->
{lists:reverse(Acc), Other}.
split_1byte_len_tag(Bin) ->
split_1byte_len_tag(Bin, []).
split_1byte_len_tag(<<Len:8/unsigned-big, Part:Len/binary, Rest/binary>>, Acc)->
case get(hack_length) of
Len -> ok;
_ -> put(hack_different, true)
end,
split_1byte_len_tag(Rest, [<<Len:8/unsigned-big, Part/binary>>|Acc]);
split_1byte_len_tag(Other, Acc) ->
{lists:reverse(Acc), Other}.
%% split_1byte_len_tag(<<Len:8/unsigned-big, Part:Len/binary, Rest/binary>>, Acc)->
%% case get(hack_length) of
%% Len -> ok;
%% _ -> put(hack_different, true)
%% end,
%% split_1byte_len_tag(Rest, [<<Len:8/unsigned-big, Part/binary>>|Acc]);
%% split_1byte_len_tag(Other, Acc) ->
%% {lists:reverse(Acc), Other}.
check_or_make_tagged_checksum(?CSUM_TAG_NONE, Client_CSum, Chunk) ->
%% TODO: If the client was foolish enough to use
%% this type of non-checksum, then the client gets
%% what it deserves wrt data integrity, alas. In
%% the client-side Chain Replication method, each
%% server will calculated this independently, which
%% isn't exactly what ought to happen for best data
%% integrity checking. In server-side CR, the csum
%% should be calculated by the head and passed down
%% the chain together with the value.
CS = machi_util:checksum_chunk(Chunk),
machi_util:make_tagged_csum(server_sha, CS);
check_or_make_tagged_checksum(?CSUM_TAG_CLIENT_SHA, Client_CSum, Chunk) ->
CS = machi_util:checksum_chunk(Chunk),
if CS == Client_CSum ->
machi_util:make_tagged_csum(server_sha,
Client_CSum);
true ->
throw({bad_csum, CS})
end.
-ifdef(TEST).
%% Remove "_COMMENTED" string to run the demo/exploratory code.
timing_demo_test_COMMENTED_() ->
{timeout, 300, fun() -> timing_demo_test2() end}.
%% Demo/exploratory hackery to check relative speeds of dealing with
%% checksum data in different ways.
%%
%% Summary:
%%
%% * Use compact binary encoding, with 1 byte header for entry length.
%% * Because the hex-style code is *far* slower just for enc & dec ops.
%% * For 1M entries of enc+dec: 0.215 sec vs. 15.5 sec.
%% * File sorter when sorting binaries as-is is only 30-40% slower
%% than an in-memory split (of huge binary emulated by file:read_file()
%% "big slurp") and sort of the same as-is sortable binaries.
%% * File sorter slows by a factor of about 2.5 if {order, fun compare/2}
%% function must be used, i.e. because the checksum entry lengths differ.
%% * File sorter + {order, fun compare/2} is still *far* faster than external
%% sort by OS X's sort(1) of sortable ASCII hex-style:
%% 4.5 sec vs. 21 sec.
%% * File sorter {order, fun compare/2} is faster than in-memory sort
%% of order-friendly 3-tuple-style: 4.5 sec vs. 15 sec.
timing_demo_test2() ->
Xs = [random:uniform(1 bsl 32) || _ <- lists:duplicate(1*1000*1000, $x)],
CSum = <<"123456789abcdef0A">>,
17 = byte_size(CSum),
io:format(user, "\n", []),
%% %% {ok, ZZZ} = file:open("/tmp/foo.hex-style", [write, binary, raw, delayed_write]),
io:format(user, "Hex-style file entry enc+dec: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{HexUSec, _} =
timer:tc(fun() ->
lists:foldl(fun(X, _) ->
B = encode_csum_file_entry(X, 100, CSum),
%% file:write(ZZZ, [B, 10]),
decode_csum_file_entry(list_to_binary(B))
end, x, Xs)
end),
io:format(user, "~.3f sec\n", [HexUSec / 1000000]),
%% %% file:close(ZZZ),
io:format(user, "Not-sortable file entry enc+dec: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{NotSortedUSec, _} =
timer:tc(fun() ->
lists:foldl(fun(X, _) ->
B = encode_csum_file_entry_nothex(X, 100, CSum),
decode_csum_file_entry_nothex(list_to_binary(B))
end, x, Xs)
end),
io:format(user, "~.3f sec\n", [NotSortedUSec / 1000000]),
NotHexList = lists:foldl(fun(X, Acc) ->
B = encode_csum_file_entry_nothex(X, 100, CSum),
[B|Acc]
end, [], Xs),
NotHexBin = iolist_to_binary(NotHexList),
io:format(user, "Split NotHexBin: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{NotHexBinUSec, SplitRes} =
timer:tc(fun() ->
put(hack_length, 29),
put(hack_different, false),
{Sorted, _Leftover} = split_1byte_len_tag(NotHexBin),
io:format(user, " Leftover ~p (hack_different ~p) ", [_Leftover, get(hack_different)]),
Sorted
end),
io:format(user, "~.3f sec\n", [NotHexBinUSec / 1000000]),
io:format(user, "Sort Split results: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{SortSplitUSec, _} =
timer:tc(fun() ->
lists:sort(SplitRes)
%% lists:sort(fun sort_2lines/2, SplitRes)
end),
io:format(user, "~.3f sec\n", [SortSplitUSec / 1000000]),
UnsortedName = "/tmp/foo.unsorted",
SortedName = "/tmp/foo.sorted",
ok = file:write_file(UnsortedName, NotHexList),
io:format(user, "File Sort Split results: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{FileSortUSec, _} =
timer:tc(fun() ->
{ok, FHin} = file:open(UnsortedName, [read, binary]),
{ok, FHout} = file:open(SortedName,
[write, binary, delayed_write]),
put(hack_sorter_sha_ctx, crypto:hash_init(sha)),
ok = file_sorter:sort(sort_input_fun(FHin, <<>>),
sort_output_fun(FHout),
[{format,binary},
{header, 1}
%% , {order, fun sort_2lines/2}
])
end),
io:format(user, "~.3f sec\n", [FileSortUSec / 1000000]),
_SHA = crypto:hash_final(get(hack_sorter_sha_ctx)),
%% io:format(user, "SHA via (hack_sorter_sha_ctx) = ~p\n", [_SHA]),
io:format(user, "NotHex-Not-sortable tuple list creation: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{NotHexTupleCreationUSec, NotHexTupleList} =
timer:tc(fun() ->
lists:foldl(fun(X, Acc) ->
B = encode_csum_file_entry(
X, 100, CSum),
[B|Acc]
end, [], Xs)
end),
io:format(user, "~.3f sec\n", [NotHexTupleCreationUSec / 1000000]),
io:format(user, "NotHex-Not-sortable tuple list sort: ", []),
[erlang:garbage_collect(self()) || _ <- lists:seq(1, 4)],
{NotHexTupleSortUSec, _} =
timer:tc(fun() ->
lists:sort(NotHexTupleList)
end),
io:format(user, "~.3f sec\n", [NotHexTupleSortUSec / 1000000]),
ok.
sort_2lines(<<_:1/binary, A/binary>>, <<_:1/binary, B/binary>>) ->
A < B.
sort_input_fun(FH, PrevStuff) ->
fun(close) ->
ok;
(read) ->
case file:read(FH, 1024*1024) of
{ok, NewStuff} ->
AllStuff = if PrevStuff == <<>> ->
NewStuff;
true ->
<<PrevStuff/binary, NewStuff/binary>>
end,
{SplitRes, Leftover} = split_1byte_len_tag(AllStuff),
{SplitRes, sort_input_fun(FH, Leftover)};
eof ->
end_of_input
end
end.
sort_output_fun(FH) ->
fun(close) ->
file:close(FH);
(Stuff) ->
Ctx = get(hack_sorter_sha_ctx),
put(hack_sorter_sha_ctx, crypto:hash_update(Ctx, Stuff)),
ok = file:write(FH, Stuff),
sort_output_fun(FH)
end.
encode_csum_file_entry(Offset, Size, TaggedCSum) ->
OffsetHex = machi_util:bin_to_hexstr(<<Offset:64/big>>),
SizeHex = machi_util:bin_to_hexstr(<<Size:32/big>>),
CSumHex = machi_util:bin_to_hexstr(TaggedCSum),
[OffsetHex, 32, SizeHex, 32, CSumHex].
decode_csum_file_entry(<<OffsetHex:16/binary, _:1/binary, SizeHex:8/binary, _:1/binary, CSumHex/binary>>) ->
Offset = machi_util:hexstr_to_bin(OffsetHex),
Size = machi_util:hexstr_to_bin(SizeHex),
CSum = machi_util:hexstr_to_bin(CSumHex),
{Offset, Size, CSum}.
-endif. % TEST