diff --git a/src/machi.proto b/src/machi.proto index 8358a83..8a4bf15 100644 --- a/src/machi.proto +++ b/src/machi.proto @@ -209,6 +209,9 @@ message Mpb_ChecksumListReq { message Mpb_ChecksumListResp { required Mpb_GeneralStatusCode status = 1; + // For data type rationale, see comments for + // machi_flu1_client:checksum_list/4 or + // http://basho.github.io/machi/edoc/machi_flu1_client.html#checksum_list-4 optional bytes chunk = 2; } @@ -403,6 +406,9 @@ message Mpb_LL_ChecksumListReq { message Mpb_LL_ChecksumListResp { required Mpb_GeneralStatusCode status = 1; + // For data type rationale, see comments for + // machi_flu1_client:checksum_list/4 or + // http://basho.github.io/machi/edoc/machi_flu1_client.html#checksum_list-4 optional bytes chunk = 2; } diff --git a/src/machi_flu1.erl b/src/machi_flu1.erl index 997eb35..e45c4bd 100644 --- a/src/machi_flu1.erl +++ b/src/machi_flu1.erl @@ -87,6 +87,9 @@ -export([start_link/1, stop/1, update_wedge_state/3]). -export([make_listener_regname/1, make_projection_server_regname/1]). +-export([encode_csum_file_entry/3, encode_csum_file_entry_bin/3, + decode_csum_file_entry/1, + split_checksum_list_blob/1, split_checksum_list_blob_decode/1]). -record(state, { flu_name :: atom(), @@ -498,7 +501,7 @@ do_server_write_chunk2(_File, Offset, Chunk, CSum_tag, Size = iolist_size(Chunk), case file:pwrite(FHd, Offset, Chunk) of ok -> - CSum_info = encode_csum_file_entry_nothex(Offset, Size, TaggedCSum), + CSum_info = encode_csum_file_entry(Offset, Size, TaggedCSum), ok = file:write(FHc, CSum_info), ok; _Else3 -> @@ -752,7 +755,7 @@ seq_append_server_loop(DataDir, Prefix, File, {FHd,FHc}=FH_, FileNum, Offset) -> end, From ! {assignment, Offset, File}, Size = iolist_size(Chunk), - CSum_info = encode_csum_file_entry_nothex(Offset, Size, TaggedCSum), + CSum_info = encode_csum_file_entry(Offset, Size, TaggedCSum), ok = file:write(FHc, CSum_info), seq_append_server_loop(DataDir, Prefix, File, FH_, FileNum, Offset + Size + Extra); @@ -875,42 +878,85 @@ split_uri_options(OpsBin) -> {size, binary_to_integer(Bin)} end || X <- L]. -encode_csum_file_entry_nothex(Offset, Size, TaggedCSum) -> +%% @doc Encode `Offset + Size + TaggedCSum' into an `iolist()' type for +%% internal storage by the FLU. + +-spec encode_csum_file_entry( + machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()) -> + iolist(). +encode_csum_file_entry(Offset, Size, TaggedCSum) -> Len = 8 + 4 + byte_size(TaggedCSum), [<>, TaggedCSum]. -decode_csum_file_entry_nothex(<<_:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big, TaggedCSum/binary>>) -> +%% @doc Encode `Offset + Size + TaggedCSum' into an `binary()' type for +%% internal storage by the FLU. + +-spec encode_csum_file_entry_bin( + machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()) -> + binary(). +encode_csum_file_entry_bin(Offset, Size, TaggedCSum) -> + Len = 8 + 4 + byte_size(TaggedCSum), + <>. + +%% @doc Decode a single `binary()' blob into an +%% `{Offset,Size,TaggedCSum}' tuple. +%% +%% The internal encoding (which is currently exposed to the outside world +%% via this function and related ones) is: +%% +%% +%% +%% See `machi.hrl' for the tagged checksum types, e.g., +%% `?CSUM_TAG_NONE'. + +-spec decode_csum_file_entry(binary()) -> + {machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()}. +decode_csum_file_entry(<<_:8/unsigned-big, Offset:64/unsigned-big, Size:32/unsigned-big, TaggedCSum/binary>>) -> {Offset, Size, TaggedCSum}. -split_1byte_len_tag_decode(Bin) -> - split_1byte_len_tag_decode(Bin, []). +%% @doc Split a `binary()' blob of `checksum_list' data into a list of +%% unparsed `binary()' blobs, one per entry. +%% +%% Decode the unparsed blobs with {@link decode_csum_file_entry/1}, if +%% desired. -split_1byte_len_tag_decode(<>, Acc)-> - split_1byte_len_tag_decode(Rest, [decode_csum_file_entry_nothex(Part)|Acc]); -split_1byte_len_tag_decode(Other, Acc) -> - {lists:reverse(Acc), Other}. +-spec split_checksum_list_blob(binary()) -> + list(binary()). +split_checksum_list_blob(Bin) -> + split_checksum_list_blob(Bin, []). -split_1byte_len_tag(Bin) -> - split_1byte_len_tag(Bin, []). - -split_1byte_len_tag(<>, Acc)-> +split_checksum_list_blob(<>, Acc)-> case get(hack_length) of Len -> ok; _ -> put(hack_different, true) end, - split_1byte_len_tag(Rest, [<>|Acc]); -split_1byte_len_tag(Other, Acc) -> - {lists:reverse(Acc), Other}. + split_checksum_list_blob(Rest, [<>|Acc]); +split_checksum_list_blob(Rest, Acc) -> + {lists:reverse(Acc), Rest}. -%% split_1byte_len_tag(<>, Acc)-> -%% case get(hack_length) of -%% Len -> ok; -%% _ -> put(hack_different, true) -%% end, -%% split_1byte_len_tag(Rest, [<>|Acc]); -%% split_1byte_len_tag(Other, Acc) -> -%% {lists:reverse(Acc), Other}. +%% @doc Split a `binary()' blob of `checksum_list' data into a list of +%% `{Offset,Size,TaggedCSum}' tuples. + +-spec split_checksum_list_blob_decode(binary()) -> + list({machi_dt:file_offset(), machi_dt:chunk_size(), machi_dt:chunk_s()}). +split_checksum_list_blob_decode(Bin) -> + split_checksum_list_blob_decode(Bin, []). + +split_checksum_list_blob_decode(<>, Acc)-> + split_checksum_list_blob_decode(Rest, [decode_csum_file_entry(Part)|Acc]); +split_checksum_list_blob_decode(Rest, Acc) -> + {lists:reverse(Acc), Rest}. check_or_make_tagged_checksum(?CSUM_TAG_NONE, Client_CSum, Chunk) -> %% TODO: If the client was foolish enough to use @@ -971,9 +1017,9 @@ timing_demo_test2() -> {HexUSec, _} = timer:tc(fun() -> lists:foldl(fun(X, _) -> - B = encode_csum_file_entry(X, 100, CSum), + B = encode_csum_file_entry_hex(X, 100, CSum), %% file:write(ZZZ, [B, 10]), - decode_csum_file_entry(list_to_binary(B)) + decode_csum_file_entry_hex(list_to_binary(B)) end, x, Xs) end), io:format(user, "~.3f sec\n", [HexUSec / 1000000]), @@ -984,14 +1030,14 @@ timing_demo_test2() -> {NotSortedUSec, _} = timer:tc(fun() -> lists:foldl(fun(X, _) -> - B = encode_csum_file_entry_nothex(X, 100, CSum), - decode_csum_file_entry_nothex(list_to_binary(B)) + B = encode_csum_file_entry(X, 100, CSum), + decode_csum_file_entry(list_to_binary(B)) end, x, Xs) end), io:format(user, "~.3f sec\n", [NotSortedUSec / 1000000]), NotHexList = lists:foldl(fun(X, Acc) -> - B = encode_csum_file_entry_nothex(X, 100, CSum), + B = encode_csum_file_entry(X, 100, CSum), [B|Acc] end, [], Xs), NotHexBin = iolist_to_binary(NotHexList), @@ -1002,7 +1048,7 @@ timing_demo_test2() -> timer:tc(fun() -> put(hack_length, 29), put(hack_different, false), - {Sorted, _Leftover} = split_1byte_len_tag(NotHexBin), + {Sorted, _Leftover} = split_checksum_list_blob(NotHexBin), io:format(user, " Leftover ~p (hack_different ~p) ", [_Leftover, get(hack_different)]), Sorted end), @@ -1045,7 +1091,7 @@ timing_demo_test2() -> {NotHexTupleCreationUSec, NotHexTupleList} = timer:tc(fun() -> lists:foldl(fun(X, Acc) -> - B = encode_csum_file_entry( + B = encode_csum_file_entry_hex( X, 100, CSum), [B|Acc] end, [], Xs) @@ -1076,7 +1122,7 @@ sort_input_fun(FH, PrevStuff) -> true -> <> end, - {SplitRes, Leftover} = split_1byte_len_tag(AllStuff), + {SplitRes, Leftover} = split_checksum_list_blob(AllStuff), {SplitRes, sort_input_fun(FH, Leftover)}; eof -> end_of_input @@ -1093,13 +1139,13 @@ sort_output_fun(FH) -> sort_output_fun(FH) end. -encode_csum_file_entry(Offset, Size, TaggedCSum) -> +encode_csum_file_entry_hex(Offset, Size, TaggedCSum) -> OffsetHex = machi_util:bin_to_hexstr(<>), SizeHex = machi_util:bin_to_hexstr(<>), CSumHex = machi_util:bin_to_hexstr(TaggedCSum), [OffsetHex, 32, SizeHex, 32, CSumHex]. -decode_csum_file_entry(<>) -> +decode_csum_file_entry_hex(<>) -> Offset = machi_util:hexstr_to_bin(OffsetHex), Size = machi_util:hexstr_to_bin(SizeHex), CSum = machi_util:hexstr_to_bin(CSumHex), diff --git a/src/machi_flu1_client.erl b/src/machi_flu1_client.erl index 648048a..53a55f1 100644 --- a/src/machi_flu1_client.erl +++ b/src/machi_flu1_client.erl @@ -174,16 +174,36 @@ read_chunk(Host, TcpPort, EpochID, File, Offset, Size) %% @doc Fetch the list of chunk checksums for `File'. -spec checksum_list(port_wrap(), machi_dt:epoch_id(), machi_dt:file_name()) -> - {ok, [machi_dt:chunk_summary()]} | + {ok, binary()} | {error, machi_dt:error_general() | 'no_such_file' | 'partial_read'} | {error, term()}. checksum_list(Sock, EpochID, File) -> checksum_list2(Sock, EpochID, File). %% @doc Fetch the list of chunk checksums for `File'. +%% +%% Why return a simple `binary()' type rather than +%% `[machi_dt:chunk_summary()]'? The two reasons are: +%%
    +%%
  1. Server overhead: the CPU required to chop up the implementation- +%% specific store into zillions of very small terms is very high. +%%
  2. +%%
  3. Protocol encoding and decoding overhead: the cost is non-zero, +%% and the sum of cost of encoding and decoding a zillion small terms +%% is substantial. +%%
  4. +%%
+%% +%% For both reasons, the server's protocol response is absurdly simple +%% and very fast: send back a `binary()' blob to the client. Then it +%% is the client's responsibility to spend the CPU time to parse the +%% blob. +%% +%% Details of the encoding used inside the `binary()' blog can be found +%% in the EDoc comments for {@link machi_flu1:decode_csum_file_entry/1}. -spec checksum_list(machi_dt:inet_host(), machi_dt:inet_port(), machi_dt:epoch_id(), machi_dt:file_name()) -> - {ok, [machi_dt:chunk_summary()]} | + {ok, binary()} | {error, machi_dt:error_general() | 'no_such_file'} | {error, term()}. checksum_list(Host, TcpPort, EpochID, File) when is_integer(TcpPort) -> Sock = connect(#p_srvr{proto_mod=?MODULE, address=Host, port=TcpPort}),