Add merkle library #48
3 changed files with 246 additions and 222 deletions
src/machi_merkle_tree.erl (new file, 203 lines)
@@ -0,0 +1,203 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Creates a Merkle tree per file based on the checksum data for
%% a given data file.
%%
%% Has a selectable backend, chosen at open.
%%
%% The default `merklet' implementation uses the `merklet' library. Keys are
%% encoded as `<<Offset:64, Size:32>>'; values are encoded as `<<Tag:8, Csum/binary>>',
%% *or* as `<<0>>' for unwritten bytes, or `<<1>>' for trimmed bytes.
%%
%% The `naive' implementation represents each leaf as:
%%
%% `<<Offset:64, Size:32, 0>>' for unwritten bytes
%% `<<Offset:64, Size:32, 1>>' for trimmed bytes
%% `<<Offset:64, Size:32, Csum/binary>>' for written bytes
%%
%% The tree feeds these leaf nodes into hashes over chunks with a minimum
%% size of 1 MB (1024 KB); if the file is larger, we try to get about 100
%% chunks, which form "Level 1." We then aim for around 10 hashes at
%% level 2, 2 hashes at level 3, and finally the root.

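To make the two leaf encodings described above concrete, here is a small illustrative sketch (not part of the diff) of what the ?ENCODE and ?NAIVE_ENCODE macros defined below produce for a written chunk at offset 0 of size 4096; the checksum value is a made-up placeholder:

    Csum  = <<1, 0:160>>,                                   %% hypothetical <<Tag:8, Hash/binary>> from the csum table
    %% merklet backend: the key/value pair handed to merklet:insert/2
    Key   = <<0:64/unsigned-big, 4096:32/unsigned-big>>,    %% ?ENCODE(0, 4096)
    Value = Csum,                                           %% or <<0>> (unwritten) / <<1>> (trimmed)
    %% naive backend: one self-contained leaf binary per chunk
    Leaf  = <<0:64/unsigned-big, 4096:32/unsigned-big, Csum/binary>>.  %% ?NAIVE_ENCODE(0, 4096, Csum)
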
-module(machi_merkle_tree).

-include("machi.hrl").

-export([
    open/2,
    open/3,
    tree/1,
    filename/1,
    diff/2
]).

-record(naive, {
    chunk_size = 1048576 :: pos_integer(), %% default 1 MB
    recalc = true :: boolean(),
    root :: 'undefined' | binary(),
    lvl1 = [] :: [ binary() ],
    lvl2 = [] :: [ binary() ],
    lvl3 = [] :: [ binary() ],
    leaves = [] :: [ { Offset :: pos_integer(),
                       Size :: pos_integer(),
                       Csum :: binary()} ]
}).

-record(mt, {
    filename :: string(),
    tree :: #naive{}|merklet:tree(),
    backend = 'merklet' :: 'naive'|'merklet'
}).

-define(TRIMMED, <<1>>).
-define(UNWRITTEN, <<0>>).
-define(ENCODE(Offset, Size), <<Offset:64/unsigned-big, Size:32/unsigned-big>>).
-define(NAIVE_ENCODE(Offset, Size, Data), <<Offset:64/unsigned-big, Size:32/unsigned-big, Data/binary>>).

-define(NEW_MERKLET, undefined).
-define(TIMEOUT, (10*1000)).

-define(MINIMUM_CHUNK, 1048576). %% 1024 * 1024
-define(LEVEL_SIZE, 10).
-define(H, sha).

%% public API

open(Filename, DataDir) ->
    open(Filename, DataDir, merklet).

open(Filename, DataDir, Type) ->
    Tree = load_filename(Filename, DataDir, Type),
    {ok, #mt{ filename = Filename, tree = Tree, backend = Type}}.

tree(#mt{ tree = T, backend = merklet }) -> T;
tree(#mt{ tree = T, backend = naive }) ->
    case T#naive.recalc of
        true -> build_tree(T);
        false -> T
    end.

filename(#mt{ filename = F }) -> F.

diff(#mt{backend = naive, tree = T1}, #mt{backend = naive, tree = T2}) ->
    case T1#naive.root == T2#naive.root of
        true -> same;
        false -> different %% TODO: implement diff
    end;
diff(#mt{backend = merklet, tree = T1}, #mt{backend = merklet, tree = T2}) ->
    case merklet:diff(T1, T2) of
        [] -> same;
        Diff -> Diff
    end;
diff(_, _) -> error(badarg).

%% private

% @private
load_filename(Filename, DataDir, merklet) ->
    {_Last, M} = do_load(Filename, DataDir, fun insert_csum/2, ?NEW_MERKLET),
    M;
load_filename(Filename, DataDir, naive) ->
    {Last, M} = do_load(Filename, DataDir, fun insert_csum_naive/2, []),
    ChunkSize = max(?MINIMUM_CHUNK, Last div 100),
    T = #naive{ leaves = lists:reverse(M), chunk_size = ChunkSize, recalc = true },
    build_tree(T).
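The naive backend derives its chunk size from the last covered offset via `ChunkSize = max(?MINIMUM_CHUNK, Last div 100)'. As a rough illustration with made-up file sizes (not part of the diff):

    %% 1 GiB file: 1073741824 div 100 = 10737418, so ~10.7 MB chunks and ~100 level-1 hashes
    max(1048576, 1073741824 div 100).  %% 10737418
    %% 50 MiB file: 52428800 div 100 = 524288, below the 1 MB floor, so ~50 level-1 hashes
    max(1048576, 52428800 div 100).    %% 1048576
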

do_load(Filename, DataDir, FoldFun, AccInit) ->
    CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
    {ok, T} = machi_csum_table:open(CsumFile, []),
    Acc = machi_csum_table:foldl_chunks(FoldFun, {0, AccInit}, T),
    ok = machi_csum_table:close(T),
    Acc.

% @private
insert_csum({Last, Size, _Csum}=In, {Last, MT}) ->
    %% no gap here, insert a record
    {Last+Size, update_merkle_tree(In, MT)};
insert_csum({Offset, Size, _Csum}=In, {Last, MT}) ->
    %% gap here, insert unwritten record
    %% *AND* insert written record
    Hole = Offset - Last,
    MT0 = update_merkle_tree({Last, Hole, unwritten}, MT),
    {Offset+Size, update_merkle_tree(In, MT0)}.

insert_csum_naive({Last, Size, _Csum}=In, {Last, MT}) ->
    %% no gap
    {Last+Size, update_acc(In, MT)};
insert_csum_naive({Offset, Size, _Csum}=In, {Last, MT}) ->
    Hole = Offset - Last,
    MT0 = update_acc({Last, Hole, unwritten}, MT),
    {Offset+Size, update_acc(In, MT0)}.
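The two fold functions above (insert_csum/2 for merklet, insert_csum_naive/2 for the naive backend) carry an {Offset, Acc} accumulator and patch any hole between consecutive chunks with an `unwritten' leaf. A small worked trace with hypothetical chunks {0, 1024, C1} and {4096, 1024, C2} (not part of the diff):

    %% insert_csum_naive({0, 1024, C1},    {0, []})   -> {1024, [Leaf1]}                     (no gap)
    %% insert_csum_naive({4096, 1024, C2}, {1024, L}) -> {5120, [Leaf2, UnwrittenLeaf | L]}
    %%   (the 3072-byte hole at offset 1024 is recorded as an `unwritten' leaf before Leaf2)
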

% @private
update_merkle_tree({Offset, Size, unwritten}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), ?UNWRITTEN}, MT);
update_merkle_tree({Offset, Size, trimmed}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), ?TRIMMED}, MT);
update_merkle_tree({Offset, Size, Csum}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), Csum}, MT).

update_acc({Offset, Size, unwritten}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, ?UNWRITTEN)} | MT ];
update_acc({Offset, Size, trimmed}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, ?TRIMMED)} | MT ];
update_acc({Offset, Size, Csum}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, Csum)} | MT ].

build_tree(MT = #naive{ leaves = L, chunk_size = ChunkSize }) ->
    lager:debug("Leaves: ~p~n", [L]),
    Lvl1s = build_level_1(ChunkSize, L, 1, [ crypto:hash_init(?H) ]),
    lager:debug("Lvl1: ~p~n", [Lvl1s]),
    Mod2 = length(Lvl1s) div ?LEVEL_SIZE,
    Lvl2s = build_int_level(Mod2, Lvl1s, 1, [ crypto:hash_init(?H) ]),
    lager:debug("Lvl2: ~p~n", [Lvl2s]),
    Mod3 = length(Lvl2s) div 2,
    Lvl3s = build_int_level(Mod3, Lvl2s, 1, [ crypto:hash_init(?H) ]),
    lager:debug("Lvl3: ~p~n", [Lvl3s]),
    Root = build_root(Lvl3s, crypto:hash_init(?H)),
    lager:debug("Root: ~p~n", [Root]),
    MT#naive{ root = Root, lvl1 = Lvl1s, lvl2 = Lvl2s, lvl3 = Lvl3s, recalc = false }.

build_root([], Ctx) ->
    crypto:hash_final(Ctx);
build_root([H|T], Ctx) ->
    build_root(T, crypto:hash_update(Ctx, H)).

build_int_level(_Mod, [], _Cnt, [ Ctx | Rest ]) ->
    lists:reverse( [ crypto:hash_final(Ctx) | Rest ] );
build_int_level(Mod, [H|T], Cnt, [ Ctx | Rest ]) when Cnt rem Mod == 0 ->
    NewCtx = crypto:hash_init(?H),
    build_int_level(Mod, T, Cnt + 1, [ crypto:hash_update(NewCtx, H), crypto:hash_final(Ctx) | Rest ]);
build_int_level(Mod, [H|T], Cnt, [ Ctx | Rest ]) ->
    build_int_level(Mod, T, Cnt+1, [ crypto:hash_update(Ctx, H) | Rest ]).

build_level_1(_Size, [], _Multiple, [ Ctx | Rest ]) ->
    lists:reverse([ crypto:hash_final(Ctx) | Rest ]);
build_level_1(Size, [{Pos, Len, Hash}|T], Multiple, [ Ctx | Rest ])
  when ( Pos + Len ) > ( Size * Multiple ) ->
    NewCtx = crypto:hash_init(?H),
    build_level_1(Size, T, Multiple+1,
                  [ crypto:hash_update(NewCtx, Hash), crypto:hash_final(Ctx) | Rest ]);
build_level_1(Size, [{Pos, Len, Hash}|T], Multiple, [ Ctx | Rest ])
  when ( Pos + Len ) =< ( Size * Multiple ) ->
    build_level_1(Size, T, Multiple, [ crypto:hash_update(Ctx, Hash) | Rest ]).

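A minimal usage sketch for the new module (not part of the diff; the file name and data directory are hypothetical, and open/3 assumes the file's checksum table can be opened via machi_csum_table):

    {ok, M} = machi_merkle_tree:open("somefile", "./data", merklet),
    {ok, N} = machi_merkle_tree:open("somefile", "./data", naive),
    same = machi_merkle_tree:diff(M, M),   %% identical merklet trees
    same = machi_merkle_tree:diff(N, N),   %% identical naive roots
    %% machi_merkle_tree:diff(M, N) raises error(badarg): backends must match
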
machi_merkle_tree_mgr.erl (deleted)
@@ -1,208 +0,0 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc This manager maintains a Merkle tree per file per FLU as implemented
%% by the `merklet' library. Keys are encoded as `<<Offset:64, Size:32>>'
%% values encoded as `<<Tag:8, Csum/binary>>' *or* as <<0>> for unwritten
%% bytes, or <<1>> for trimmed bytes.

-module(machi_merkle_tree_mgr).
-behaviour(gen_server).

-include("machi.hrl").

-export([
    child_spec/3,
    start_link/3,
    initialize/2,
    update/5,
    fetch/2
]).

%% gen_server callbacks
-export([
    init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    terminate/2,
    code_change/3
]).

-record(state, {
    fluname :: atom(),
    datadir :: string(),
    tid     :: ets:tid()
}).

-record(mt, {
    filename :: string(),
    tree     :: merklet:tree()
}).

-define(TRIMMED, <<1>>).
-define(UNWRITTEN, <<0>>).
-define(ENCODE(Offset, Size), <<Offset:64/unsigned-big, Size:32/unsigned-big>>).

-define(NEW_MERKLET, undefined).
-define(TIMEOUT, (10*1000)).

%% public API

child_spec(FluName, DataDir, Options) ->
    Name = make_merkle_tree_mgr_name(FluName),
    {Name,
     {?MODULE, start_link, [FluName, DataDir, Options]},
     permanent, 5000, worker, [?MODULE]}.

start_link(FluName, DataDir, Options) ->
    gen_server:start_link({local, make_merkle_tree_mgr_name(FluName)},
                          ?MODULE,
                          {FluName, DataDir, Options},
                          []).

-spec initialize( FluName :: atom(),
                  Filename :: string() ) -> ok.
%% @doc A heads-up hint to the manager that it ought to compute a merkle
%% tree for the given file (if it hasn't already).
initialize(FluName, Filename) ->
    gen_server:cast(make_merkle_tree_mgr_name(FluName),
                    {initialize, Filename}).

-spec update( FluName :: atom(),
              Filename :: string(),
              Offset :: non_neg_integer(),
              Length :: pos_integer(),
              Csum :: binary() ) -> ok.
%% @doc A new leaf node ought to be added file the given filename,
%% with the particular information.
update(FluName, Filename, Offset, Length, Csum) ->
    gen_server:cast(make_merkle_tree_mgr_name(FluName),
                    {update, Filename, Offset, Length, Csum}).

-spec fetch ( FluName :: atom(),
              Filename :: string() ) -> {ok, 'undefined'|merklet:tree()}.
%% @doc Returns the merkle tree for the given filename.
fetch(FluName, Filename) ->
    gen_server:call(make_merkle_tree_mgr_name(FluName),
                    {fetch, Filename}, ?TIMEOUT).

%% gen_server callbacks
init({FluName, DataDir, Options}) ->
    Tid = ets:new(make_merkle_tree_mgr_name(FluName), [{keypos, 2}, {read_concurrency, true}]),
    case proplists:get_value(no_load, Options, false) of
        true ->
            ok;
        false ->
            handle_load(Tid, DataDir)
    end,
    {ok, #state{fluname=FluName, datadir=DataDir, tid = Tid}}.

handle_call({fetch, Filename}, _From, S = #state{ tid = Tid }) ->
    Res = handle_fetch(Tid, Filename),
    {reply, {ok, Res}, S};
handle_call(Req, _From, State) ->
    lager:warning("Unknown call: ~p", [Req]),
    {reply, whoaaaaaaaaaaaa, State}.

handle_cast({initialize, Filename}, S = #state{ datadir = D, tid = Tid }) ->
    load_filename(Tid, D, Filename),
    {noreply, S};

handle_cast({update, Filename, Offset, Length, Csum}, S = #state{ tid = Tid }) ->
    %% XXX FIXME: Not sure about the correctness of this
    insert(Tid, Filename, {Offset, Length, Csum}),
    {noreply, S};

handle_cast(Cast, State) ->
    lager:warning("Unknown cast: ~p", [Cast]),
    {noreply, State}.

handle_info(Req, State) ->
    lager:warning("Unknown info message: ~p", [Req]),
    {noreply, State}.

terminate(Reason, #state{fluname = F}) ->
    lager:debug("Shutting down merkle tree manager for FLU ~p because ~p",
                [F, Reason]),
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% private

make_merkle_tree_mgr_name(FluName) ->
    list_to_atom(atom_to_list(FluName) ++ "_merkle_tree_mgr").

handle_load(Tid, DataDir) ->
    Files = get_files(DataDir),
    lists:foreach(fun(F) -> load_filename(Tid, DataDir, F) end, Files).

get_files(DataDir) ->
    {_, WildPath} = machi_util:make_data_filename(DataDir, ""),
    filelib:wildcard("*", WildPath).

load_filename(Tid, DataDir, Filename) ->
    CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
    {ok, T} = machi_csum_table:open(CsumFile, []),
    %% docs say that the traversal order of ets:foldl is non-determinstic
    %% but hopefully since csum_table uses an ordered set that's not true...
    {_LastPosition, M} = machi_csum_table:foldl_chunks(fun insert_csum/2,
                                                       {?MINIMUM_OFFSET, ?NEW_MERKLET}, T),
    true = ets:insert_new(Tid, #mt{ filename = Filename, tree = M}),
    ok = machi_csum_table:close(T),
    ok.

insert_csum({Last, Size, _Csum}=In, {Last, MT}) ->
    %% no gap here, insert a record
    {Last+Size, update_merkle_tree(In, MT)};
insert_csum({Offset, Size, _Csum}=In, {Last, MT}) ->
    %% gap here, insert unwritten record
    %% *AND* insert written record
    Hole = Offset - Last,
    MT0 = update_merkle_tree({Last, Hole, unwritten}, MT),
    {Offset+Size, update_merkle_tree(In, MT0)}.

insert(Tid, Filename, Term) ->
    case ets:lookup(Tid, Filename) of
        [] -> error(not_found); %% TODO: Something better?
        [R] ->
            NewMT = update_merkle_tree(Term, R#mt.tree),
            %% we choose update_element because it
            %% makes atomic changes so it is concurrent
            %% safe. The regular 'insert' function
            %% does not provide that guarantee.
            true = ets:update_element(Tid, Filename, {#mt.tree, NewMT}),
            ok
    end.

handle_fetch(Tid, Filename) ->
    case ets:lookup(Tid, Filename) of
        [] -> undefined;
        [R] -> R#mt.tree
    end.

update_merkle_tree({Offset, Size, unwritten}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), ?UNWRITTEN}, MT);
update_merkle_tree({Offset, Size, trimmed}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), ?TRIMMED}, MT);
update_merkle_tree({Offset, Size, Csum}, MT) ->
    merklet:insert({?ENCODE(Offset, Size), Csum}, MT).

test module (renamed machi_merkle_tree_mgr_test -> machi_merkle_tree_test)
@@ -18,7 +18,7 @@
 %%
 %% -------------------------------------------------------------------
 
--module(machi_merkle_tree_mgr_test).
+-module(machi_merkle_tree_test).
 -compile([export_all]).
 
 -include_lib("eunit/include/eunit.hrl").
@@ -27,6 +27,16 @@
 -define(TESTFILE, "yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1").
 -define(GAP_CHANCE, 0.10).
 
+choose_filename() ->
+    random_from_list([
+        "def^c5ea7511-d649-47d6-a8c3-2b619379c237^1",
+        "jkl^b077eff7-b2be-4773-a73f-fea4acb8a732^1",
+        "stu^553fa47a-157c-4fac-b10f-2252c7d8c37a^1",
+        "vwx^ae015d68-7689-4c9f-9677-926c6664f513^1",
+        "yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1"
+    ]).
+
+
 make_csum_file(DataDir, Filename, Offsets) ->
     Path = machi_util:make_checksum_filename(DataDir, Filename),
     filelib:ensure_dir(Path),
@@ -83,18 +93,37 @@ generate_offsets(_FH, _Filesize, _Current, Acc) ->
     lists:reverse(Acc).
 
 test() ->
+    test(100).
+
+test(N) ->
+    {ok, F} = file:open("results.txt", [raw, write]),
+    lists:foreach(fun(X) -> format_and_store(F, run_test(X)) end, lists:seq(1, N)).
+
+format_and_store(F, {OffsetNum, {MTime, MSize}, {NTime, NSize}}) ->
+    S = io_lib:format("~w\t~w\t~w\t~w\t~w\n", [OffsetNum, MTime, MSize, NTime, NSize]),
+    ok = file:write(F, S).
+
+run_test(C) ->
     random:seed(os:timestamp()),
-    O = make_offsets("test/" ++ ?TESTFILE),
-    ?debugFmt("Offsets: ~p", [O]),
-    make_csum_file(".", ?TESTFILE, O),
+    OffsetFn = "test/" ++ choose_filename(),
+    O = make_offsets(OffsetFn),
+    Fn = "csum_" ++ integer_to_list(C),
+    make_csum_file(".", Fn, O),
 
-    _ = machi_merkle_tree_mgr:start_link(test, ".", []),
-    machi_merkle_tree_mgr:initialize(test, ?TESTFILE),
-    timer:sleep(1000),
-    All = machi_merkle_tree_mgr:fetch(test, ?TESTFILE),
-    ?debugFmt("All: ~p~n", [All]),
-    timer:sleep(1000),
-    All = machi_merkle_tree_mgr:fetch(test, ?TESTFILE),
-    ?debugFmt("All: ~p~n", [All]),
-    ok.
+    Osize = length(O),
+
+    {MTime, {ok, M}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", merklet) end),
+    {NTime, {ok, N}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", naive) end),
+
+    ?assertEqual(Fn, machi_merkle_tree:filename(M)),
+    ?assertEqual(Fn, machi_merkle_tree:filename(N)),
+
+    MTree = machi_merkle_tree:tree(M),
+    MSize = byte_size(term_to_binary(MTree)),
+
+    NTree = machi_merkle_tree:tree(N),
+    NSize = byte_size(term_to_binary(NTree)),
+
+    ?assertEqual(same, machi_merkle_tree:diff(N, N)),
+    ?assertEqual(same, machi_merkle_tree:diff(M, M)),
+    {Osize, {MTime, MSize}, {NTime, NSize}}.
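For reference, each run_test/1 result is written to results.txt as one tab-separated row in the order: offset count, merklet open/build time (microseconds), merklet tree size after term_to_binary (bytes), naive open/build time (microseconds), naive tree size (bytes). A row could look like this (values are made up for illustration):

    137	52341	18764	49102	9531
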