Merge pull request #48 from basho/mra/merkle-cleanup
Add merkle library
commit e9b1134cd9
4 changed files with 375 additions and 0 deletions
.gitignore (vendored, 1 addition)

@@ -26,3 +26,4 @@ rel/machi
current_counterexample.eqc
foo*
typescript*
*.swp
include/machi_merkle_tree.hrl (new file, 20 additions)

@@ -0,0 +1,20 @@
%% machi merkle tree records

-record(naive, {
          chunk_size = 1048576 :: pos_integer(), %% default 1 MB
          recalc = true :: boolean(),
          root :: 'undefined' | binary(),
          lvl1 = [] :: [ binary() ],
          lvl2 = [] :: [ binary() ],
          lvl3 = [] :: [ binary() ],
          leaves = [] :: [ { Offset :: pos_integer(),
                             Size :: pos_integer(),
                             Csum :: binary() } ]
         }).

-record(mt, {
          filename :: string(),
          tree :: #naive{},
          backend = 'naive' :: 'naive'
         }).
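For orientation, the sketch below shows how these two records fit together: a hypothetical module (merkle_record_example, not part of this commit) hand-builds a #naive{} with made-up leaves and wraps it in the #mt{} handle that the public API hands back. Offsets, sizes, and checksum binaries are illustrative only.

    %% Illustrative sketch only; all leaf values are fabricated.
    -module(merkle_record_example).
    -include("machi_merkle_tree.hrl").
    -export([example/0]).

    example() ->
        %% Each leaf is {Offset, Size, EncodedChecksum}.
        Leaves = [{0,       1048576, <<"fake-encoded-csum-1">>},
                  {1048576,    4096, <<"fake-encoded-csum-2">>}],
        Naive  = #naive{leaves = Leaves, chunk_size = 1048576, recalc = true},
        #mt{filename = "example-file", tree = Naive, backend = naive}.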
src/machi_merkle_tree.erl (new file, 156 additions)

@@ -0,0 +1,156 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Creates a Merkle tree per file based on the checksum data for
%% a given data file.
%%
%% The `naive' implementation representation is:
%%
%% `<<Offset:64, Size:32, 0>>' for unwritten bytes
%% `<<Offset:64, Size:32, 1>>' for trimmed bytes
%% `<<Offset:64, Size:32, Csum/binary>>' for written bytes
%%
%% The tree feeds these leaf nodes into hashes representing chunks with a
%% minimum size of 1 MB (1,048,576 bytes). For larger files the chunk size
%% grows so that the first rollup ("Level 1") has about 100 chunks; those
%% roll up into roughly 10 hashes at level 2, then 2 hashes at level 3, and
%% finally the root.

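%% A worked example of the numbers above (illustrative values, assuming the
%% encoding macro defined below): a written chunk at offset 0 of size 1048576
%% with checksum Csum becomes
%% `<<0:64/unsigned-big, 1048576:32/unsigned-big, Csum/binary>>'.
%% For a 500 MB file, the level-1 chunk size works out to
%% max(1048576, 524288000 div 100) = 5242880 bytes, i.e. about 100 level-1
%% hashes, which roll up into roughly 10 level-2 hashes, 2 level-3 hashes,
%% and a single root.
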
-module(machi_merkle_tree).

-include("machi.hrl").
-include("machi_merkle_tree.hrl").

-ifdef(TEST).
-compile(export_all).
-else.
-export([
         open/2,
         open/3,
         tree/1,
         filename/1,
         diff/2
        ]).
-endif.

-define(TRIMMED, <<1>>).
-define(UNWRITTEN, <<0>>).
-define(NAIVE_ENCODE(Offset, Size, Data), <<Offset:64/unsigned-big, Size:32/unsigned-big, Data/binary>>).

-define(MINIMUM_CHUNK, 1048576). %% 1024 * 1024
-define(LEVEL_SIZE, 10).
-define(H, sha).

%% public API

open(Filename, DataDir) ->
    open(Filename, DataDir, naive).

open(Filename, DataDir, Type) ->
    Tree = load_filename(Filename, DataDir, Type),
    {ok, #mt{ filename = Filename, tree = Tree, backend = Type }}.

tree(#mt{ tree = T, backend = naive }) ->
    case T#naive.recalc of
        true  -> build_tree(T);
        false -> T
    end.

filename(#mt{ filename = F }) -> F.

diff(#mt{backend = naive, tree = T1}, #mt{backend = naive, tree = T2}) ->
    case T1#naive.root == T2#naive.root of
        true  -> same;
        false -> naive_diff(T1, T2)
    end;
diff(_, _) -> error(badarg).

%% private

% @private
load_filename(Filename, DataDir, naive) ->
    {Last, M} = do_load(Filename, DataDir, fun insert_csum_naive/2, []),
    ChunkSize = max(?MINIMUM_CHUNK, Last div 100),
    T = #naive{ leaves = lists:reverse(M), chunk_size = ChunkSize, recalc = true },
    build_tree(T).

do_load(Filename, DataDir, FoldFun, AccInit) ->
    CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
    {ok, T} = machi_csum_table:open(CsumFile, []),
    Acc = machi_csum_table:foldl_chunks(FoldFun, {0, AccInit}, T),
    ok = machi_csum_table:close(T),
    Acc.

% @private
insert_csum_naive({Last, Size, _Csum}=In, {Last, MT}) ->
    %% no gap
    {Last+Size, update_acc(In, MT)};
insert_csum_naive({Offset, Size, _Csum}=In, {Last, MT}) ->
    Hole = Offset - Last,
    MT0 = update_acc({Last, Hole, unwritten}, MT),
    {Offset+Size, update_acc(In, MT0)}.

% @private
update_acc({Offset, Size, unwritten}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, ?UNWRITTEN)} | MT ];
update_acc({Offset, Size, trimmed}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, ?TRIMMED)} | MT ];
update_acc({Offset, Size, <<_Tag:8, Csum/binary>>}, MT) ->
    [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, Csum)} | MT ].

build_tree(MT = #naive{ leaves = L, chunk_size = ChunkSize }) ->
    Lvl1s = build_level_1(ChunkSize, L, 1, [ crypto:hash_init(?H) ]),
    Mod2 = length(Lvl1s) div ?LEVEL_SIZE,
    Lvl2s = build_int_level(Mod2, Lvl1s, 1, [ crypto:hash_init(?H) ]),
    Mod3 = length(Lvl2s) div 2,
    Lvl3s = build_int_level(Mod3, Lvl2s, 1, [ crypto:hash_init(?H) ]),
    Root = build_root(Lvl3s, crypto:hash_init(?H)),
    MT#naive{ root = Root, lvl1 = Lvl1s, lvl2 = Lvl2s, lvl3 = Lvl3s, recalc = false }.

build_root([], Ctx) ->
    crypto:hash_final(Ctx);
build_root([H|T], Ctx) ->
    build_root(T, crypto:hash_update(Ctx, H)).

build_int_level(_Mod, [], _Cnt, [ Ctx | Rest ]) ->
    lists:reverse( [ crypto:hash_final(Ctx) | Rest ] );
build_int_level(Mod, [H|T], Cnt, [ Ctx | Rest ]) when Cnt rem Mod == 0 ->
    NewCtx = crypto:hash_init(?H),
    build_int_level(Mod, T, Cnt + 1, [ crypto:hash_update(NewCtx, H), crypto:hash_final(Ctx) | Rest ]);
build_int_level(Mod, [H|T], Cnt, [ Ctx | Rest ]) ->
    build_int_level(Mod, T, Cnt+1, [ crypto:hash_update(Ctx, H) | Rest ]).

build_level_1(_Size, [], _Multiple, [ Ctx | Rest ]) ->
    lists:reverse([ crypto:hash_final(Ctx) | Rest ]);
build_level_1(Size, [{Pos, Len, Hash}|T], Multiple, [ Ctx | Rest ])
  when ( Pos + Len ) > ( Size * Multiple ) ->
    NewCtx = crypto:hash_init(?H),
    build_level_1(Size, T, Multiple+1,
                  [ crypto:hash_update(NewCtx, Hash), crypto:hash_final(Ctx) | Rest ]);
build_level_1(Size, [{Pos, Len, Hash}|T], Multiple, [ Ctx | Rest ])
  when ( Pos + Len ) =< ( Size * Multiple ) ->
    build_level_1(Size, T, Multiple, [ crypto:hash_update(Ctx, Hash) | Rest ]).

naive_diff(#naive{lvl1 = L1}, #naive{lvl1=L2, chunk_size=CS2}) ->
    Set1 = gb_sets:from_list(lists:zip(lists:seq(1, length(L1)), L1)),
    Set2 = gb_sets:from_list(lists:zip(lists:seq(1, length(L2)), L2)),

    %% The byte ranges in list 2 that do not match in list 1
    %% Or should we do something else?
    [ {(X-1)*CS2, CS2, SHA} || {X, SHA} <- gb_sets:to_list(gb_sets:subtract(Set1, Set2)) ].
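A minimal usage sketch of the public API above (file and directory names are hypothetical; it assumes a checksum table has already been written for each file under "./data" via machi_csum_table, since open folds over that table to build the leaves):

    %% Sketch, not part of this commit: open two trees and compare them.
    {ok, MT1} = machi_merkle_tree:open("my-file", "./data"),
    {ok, MT2} = machi_merkle_tree:open("my-file-copy", "./data"),
    "my-file" = machi_merkle_tree:filename(MT1),
    case machi_merkle_tree:diff(MT1, MT2) of
        same   -> ok;                                   %% identical roots
        Ranges -> io:format("differ: ~p~n", [Ranges])   %% [{Offset, Size, Hash}, ...]
    end.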
test/machi_merkle_tree_test.erl (new file, 198 additions)

@@ -0,0 +1,198 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007-2015 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(machi_merkle_tree_test).
-compile([export_all]).

-include("machi_merkle_tree.hrl").

-include_lib("eunit/include/eunit.hrl").
-include_lib("kernel/include/file.hrl").

-define(GAP_CHANCE, 0.10).

%% unit tests
basic_test() ->
    random:seed(os:timestamp()),
    Fsz = choose_size() * 1024,
    Filesize = max(Fsz, 10*1024*1024),
    ChunkSize = max(1048576, Filesize div 100),
    N = make_leaf_nodes(Filesize),
    D0 = #naive{ leaves = N, chunk_size = ChunkSize, recalc = true },
    T1 = machi_merkle_tree:build_tree(D0),

    D1 = #naive{ leaves = tl(N), chunk_size = ChunkSize, recalc = true },
    T2 = machi_merkle_tree:build_tree(D1),

    ?assertNotEqual(T1#naive.root, T2#naive.root),
    ?assertEqual(1, length(machi_merkle_tree:naive_diff(T1, T2))).

make_leaf_nodes(Filesize) ->
    lists:reverse(
      lists:foldl(fun(T, Acc) -> machi_merkle_tree:update_acc(T, Acc) end,
                  [],
                  generate_offsets(Filesize, 1024, []))
     ).

choose_int(Factor) ->
    random:uniform(1024*Factor).

small_int() ->
    choose_int(10).

medium_int() ->
    choose_int(1024).

large_int() ->
    choose_int(4096).

generate_offsets(Filesize, Current, Acc) when Current < Filesize ->
    Length0 = choose_size(),

    Length = case Length0 + Current > Filesize of
                 false -> Length0;
                 true  -> Filesize - Current
             end,
    Data = term_to_binary(os:timestamp()),
    Checksum = machi_util:make_tagged_csum(client_sha, machi_util:checksum_chunk(Data)),
    Gap = maybe_gap(random:uniform()),
    generate_offsets(Filesize, Current + Length + Gap, [ {Current, Length, Checksum} | Acc ]);
generate_offsets(_Filesize, _Current, Acc) ->
    lists:reverse(Acc).

random_from_list(L) ->
    N = random:uniform(length(L)),
    lists:nth(N, L).

choose_size() ->
    F = random_from_list([fun small_int/0, fun medium_int/0, fun large_int/0]),
    F().

maybe_gap(Chance) when Chance < ?GAP_CHANCE ->
    choose_size();
maybe_gap(_) -> 0.

%% Define or remove these ifdefs if benchmarking is desired.
-ifdef(BENCH).
generate_offsets(FH, Filesize, Current, Acc) when Current < Filesize ->
    Length0 = choose_size(),

    Length = case Length0 + Current > Filesize of
                 false -> Length0;
                 true  -> Filesize - Current
             end,
    {ok, Data} = file:pread(FH, Current, Length),
    Checksum = machi_util:make_tagged_csum(client_sha, machi_util:checksum_chunk(Data)),
    Gap = maybe_gap(random:uniform()),
    generate_offsets(FH, Filesize, Current + Length + Gap, [ {Current, Length, Checksum} | Acc ]);
generate_offsets(_FH, _Filesize, _Current, Acc) ->
    lists:reverse(Acc).

make_offsets_from_file(Filename) ->
    {ok, Info} = file:read_file_info(Filename),
    Filesize = Info#file_info.size,
    {ok, FH} = file:open(Filename, [read, raw, binary]),
    Offsets = generate_offsets(FH, Filesize, 1024, []),
    file:close(FH),
    Offsets.

choose_filename() ->
    random_from_list([
                      "def^c5ea7511-d649-47d6-a8c3-2b619379c237^1",
                      "jkl^b077eff7-b2be-4773-a73f-fea4acb8a732^1",
                      "stu^553fa47a-157c-4fac-b10f-2252c7d8c37a^1",
                      "vwx^ae015d68-7689-4c9f-9677-926c6664f513^1",
                      "yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1"
                     ]).

make_csum_file(DataDir, Filename, Offsets) ->
    Path = machi_util:make_checksum_filename(DataDir, Filename),
    filelib:ensure_dir(Path),
    {ok, MC} = machi_csum_table:open(Path, []),
    lists:foreach(fun({Offset, Size, Checksum}) ->
                          machi_csum_table:write(MC, Offset, Size, Checksum) end,
                  Offsets),
    machi_csum_table:close(MC).

test() ->
    test(100).

test(N) ->
    {ok, F} = file:open("results.txt", [raw, write]),
    lists:foreach(fun(X) -> format_and_store(F, run_test(X)) end, lists:seq(1, N)).

format_and_store(F, {OffsetNum, {MTime, MSize}, {NTime, NSize}}) ->
    S = io_lib:format("~w\t~w\t~w\t~w\t~w\n", [OffsetNum, MTime, MSize, NTime, NSize]),
    ok = file:write(F, S).

run_test(C) ->
    random:seed(os:timestamp()),
    OffsetFn = "test/" ++ choose_filename(),
    O = make_offsets_from_file(OffsetFn),
    Fn = "csum_" ++ integer_to_list(C),
    make_csum_file(".", Fn, O),

    Osize = length(O),

    {MTime, {ok, M}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", merklet) end),
    {NTime, {ok, N}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", naive) end),

    ?assertEqual(Fn, machi_merkle_tree:filename(M)),
    ?assertEqual(Fn, machi_merkle_tree:filename(N)),

    MTree = machi_merkle_tree:tree(M),
    MSize = byte_size(term_to_binary(MTree)),

    NTree = machi_merkle_tree:tree(N),
    NSize = byte_size(term_to_binary(NTree)),

    ?assertEqual(same, machi_merkle_tree:diff(N, N)),
    ?assertEqual(same, machi_merkle_tree:diff(M, M)),
    {Osize, {MTime, MSize}, {NTime, NSize}}.

torture_test(C) ->
    Results = [ run_torture_test() || _ <- lists:seq(1, C) ],
    {ok, F} = file:open("torture_results.txt", [raw, write]),
    lists:foreach(fun({MSize, MTime, NSize, NTime}) ->
                          file:write(F, io_lib:format("~p\t~p\t~p\t~p\n",
                                                      [MSize, MTime, NSize, NTime]))
                  end, Results),
    ok = file:close(F).

run_torture_test() ->
    {NTime, N} = timer:tc(fun() -> naive_torture() end),

    %% Only the naive backend is built here, so report zero placeholders
    %% in the first two columns to keep torture_test/1's output format.
    NSize = byte_size(term_to_binary(N)),

    {0, 0, NSize, NTime}.

naive_torture() ->
    N = lists:foldl(fun(T, Acc) -> machi_merkle_tree:update_acc(T, Acc) end, [], torture_generator()),
    T = #naive{ leaves = lists:reverse(N), chunk_size = 10010, recalc = true },
    machi_merkle_tree:build_tree(T).

torture_generator() ->
    [ {O, 1, crypto:hash(sha, term_to_binary(now()))} || O <- lists:seq(1024, 1000000) ].
-endif. % BENCH
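Once the application is compiled with TEST defined, the eunit suite above can be run from an Erlang shell using the standard eunit API (invocation below is a sketch; build paths are assumed to be set up already):

    %% Run this module's unit tests with verbose output.
    eunit:test(machi_merkle_tree_test, [verbose]).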