From 715d1b5ee46bc10c2bd4c8943147440a56982ce5 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Thu, 29 Oct 2015 17:25:36 +0900 Subject: [PATCH] If I have a single test file @ 400MB: dd if=/dev/random of=test/foo-data-1 bs=1m count=400 And if I also remove the "config" dir that stores all of the .csum files. Then when I run `machi_merkle_tree_test:test(5)`, I see this output: machi_csum_table:open(./config/csum_1.csum) -> 2375 usec machi_csum_table:open(./config/csum_1.csum) -> 789 usec build_tree leaves = 1261 by pid <0.39.0> build_tree(size = 1261) -> 602 usec Osize 457 Osize2 1261 machi_csum_table:open(./config/csum_2.csum) -> 662 usec machi_csum_table:open(./config/csum_2.csum) -> 801 usec build_tree leaves = 1259 by pid <0.39.0> build_tree(size = 1259) -> 566 usec Osize 479 Osize2 1259 machi_csum_table:open(./config/csum_3.csum) -> 685 usec machi_csum_table:open(./config/csum_3.csum) -> 811 usec build_tree leaves = 1217 by pid <0.39.0> build_tree(size = 1217) -> 557 usec Osize 472 Osize2 1217 machi_csum_table:open(./config/csum_4.csum) -> 941 usec machi_csum_table:open(./config/csum_4.csum) -> 740 usec build_tree leaves = 1134 by pid <0.39.0> build_tree(size = 1134) -> 559 usec Osize 395 Osize2 1134 machi_csum_table:open(./config/csum_5.csum) -> 2541 usec machi_csum_table:open(./config/csum_5.csum) -> 725 usec build_tree leaves = 1284 by pid <0.39.0> build_tree(size = 1284) -> 572 usec Osize 425 Osize2 1284 ok ... and I see this in "results.txt": 473 32517 48778 1322 27788 424 29114 43283 1155 25188 452 27120 46868 1346 26828 462 28497 47198 1398 27322 424 29915 44685 1326 25596 --- src/machi_csum_table.erl | 10 ++++++---- src/machi_merkle_tree.erl | 19 ++++++++++++------- test/machi_merkle_tree_test.erl | 11 ++++++----- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/machi_csum_table.erl b/src/machi_csum_table.erl index 80f1765..c11fd95 100644 --- a/src/machi_csum_table.erl +++ b/src/machi_csum_table.erl @@ -79,10 +79,12 @@ open(CSumFilename, _Opts) -> list({machi_dt:file_offset(), machi_dt:file_size(), machi_dt:chunk_csum()|trimmed}). -find(#machi_csum_table{table=T}, Offset, Size) -> - ets:select(T, [{{'$1', '$2', '$3'}, - [inclusion_match_spec(Offset, Size)], - ['$_']}]). +find(_, _, _) -> + []. +%% find(#machi_csum_table{table=T}, Offset, Size) -> +%% ets:select(T, [{{'$1', '$2', '$3'}, +%% [inclusion_match_spec(Offset, Size)], +%% ['$_']}]). -ifdef(TEST). all(#machi_csum_table{table=T}) -> diff --git a/src/machi_merkle_tree.erl b/src/machi_merkle_tree.erl index a799edc..9cce444 100644 --- a/src/machi_merkle_tree.erl +++ b/src/machi_merkle_tree.erl @@ -121,11 +121,15 @@ load_filename(Filename, DataDir, naive) -> {Last, M} = do_load(Filename, DataDir, fun insert_csum_naive/2, []), ChunkSize = max(?MINIMUM_CHUNK, Last div 100), T = #naive{ leaves = lists:reverse(M), chunk_size = ChunkSize, recalc = true }, - build_tree(T). + %% io:format(user, "~p is going to call build_tree() in 15 seconds\n", [self()]), timer:sleep(15*1000), + {USec, Res} = timer:tc(fun() -> build_tree(T) end), + io:format(user, "build_tree(size = ~p) -> ~p usec\n", [length(M), USec]), + Res. do_load(Filename, DataDir, FoldFun, AccInit) -> CsumFile = machi_util:make_checksum_filename(DataDir, Filename), - {ok, T} = machi_csum_table:open(CsumFile, []), + {USec, {ok, T}} = timer:tc(fun() -> machi_csum_table:open(CsumFile, []) end), + io:format(user, "machi_csum_table:open(~s) -> ~p usec\n", [CsumFile, USec]), Acc = machi_csum_table:foldl_chunks(FoldFun, {0, AccInit}, T), ok = machi_csum_table:close(T), Acc. @@ -165,17 +169,18 @@ update_acc({Offset, Size, Csum}, MT) -> [ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, Csum)} | MT ]. build_tree(MT = #naive{ leaves = L, chunk_size = ChunkSize }) -> - lager:debug("Leaves: ~p~n", [L]), +io:format(user, "build_tree leaves = ~p by pid ~p\n", [length(L), self()]), + %%lager:debug("Leaves: ~p~n", [L]), Lvl1s = build_level_1(ChunkSize, L, 1, [ crypto:hash_init(?H) ]), - lager:debug("Lvl1: ~p~n", [Lvl1s]), + %%lager:debug("Lvl1: ~p~n", [Lvl1s]), Mod2 = length(Lvl1s) div ?LEVEL_SIZE, Lvl2s = build_int_level(Mod2, Lvl1s, 1, [ crypto:hash_init(?H) ]), - lager:debug("Lvl2: ~p~n", [Lvl2s]), + %%lager:debug("Lvl2: ~p~n", [Lvl2s]), Mod3 = length(Lvl2s) div 2, Lvl3s = build_int_level(Mod3, Lvl2s, 1, [ crypto:hash_init(?H) ]), - lager:debug("Lvl3: ~p~n", [Lvl3s]), + %%lager:debug("Lvl3: ~p~n", [Lvl3s]), Root = build_root(Lvl3s, crypto:hash_init(?H)), - lager:debug("Root: ~p~n", [Root]), + %%lager:debug("Root: ~p~n", [Root]), MT#naive{ root = Root, lvl1 = Lvl1s, lvl2 = Lvl2s, lvl3 = Lvl3s, recalc = false }. build_root([], Ctx) -> diff --git a/test/machi_merkle_tree_test.erl b/test/machi_merkle_tree_test.erl index 382c262..8a548ed 100644 --- a/test/machi_merkle_tree_test.erl +++ b/test/machi_merkle_tree_test.erl @@ -29,11 +29,9 @@ choose_filename() -> random_from_list([ - "def^c5ea7511-d649-47d6-a8c3-2b619379c237^1", - "jkl^b077eff7-b2be-4773-a73f-fea4acb8a732^1", - "stu^553fa47a-157c-4fac-b10f-2252c7d8c37a^1", - "vwx^ae015d68-7689-4c9f-9677-926c6664f513^1", - "yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1" + %% Created by running: + %% dd if=/dev/random of=test/foo-data-1 bs=1m count=400 + "foo-data-1" ]). @@ -114,6 +112,9 @@ run_test(C) -> {MTime, {ok, M}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", merklet) end), {NTime, {ok, N}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", naive) end), + Osize2 = length(element(8,element(3, N))), + io:format(user, "Osize ~p Osize2 ~p\n", [Osize, Osize2]), +%% io:format(user, "Osize ~p N ~P\n", [Osize, N, 10]), ?assertEqual(Fn, machi_merkle_tree:filename(M)), ?assertEqual(Fn, machi_merkle_tree:filename(N)),