If I have a single 400MB test file, created with:

dd if=/dev/random of=test/foo-data-1 bs=1m count=400

Next, I also remove the "config" directory that stores all of the .csum
files.

Then when I run `machi_merkle_tree_test:test(5)`, I see this output:

    machi_csum_table:open(./config/csum_1.csum) -> 2375 usec
    machi_csum_table:open(./config/csum_1.csum) -> 789 usec
    build_tree leaves = 1261 by pid <0.39.0>
    build_tree(size = 1261) -> 602 usec
    Osize 457 Osize2 1261
    machi_csum_table:open(./config/csum_2.csum) -> 662 usec
    machi_csum_table:open(./config/csum_2.csum) -> 801 usec
    build_tree leaves = 1259 by pid <0.39.0>
    build_tree(size = 1259) -> 566 usec
    Osize 479 Osize2 1259
    machi_csum_table:open(./config/csum_3.csum) -> 685 usec
    machi_csum_table:open(./config/csum_3.csum) -> 811 usec
    build_tree leaves = 1217 by pid <0.39.0>
    build_tree(size = 1217) -> 557 usec
    Osize 472 Osize2 1217
    machi_csum_table:open(./config/csum_4.csum) -> 941 usec
    machi_csum_table:open(./config/csum_4.csum) -> 740 usec
    build_tree leaves = 1134 by pid <0.39.0>
    build_tree(size = 1134) -> 559 usec
    Osize 395 Osize2 1134
    machi_csum_table:open(./config/csum_5.csum) -> 2541 usec
    machi_csum_table:open(./config/csum_5.csum) -> 725 usec
    build_tree leaves = 1284 by pid <0.39.0>
    build_tree(size = 1284) -> 572 usec
    Osize 425 Osize2 1284
    ok

... and I see this in "results.txt":

    473	32517	48778	1322	27788
    424	29114	43283	1155	25188
    452	27120	46868	1346	26828
    462	28497	47198	1398	27322
    424	29915	44685	1326	25596
This commit is contained in:
Scott Lystig Fritchie 2015-10-29 17:25:36 +09:00
parent 7086899941
commit 715d1b5ee4
3 changed files with 24 additions and 16 deletions

View file

@ -79,10 +79,12 @@ open(CSumFilename, _Opts) ->
list({machi_dt:file_offset(),
machi_dt:file_size(),
machi_dt:chunk_csum()|trimmed}).
find(#machi_csum_table{table=T}, Offset, Size) ->
ets:select(T, [{{'$1', '$2', '$3'},
[inclusion_match_spec(Offset, Size)],
['$_']}]).
find(_, _, _) ->
[].
%% find(#machi_csum_table{table=T}, Offset, Size) ->
%% ets:select(T, [{{'$1', '$2', '$3'},
%% [inclusion_match_spec(Offset, Size)],
%% ['$_']}]).
-ifdef(TEST).
all(#machi_csum_table{table=T}) ->

View file

@ -121,11 +121,15 @@ load_filename(Filename, DataDir, naive) ->
{Last, M} = do_load(Filename, DataDir, fun insert_csum_naive/2, []),
ChunkSize = max(?MINIMUM_CHUNK, Last div 100),
T = #naive{ leaves = lists:reverse(M), chunk_size = ChunkSize, recalc = true },
build_tree(T).
%% io:format(user, "~p is going to call build_tree() in 15 seconds\n", [self()]), timer:sleep(15*1000),
{USec, Res} = timer:tc(fun() -> build_tree(T) end),
io:format(user, "build_tree(size = ~p) -> ~p usec\n", [length(M), USec]),
Res.
do_load(Filename, DataDir, FoldFun, AccInit) ->
CsumFile = machi_util:make_checksum_filename(DataDir, Filename),
{ok, T} = machi_csum_table:open(CsumFile, []),
{USec, {ok, T}} = timer:tc(fun() -> machi_csum_table:open(CsumFile, []) end),
io:format(user, "machi_csum_table:open(~s) -> ~p usec\n", [CsumFile, USec]),
Acc = machi_csum_table:foldl_chunks(FoldFun, {0, AccInit}, T),
ok = machi_csum_table:close(T),
Acc.
@ -165,17 +169,18 @@ update_acc({Offset, Size, Csum}, MT) ->
[ {Offset, Size, ?NAIVE_ENCODE(Offset, Size, Csum)} | MT ].
build_tree(MT = #naive{ leaves = L, chunk_size = ChunkSize }) ->
lager:debug("Leaves: ~p~n", [L]),
io:format(user, "build_tree leaves = ~p by pid ~p\n", [length(L), self()]),
%%lager:debug("Leaves: ~p~n", [L]),
Lvl1s = build_level_1(ChunkSize, L, 1, [ crypto:hash_init(?H) ]),
lager:debug("Lvl1: ~p~n", [Lvl1s]),
%%lager:debug("Lvl1: ~p~n", [Lvl1s]),
Mod2 = length(Lvl1s) div ?LEVEL_SIZE,
Lvl2s = build_int_level(Mod2, Lvl1s, 1, [ crypto:hash_init(?H) ]),
lager:debug("Lvl2: ~p~n", [Lvl2s]),
%%lager:debug("Lvl2: ~p~n", [Lvl2s]),
Mod3 = length(Lvl2s) div 2,
Lvl3s = build_int_level(Mod3, Lvl2s, 1, [ crypto:hash_init(?H) ]),
lager:debug("Lvl3: ~p~n", [Lvl3s]),
%%lager:debug("Lvl3: ~p~n", [Lvl3s]),
Root = build_root(Lvl3s, crypto:hash_init(?H)),
lager:debug("Root: ~p~n", [Root]),
%%lager:debug("Root: ~p~n", [Root]),
MT#naive{ root = Root, lvl1 = Lvl1s, lvl2 = Lvl2s, lvl3 = Lvl3s, recalc = false }.
build_root([], Ctx) ->

View file

@ -29,11 +29,9 @@
choose_filename() ->
random_from_list([
"def^c5ea7511-d649-47d6-a8c3-2b619379c237^1",
"jkl^b077eff7-b2be-4773-a73f-fea4acb8a732^1",
"stu^553fa47a-157c-4fac-b10f-2252c7d8c37a^1",
"vwx^ae015d68-7689-4c9f-9677-926c6664f513^1",
"yza^4c784dc2-19bf-4ac6-91f6-58bbe5aa88e0^1"
%% Created by running:
%% dd if=/dev/random of=test/foo-data-1 bs=1m count=400
"foo-data-1"
]).
@ -114,6 +112,9 @@ run_test(C) ->
{MTime, {ok, M}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", merklet) end),
{NTime, {ok, N}} = timer:tc(fun() -> machi_merkle_tree:open(Fn, ".", naive) end),
Osize2 = length(element(8,element(3, N))),
io:format(user, "Osize ~p Osize2 ~p\n", [Osize, Osize2]),
%% io:format(user, "Osize ~p N ~P\n", [Osize, N, 10]),
?assertEqual(Fn, machi_merkle_tree:filename(M)),
?assertEqual(Fn, machi_merkle_tree:filename(N)),