Implement compression + block size

option {compress, none|gzip|snappy}
    ... except that snappy is currently broken: it appears to make the
    bloom filters crash. Needs investigation.

option {block_size, 32768}
    ... writes data to disk in chunks of ~32 KiB.
This commit is contained in:
parent
14ef03e06a
commit
d37b227936
8 changed files with 87 additions and 45 deletions
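Usage sketch (not part of the diff): opening a store with the two new options. The code below reads the option key as `compress`; `gzip` is used here because snappy is flagged as broken above, and hanoi:open/2 taking (Dir, Options) is assumed from init([Dir, Opts]).

    %% Hedged example; option keys taken from hanoi_writer:init/1 below.
    {ok, Tree} = hanoi:open("/tmp/hanoi.db",
                            [{compress, gzip},      %% none | gzip | snappy
                             {block_size, 32768}]), %% ~32 KiB nodes on disk
    ok = hanoi:put(Tree, <<"key">>, <<"value">>),   %% assumed to return ok
    ok = hanoi:close(Tree).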
TODO (9 changes)
@@ -1,10 +1,9 @@
-* lsm_btree
+* hanoi (in order of priority)
 * [2i] secondary index support
 * atomic multi-commit/recovery
 * add checkpoint/1 and sync/1 - flush pending writes to stable storage
   (nursery:finish() and finish/flush any merges)
 * [config] add config parameters on open
-* {sync, boolean()} fdsync or not on write
 * {cache, bytes(), name} share max(bytes) cache named 'name' via etc
 * [stats] statistics
 * For each level {#merges, {merge-time-min, max, average}}
@@ -28,10 +27,10 @@
 
 
 PHASE 2:
-* lsm_btree
+* hanoi
 * Define a standard struct which is the metadata added at the end of the
   file, e.g. [btree-nodes] [meta-data] [offset of meta-data]. This is written
-  in lsm_btree_writer:flush_nodes, and read in lsm_btree_reader:open2.
+  in hanoi_writer:flush_nodes, and read in hanoi_reader:open2.
 * [feature] compression, encryption on disk
 
 
@@ -42,7 +41,7 @@ REVIEW LITERATURE AND OTHER SIMILAR IMPLEMENTATAIONS:
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.44.2782&rep=rep1&type=pdf
 
 
-1: make the "first level" have more thatn 2^5 entries (controlled by the constant TOP_LEVEL in lsm_btree.hrl); this means a new set of files is opened/closed/merged for every 32 insert/updates/deletes. Setting this higher will just make the nursery correspondingly larger, which should be absolutely fine.
+1: make the "first level" have more thatn 2^5 entries (controlled by the constant TOP_LEVEL in hanoi.hrl); this means a new set of files is opened/closed/merged for every 32 insert/updates/deletes. Setting this higher will just make the nursery correspondingly larger, which should be absolutely fine.
 
 2: Right now, the streaming btree writer emits a btree page based on number of elements. This could be changed to be based on the size of the node (say, some block-size boudary) and then add padding at the end so that each node read becomes a clean block transfer. Right now, we're probably taking way to many reads.
 
rebar.config
@@ -5,6 +5,7 @@
 {erl_opts, [debug_info,{d,'TRIQ',true}]}.
 
 {deps, [
+  {snappy, "1.0.*", {git, "git://github.com/fdmanana/snappy-erlang-nif.git", {branch, "master"}}},
   {plain_fsm, "1.1.*", {git, "git://github.com/uwiger/plain_fsm", {branch, "master"}}},
   {ebloom, "1.1.*", {git, "git://github.com/basho/ebloom.git", {branch, "master"}}},
   {basho_bench, ".*", {git, "git://github.com/basho/basho_bench.git", {branch, "master"}}},
hanoi.erl
@@ -31,10 +31,12 @@
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
 
--export([open/1, close/1, get/2, lookup/2, delete/2, put/3,
+-export([open/1, open/2, close/1, get/2, lookup/2, delete/2, put/3,
          async_fold/3, async_fold_range/4,
          fold/3, fold_range/4]).
 
+-export([get_opt/2, get_opt/3]).
+
 -include("hanoi.hrl").
 -include_lib("kernel/include/file.hrl").
 -include_lib("include/hanoi.hrl").
@@ -194,12 +196,12 @@ async_receive_fold_range(PID,Fun,Acc0,Ref,Range) ->
 init([Dir, Opts]) ->
     case file:read_file_info(Dir) of
         {ok, #file_info{ type=directory }} ->
-            {ok, TopLevel} = open_levels(Dir),
+            {ok, TopLevel} = open_levels(Dir,Opts),
             {ok, Nursery} = hanoi_nursery:recover(Dir, TopLevel);
 
         {error, E} when E =:= enoent ->
             ok = file:make_dir(Dir),
-            {ok, TopLevel} = hanoi_level:open(Dir, ?TOP_LEVEL, undefined),
+            {ok, TopLevel} = hanoi_level:open(Dir, ?TOP_LEVEL, undefined, Opts),
             {ok, Nursery} = hanoi_nursery:new(Dir)
     end,
 
@@ -207,7 +209,7 @@ init([Dir, Opts]) ->
 
 
 
-open_levels(Dir) ->
+open_levels(Dir,Options) ->
     {ok, Files} = file:list_dir(Dir),
 
     %% parse file names and find max level
@@ -231,7 +233,7 @@ open_levels(Dir) ->
 
     TopLevel =
         lists:foldl( fun(LevelNo, Prev) ->
-                             {ok, Level} = hanoi_level:open(Dir,LevelNo,Prev),
+                             {ok, Level} = hanoi_level:open(Dir,LevelNo,Prev,Options),
                              Level
                      end,
                      undefined,
@@ -334,11 +336,14 @@ start_app() ->
     end.
 
 get_opt(Key, Opts) ->
+    get_opt(Key, Opts, undefined).
+
+get_opt(Key, Opts, Default) ->
     case proplists:get_value(Key, Opts) of
         undefined ->
             case application:get_env(?MODULE, Key) of
                 {ok, Value} -> Value;
-                undefined -> undefined
+                undefined -> Default
             end;
         Value ->
             Value
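In words, get_opt/3 resolves an option in three steps: the per-call proplist first, then the `hanoi` application environment (?MODULE), then the caller-supplied default. A quick sketch:

    %% Resolution order of hanoi:get_opt/3 (assuming no app env is set):
    gzip  = hanoi:get_opt(compress, [{compress, gzip}], none),   %% proplist wins
    32768 = hanoi:get_opt(block_size, [], 32768).                %% falls through to Default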
hanoi_level.erl
@@ -42,22 +42,23 @@
 -behavior(plain_fsm).
 -export([data_vsn/0, code_change/3]).
 
--export([open/3, lookup/2, inject/2, close/1, snapshot_range/3, blocking_range/3, incremental_merge/2]).
+-export([open/4, lookup/2, inject/2, close/1, snapshot_range/3, blocking_range/3, incremental_merge/2]).
 
 -include_lib("kernel/include/file.hrl").
 
 -record(state, {
           a, b, c, next, dir, level, inject_done_ref, merge_pid, folding = [],
-          step_next_ref, step_caller, step_merge_ref
+          step_next_ref, step_caller, step_merge_ref,
+          opts = []
          }).
 
 %%%%% PUBLIC OPERATIONS
 
-open(Dir,Level,Next) when Level>0 ->
+open(Dir,Level,Next,Opts) when Level>0 ->
     PID = plain_fsm:spawn_link(?MODULE,
                                fun() ->
                                        process_flag(trap_exit,true),
-                                       initialize(#state{dir=Dir,level=Level,next=Next})
+                                       initialize(#state{dir=Dir,level=Level,next=Next,opts=Opts})
                                end),
     {ok, PID}.
 
@@ -465,7 +466,8 @@ main_loop(State = #state{ next=Next }) ->
         {merge_done, _, OutFileName} ->
             State1 =
                 if Next =:= undefined ->
-                        {ok, PID} = ?MODULE:open(State#state.dir, State#state.level + 1, undefined),
+                        {ok, PID} = ?MODULE:open(State#state.dir, State#state.level + 1, undefined,
+                                                 State#state.opts ),
                         State#state{ next=PID };
                    true ->
                         State
@@ -567,12 +569,20 @@ begin_merge(State) ->
     file:delete(XFileName),
 
     MergePID = proc_lib:spawn_link(fun() ->
+        try
         {ok, OutCount} = hanoi_merger:merge(AFileName, BFileName, XFileName,
                                             ?BTREE_SIZE(State#state.level + 1),
-                                            State#state.next =:= undefined),
+                                            State#state.next =:= undefined,
+                                            State#state.opts ),
 %       error_logger:info_msg("merge done ~p,~p -> ~p~n", [AFileName, BFileName, XFileName]),
 
         Owner ! {merge_done, OutCount, XFileName}
+        catch
+            C:E ->
+                error_logger:error_msg("merge failed ~p:~p ~p~n",
+                                       [C,E,erlang:get_stacktrace()]),
+                erlang:raise(C,E,erlang:get_stacktrace())
+        end
     end),
 
     {ok, MergePID}.
hanoi_merger.erl
@@ -29,7 +29,7 @@
 %% Merging two BTrees
 %%
 
--export([merge/5]).
+-export([merge/6]).
 
 -include("hanoi.hrl").
 
@@ -40,14 +40,14 @@
 %%
 -define(LOCAL_WRITER, true).
 
-merge(A,B,C, Size, IsLastLevel) ->
+merge(A,B,C, Size, IsLastLevel, Options) ->
     {ok, BT1} = hanoi_reader:open(A, sequential),
     {ok, BT2} = hanoi_reader:open(B, sequential),
     case ?LOCAL_WRITER of
         true ->
-            {ok, Out} = hanoi_writer:init([C, Size]);
+            {ok, Out} = hanoi_writer:init([C, [{size,Size} | Options]]);
         false ->
-            {ok, Out} = hanoi_writer:open(C, Size)
+            {ok, Out} = hanoi_writer:open(C, [{size,Size} | Options])
     end,
 
     {node, AKVs} = hanoi_reader:first_node(BT1),
@@ -79,11 +79,8 @@ scan(BT1, BT2, Out, IsLastLevel, AKVs, BKVs, Count, {0, FromPID}) ->
             PID ! {Ref, step_done}
     end,
 
-    % error_logger:info_msg("waiting for step in ~p~n", [self()]),
-
     receive
         {step, From, HowMany} ->
-            % error_logger:info_msg("got step ~p,~p in ~p~n", [From,HowMany, self()]),
             scan(BT1, BT2, Out, IsLastLevel, AKVs, BKVs, Count, {HowMany, From})
     end;
 
hanoi_nursery.erl
@@ -161,7 +161,8 @@ finish(#nursery{ dir=Dir, cache=Cache, log_file=LogFile, total_size=_TotalSize,
         N when N>0 ->
             %% next, flush cache to a new BTree
             BTreeFileName = filename:join(Dir, "nursery.data"),
-            {ok, BT} = hanoi_writer:open(BTreeFileName, ?BTREE_SIZE(?TOP_LEVEL)),
+            {ok, BT} = hanoi_writer:open(BTreeFileName, [{size,?BTREE_SIZE(?TOP_LEVEL)},
+                                                         {compress, none}]),
             try
                 lists:foreach( fun({Key,Value}) ->
                                        ok = hanoi_writer:add(BT, Key, Value)
hanoi_util.erl
@@ -45,16 +45,40 @@ estimate_node_size_increment(_KVList,Key,Value) ->
             13
     end.
 
-encode_index_node(Level, KVList) ->
-    Data = %zlib:zip(
-             erlang:term_to_binary(KVList)
-          % )
-        ,
-    Size = byte_size(Data)+2,
-    {ok, Size+4, [ <<Size:32/unsigned, Level:16/unsigned>> | Data ] }.
-
-decode_index_node(Level, <<Data/binary>>) ->
-    KVList = erlang:binary_to_term(Data), %zlib:unzip(Data)),
+-define(NO_COMPRESSION, 0).
+-define(SNAPPY_COMPRESSION, 1).
+-define(GZIP_COMPRESSION, 2).
+
+encode_index_node(Level, KVList, Compress) ->
+
+    TermData = erlang:term_to_binary(KVList),
+
+    case Compress of
+        snappy ->
+            {ok, Snappied} = snappy:compress(TermData),
+            CompressedData = [?SNAPPY_COMPRESSION|Snappied];
+        gzip ->
+            CompressedData = [?GZIP_COMPRESSION|zlib:gzip(TermData)];
+        _ ->
+            CompressedData = [?NO_COMPRESSION|TermData]
+    end,
+
+    Size = erlang:iolist_size(CompressedData),
+
+    {ok, Size+6, [ <<(Size+2):32/unsigned, Level:16/unsigned>> | CompressedData ] }.
+
+decode_index_node(Level, <<Tag, Data/binary>>) ->
+
+    case Tag of
+        ?NO_COMPRESSION ->
+            TermData = Data;
+        ?SNAPPY_COMPRESSION ->
+            {ok, TermData} = snappy:decompress(Data);
+        ?GZIP_COMPRESSION ->
+            TermData = zlib:gunzip(Data)
+    end,
+
+    KVList = erlang:binary_to_term(TermData),
     {ok, {node, Level, KVList}}.
 
 
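The hunk above defines the new on-disk node framing: a 32-bit length covering everything after itself, a 16-bit level, a one-byte compression tag (0 = none, 1 = snappy, 2 = gzip), then the possibly-compressed term_to_binary payload — which is why encode returns Size+6 total bytes. A hedged round-trip sketch for the uncompressed case:

    %% Illustration only; mirrors encode_index_node/3 / decode_index_node/2.
    framing_sketch() ->
        KVList  = [{<<"key">>, <<"value">>}],
        Payload = erlang:term_to_binary(KVList),
        Len     = 2 + 1 + byte_size(Payload),        %% level + tag + payload
        Node    = <<Len:32/unsigned, 0:16/unsigned,  %% level 0
                    0,                               %% tag ?NO_COMPRESSION
                    Payload/binary>>,
        %% a reader strips length+level and hands <<Tag, Data/binary>> to decode:
        <<Len:32/unsigned, _Level:16/unsigned, 0, Data/binary>> = Node,
        KVList = erlang:binary_to_term(Data).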
hanoi_writer.erl
@@ -53,18 +53,19 @@
 
           name :: string(),
 
-          bloom
+          bloom,
+          block_size = ?NODE_SIZE,
+          compress = none :: none | snappy | gzip
          }).
 
 
 %%% PUBLIC API
 
-open(Name,Size) ->
-    gen_server:start_link(?MODULE, [Name,Size], []).
+open(Name,Options) ->
+    gen_server:start_link(?MODULE, [Name,Options], []).
 
 
 open(Name) ->
-    gen_server:start_link(?MODULE, [Name,2048], []).
+    gen_server:start_link(?MODULE, [Name,[]], []).
 
 
 add(Ref,Key,Data) ->
@@ -76,7 +77,9 @@ close(Ref) ->
 %%%
 
 
-init([Name,Size]) ->
+init([Name,Options]) ->
 
+    Size = proplists:get_value(size, Options, 2048),
+
     % io:format("got name: ~p~n", [Name]),
 
@@ -86,7 +89,9 @@ init([Name,Size]) ->
             {ok, BloomFilter} = ebloom:new(erlang:min(Size,16#ffffffff), 0.01, 123),
             {ok, #state{ name=Name,
                          index_file_pos=0, index_file=IdxFile,
-                         bloom = BloomFilter
+                         bloom = BloomFilter,
+                         block_size = hanoi:get_opt(block_size, Options, ?NODE_SIZE),
+                         compress = hanoi:get_opt(compress, Options, none)
                        }};
         {error, _}=Error ->
             error_logger:error_msg("hanoi_writer cannot open ~p: ~p~n", [Name, Error]),
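hanoi_writer now takes a proplist instead of a bare size: `size` defaults to 2048 (see init above) and `block_size`/`compress` fall back through hanoi:get_opt/3. A hedged direct-use sketch (file name hypothetical):

    {ok, BT} = hanoi_writer:open("some-level.data",   %% hypothetical path
                                 [{size, 2048},
                                  {block_size, 32768},
                                  {compress, gzip}]),
    ok = hanoi_writer:add(BT, <<"k">>, <<"v">>).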
@@ -164,7 +169,7 @@ add_record(Level, Key, Value,
 
     NewSize = NodeSize + hanoi_util:estimate_node_size_increment(List, Key, Value),
 
-    ebloom:insert( State#state.bloom, Key ),
+    ok = ebloom:insert( State#state.bloom, Key ),
 
     NodeMembers = [{Key,Value} | List],
     if
@@ -180,9 +185,9 @@ add_record(Level, Key, Value, #state{ nodes=Nodes }=State) ->
 
 
 
-close_node(#state{nodes=[#node{ level=Level, members=NodeMembers }|RestNodes]} = State) ->
+close_node(#state{nodes=[#node{ level=Level, members=NodeMembers }|RestNodes], compress=Compress} = State) ->
     OrderedMembers = lists:reverse(NodeMembers),
-    {ok, DataSize, Data} = hanoi_util:encode_index_node(Level, OrderedMembers),
+    {ok, DataSize, Data} = hanoi_util:encode_index_node(Level, OrderedMembers, Compress),
     NodePos = State#state.index_file_pos,
     ok = file:write(State#state.index_file, Data),
 