Initial import.

This commit is contained in:
Gregory Burd 2013-02-21 16:02:32 -05:00
commit 7b5b040f64
8 changed files with 659 additions and 0 deletions

.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@

Makefile Normal file
View file

@ -0,0 +1,58 @@
# Project name; also used to name the Dialyzer PLT file (.$(TARGET).plt).
TARGET= hamt
# Tools are resolved through env so that PATH decides which binary runs.
# REBAR was previously assigned twice with the same value; one is enough.
REBAR= /usr/bin/env rebar
ERL= /usr/bin/env erl
DIALYZER= /usr/bin/env dialyzer
ifdef suites
SUITE_OPTION := suites=$(suites)
ifdef tests
TESTS_OPTION := tests=$(tests)
.PHONY: deps test
all: deps compile
$(REBAR) get-deps
compile: deps
$(REBAR) compile
$(REBAR) clean
distclean: clean
$(REBAR) delete-deps
eunit: test
test: compile
$(REBAR) skip_deps=true $(EUNIT_OPTIONS) eunit
console: compile
erl -pa ebin deps/*/ebin
plt: compile
@$(DIALYZER) --build_plt --output_plt .$(TARGET).plt \
-pa deps/plain_fsm/ebin \
deps/plain_fsm/ebin \
--apps kernel stdlib
analyze: compile
$(DIALYZER) --plt .$(TARGET).plt \
-pa deps/plain_fsm/ebin \
-pa deps/ebloom/ebin \
$(ERL) -pz deps/*/ebin -pa ebin
erl -pa .eunit -pz deps/*/ebin -pz ebin -exec 'cd(".eunit").'
USE_GDB=1 erl -pa .eunit -pz deps/*/ebin -pz ebin -exec 'cd(".eunit").'

rebar.config Normal file
View file

@ -0,0 +1,33 @@
%%% -*- mode: erlang -*-
{require_otp_vsn, "R15"}.
{cover_enabled, true}.
%{clean_files, ["*.eunit", "ebin/*.beam"]}.
{eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}.
{erl_opts, [%{d,'DEBUG',true},
%{parse_transform, lager_transform},
{xref_checks, [undefined_function_calls]}.
{deps, [

src/bitpop.erl Normal file
View file

@ -0,0 +1,65 @@
%% -*- coding: utf-8 -*-
%% %CopyrightBegin%
%% Copyright (C) 2013 Gregory Burd. All Rights Reserved.
%% The contents of this file are subject to the Mozilla Public License,
%% Version 2, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Mozilla Public License along with this software. If not, it can be
%% retrieved online at
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% %CopyrightEnd%
%% @doc Population count: the number of 1-bits in a non-negative integer.
%% Any other argument (negative or non-integer) fails with function_clause.
count(0) -> 0;
count(Bits) when is_integer(Bits), Bits > 0 -> count(Bits, 0).

%% `N band (N - 1)' clears the lowest set bit, so the loop runs once per
%% 1-bit rather than once per bit position.
count(0, Ones) ->
    Ones;
count(Bits, Ones) ->
    Remaining = Bits band (Bits - 1),
    count(Remaining, Ones + 1).
%% EUnit generator for count/1: runs of low bits, single bits, and
%% progressively wider words. The wide-word section alternates a lone high
%% bit (2^N) with an all-ones-but-the-lowest pattern. Previously the lone
%% high-bit input `16#FFFFF + 1' was repeated three times, so 2^24 and 2^28
%% were never exercised.
bitpop_test_() ->
    [?_assertEqual(0, count(0)),
     ?_assertEqual(1, count(1)),
     ?_assertEqual(2, count(3)),
     ?_assertEqual(3, count(7)),
     ?_assertEqual(4, count(15)),
     ?_assertEqual(5, count(31)),
     ?_assertEqual(6, count(63)),
     ?_assertEqual(7, count(127)),
     ?_assertEqual(8, count(255)),
     ?_assertEqual(1, count(4)),
     ?_assertEqual(1, count(8)),
     ?_assertEqual(1, count(16)),
     ?_assertEqual(1, count(32)),
     ?_assertEqual(1, count(64)),
     ?_assertEqual(1, count(128)),
     ?_assertEqual(1, count(256)),
     ?_assertEqual(1, count(512)),
     ?_assertEqual(1, count(1024)),
     ?_assertEqual(1, count(2048)),
     ?_assertEqual(1, count(16#FFFF + 1)),
     ?_assertEqual(19, count(16#FFFFE)),
     ?_assertEqual(1, count(16#FFFFF + 1)),
     ?_assertEqual(23, count(16#FFFFFE)),
     ?_assertEqual(1, count(16#FFFFFF + 1)),
     ?_assertEqual(27, count(16#FFFFFFE)),
     ?_assertEqual(1, count(16#FFFFFFF + 1)),
     ?_assertEqual(31, count(16#FFFFFFFE)),
     %% Negative input is outside count/1's domain.
     ?_assertException(error, function_clause, count(-1))].

src/ Normal file
View file

@ -0,0 +1,12 @@
{application, hamt,
{description, "Hash Array Mapped Tries"},
{vsn, "1.0.0"},
{registered, []},
{applications, [
{mod, {hamt_app, []}},
{env, []}

src/hamt.erl Normal file
View file

@ -0,0 +1,442 @@
%% -*- coding: utf-8 -*-
%% %CopyrightBegin%
%% Copyright (C) 2013 Gregory Burd. All Rights Reserved.
%% The contents of this file are subject to the Mozilla Public License,
%% Version 2, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Mozilla Public License along with this software. If not, it can be
%% retrieved online at
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% %CopyrightEnd%
%% =========================================================================
%% Ideal Hash Array Mapped Tries: an Erlang functional datatype
%% The Hash Array Mapped Trie (HAMT) is based on the simple notion of hashing a
%% key and storing the key in a trie based on this hash value. The AMT is used
%% to implement the required structure efficiently. The Array Mapped Trie (AMT)
%% is a versatile data structure and yields an attractive alternative to
%% contemporary algorithms in many applications. Here I describe how it is used
%% to develop Hash Trees with near ideal characteristics that avoid the
%% traditional problem, setting the size of the initial root hash table or
%% incurring the high cost of dynamic resizing to achieve an acceptable
%% performance.
%% Based on the paper "Ideal Hash Trees" by Phil Bagwell [2001].
%% @ARTICLE{Bagwell01idealhash,
%% author = {Phil Bagwell},
%% title = {Ideal Hash Trees},
%% journal = {Es Grands Champs},
%% year = {2001},
%% volume = {1195}
%% }
%% -------------------------------------------------------------------------
%% Operations:
%% - new(): returns empty hamt.
%% - is_empty(T): returns 'true' if T is an empty hamt, and 'false'
%% otherwise.
%% - get(K, T): retrieves the value stored with key K in hamt T or
%% `not_found' if the key is not present in the hamt.
%% - put(K, V, T): inserts key K with value V into hamt T if the key
%% is not present in the hamt, otherwise updates key K to value V in
%% T. Returns the new hamt.
%% - delete(K, T): removes key K from hamt T; returns new hamt. If the
%% key is not present, T is returned unchanged.
%% - map(F, T): maps the function F(K, V) -> V' to all key-value pairs
%% of the hamt T and returns a new hamt T' with the same set of keys
%% as T and the new set of values V'.
%% - fold(F, T, Acc): applies the function F(K, V, Acc) -> Acc' to all
%% key-value pairs of the hamt T and returns the accumulated result.
-compile({parse_transform, pulse_instrument}).
-export([new/0, is_empty/1, get/2, put/2, put/3, delete/2,
map/2, fold/3,
from_list/1, to_list/1]).
%% The Hamt data structure consists of:
%% - {hamt, nil | SNode | CNode}
%% - {snode, Key::binary(), Value::binary()}
%% - {cnode, Bitmap, Branch}
%% - {lnode, [snode]}
%% Some types.
%% Leaf holding exactly one key/value pair.
-type hamt_snode() :: {snode, binary(), binary()}.
%% Collision bucket. put_1/5 stores, and get_2/2 reads, plain {Key, Value}
%% pairs here, not snode tuples.
-type hamt_lnode() :: {lnode, [{binary(), binary()}]}.
%% Interior node: a bitmap of occupied slots plus the children in slot order.
-type hamt_cnode() :: {cnode, non_neg_integer(), [hamt_snode() | hamt_cnode() | hamt_lnode()]}.
%% The handle is a 2-tuple: new/0 returns {hamt, nil} and put/3 returns
%% {hamt, Node}. The previous 3-tuple declaration matched no value that this
%% module builds.
-opaque hamt() :: {hamt, nil | hamt_snode() | hamt_cnode()}.
%% @doc Return an empty hamt: the `hamt' tag wrapping a `nil' root.
-spec new() -> hamt().
new() -> {hamt, nil}.
%% @doc Return `true' only for the empty hamt (`{hamt, nil}') and `false'
%% for anything else. Both clause bodies were missing.
-spec is_empty(Hamt) -> boolean() when
      Hamt :: hamt().
is_empty({hamt, nil}) ->
    true;
is_empty(_) ->
    false.
%% @doc Look up `Key' in the hamt. Returns the stored value, or `not_found'
%% when the key is absent.
-spec get(Key, Hamt) -> not_found | Value when
      Key :: binary(),
      Value :: binary(),
      Hamt :: hamt().
get(_Key, {hamt, nil}) ->
    not_found;
get(Key, {hamt, {snode, Key, Value}}) ->
    Value;
get(_Key, {hamt, {snode, _OtherKey, _OtherValue}}) ->
    %% A one-entry hamt holding a different key is a miss, not a
    %% function_clause crash.
    not_found;
get(Key, {hamt, {cnode, _Bitmap, _Nodes}=CN}) ->
    case get_1(hash(Key), CN, 0) of
        none ->
            not_found;
        {Key, Value} ->
            Value;
        {list, List} ->
            %% Collision bucket: compare keys one by one.
            case get_2(Key, List) of
                none -> not_found;
                {Key, Value} -> Value;
                {_Key, _Value} -> not_found
            end;
        {_Key, _Value} ->
            %% The hash path ended at a leaf that stores some other key.
            not_found
    end.
%% Walk the trie along hash H, consuming 5 bits of the hash per level
%% (L is the current bit offset). Returns:
%%   none          - the slot for this hash is unoccupied,
%%   {Key, Value}  - the leaf reached (its key may differ from the one sought),
%%   {list, List}  - the collision bucket reached.
%% The `end;' closing the case was missing.
get_1(H, {cnode, Bitmap, Nodes}, L) ->
    Bit = bitpos(H, L),
    case exists(Bit, Bitmap) of
        true -> get_1(H, ba_get(index(Bit, Bitmap), Nodes), L + 5);
        false -> none
    end;
get_1(_H, {snode, Key, Value}, _L) ->
    {Key, Value};
get_1(_H, {lnode, List}, _L) when is_list(List) ->
    {list, List}.
%% Linear search of a collision bucket of {Key, Value} pairs.
%% Returns the first pair whose key is exactly Key, or `none' when the
%% bucket is exhausted. The body of that base clause was missing; get/2
%% matches on `none'.
get_2(_Key, []) ->
    none;
get_2(Key, [{Key, Value} | _Rest]) ->
    {Key, Value};
get_2(Key, [{_DifferentKey, _Value} | Rest]) ->
    get_2(Key, Rest).
%% @doc Build a hamt from a list of {Key, Value} pairs by inserting them
%% into a fresh empty hamt via put/2.
from_list(L) ->
    Empty = hamt:new(),
    put(L, Empty).
%% @doc Return the hamt's contents as a list of {Key, Value} pairs, built
%% by consing each pair onto the accumulator during fold/3. The resulting
%% order is whatever fold/3's traversal produces.
to_list({hamt, _}=T) ->
    Collect = fun(Key, Value, Acc) -> [{Key, Value} | Acc] end,
    fold(Collect, T, []).
%% @doc Insert every {Key, Value} pair of the list into the hamt, left to
%% right, so a later pair overrides an earlier one with the same key.
%% The base clause had no body; it now returns the hamt unchanged.
%% NOTE(review): put/2 has the same name and arity as the auto-imported BIF
%% erlang:put/2; this presumably relies on a
%% -compile({no_auto_import,[put/2]}) attribute outside this view - confirm.
-spec put([{Key, Value}], Hamt1) -> Hamt2 when
      Key :: binary(),
      Value :: binary(),
      Hamt1 :: hamt(),
      Hamt2 :: hamt().
put([], {hamt, _Node}=T) ->
    T;
put([{Key, Value} | Rest], {hamt, _Node}=T) ->
    put(Rest, put(Key, Value, T)).
%% @doc Insert or update one binary key with a binary value.
%% An empty hamt becomes a single leaf; otherwise the insertion descends
%% from the root at bit offset 0 using the key's hash.
%% Non-binary keys or values fail with function_clause.
-spec put(Key, Value, Hamt1) -> Hamt2 when
      Key :: binary(),
      Value :: binary(),
      Hamt1 :: hamt(),
      Hamt2 :: hamt().
put(Key, Value, {hamt, Root})
  when is_binary(Key), is_binary(Value) ->
    case Root of
        nil ->
            {hamt, {snode, Key, Value}};
        _ ->
            {hamt, put_1(hash(Key), Key, Value, Root, 0)}
    end.
%% Insert {Key, Value} below the given node. H is the key's hash and L the
%% bit offset of the current level (5 bits are consumed per level).
%%
%% - cnode: descend into the occupied slot, or add a new leaf to a free one.
%% - snode with the same key: replace the value.
%% - snode with another key, L =< 30: turn the leaf into a one-child cnode
%%   and retry at the same level, which pushes both keys one level down.
%% - snode with another key, L > 30: the levels are used up, so both
%%   entries go into a collision bucket.
%% - lnode: store the pair, replacing an existing entry for the same key.
%%
%% Fixes: the `end;' closing the case was missing; the L > 30 collision
%% used to drop the existing leaf's entry; the lnode insert used to leave
%% the old pair for an already-present key in the bucket.
put_1(H, Key, Value, {cnode, Bitmap, Nodes}, L) when is_integer(L), L =< 30 ->
    Bit = bitpos(H, L),
    Idx = index(Bit, Bitmap),
    case exists(Bit, Bitmap) of
        true ->
            CN = put_1(H, Key, Value, ba_get(Idx, Nodes), L + 5),
            {cnode, Bitmap, ba_set(Idx, CN, Nodes)};
        false ->
            {cnode, (Bitmap bor Bit), ba_ins(Idx, {snode, Key, Value}, Nodes)}
    end;
put_1(_H, Key, Value, {snode, Key, _}, _L) ->
    {snode, Key, Value};
put_1(H, Key, Value, {snode, SNKey, SNValue}, L) when is_integer(L), L =< 30 ->
    put_1(H, Key, Value, split(SNKey, SNValue, L), L);
put_1(_H, Key, Value, {snode, SNKey, SNValue}, L) when L > 30 ->
    %% Keep the entry that was already here alongside the new one.
    {lnode, [{Key, Value}, {SNKey, SNValue}]};
put_1(_H, Key, Value, {lnode, List}, _L) when is_list(List) ->
    %% Remove any previous pair for Key so the bucket never holds duplicates.
    {lnode, [{Key, Value} | lists:keydelete(Key, 1, List)]}.
%% Wrap a single leaf in a cnode for level L: the bitmap has only the
%% leaf's slot bit set (derived from the hash of its key) and the child
%% list holds just that leaf.
split(SNKey, SNValue, L) ->
    Bitmap = bitpos(hash(SNKey), L),
    Leaf = {snode, SNKey, SNValue},
    {cnode, Bitmap, [Leaf]}.
%% @doc Remove `Key' from the hamt and return the resulting hamt.
%% Deleting from an empty hamt, or deleting an absent key, returns the
%% input unchanged. delete_1/4 answers `not_found', `delete' (the root
%% itself disappears) or the replacement root node.
%% The `end.' closing the case was missing.
-spec delete(Key, Hamt1) -> Hamt2 when
      Key :: binary(),
      Hamt1 :: hamt(),
      Hamt2 :: hamt().
delete(Key, {hamt, nil})
  when is_binary(Key) ->
    {hamt, nil};
delete(Key, {hamt, Node}=T)
  when is_binary(Key) ->
    case delete_1(hash(Key), Key, Node, 0) of
        not_found -> T;
        delete -> {hamt, nil};
        N -> {hamt, N}
    end.
%% Remove Key from the subtree rooted at the given node. H is the key's
%% hash and L the bit offset of the current level. Returns:
%%   not_found - Key is not in this subtree (nothing changes),
%%   delete    - the whole node disappears; the parent must drop its slot,
%%   Node      - the replacement for this node.
%%
%% Fixes relative to the previous version: missing clause bodies and `end's
%% are restored; the lnode branch removes the key (it used to keep only the
%% deleted key) and treats bucket entries as {Key, Value} pairs, as put_1/5
%% writes them; a cnode left with one leaf collapses into that leaf so
%% removals cascade upwards.
delete_1(H, Key, {cnode, Bitmap, Nodes}=CNode, L)
  when is_integer(L), L =< 30 ->
    Bit = bitpos(H, L),
    case exists(Bit, Bitmap) of
        false ->
            not_found;
        true ->
            Idx = index(Bit, Bitmap),
            case {delete_1(H, Key, ba_get(Idx, Nodes), L + 5), Nodes} of
                {not_found, _} ->
                    not_found;
                {delete, _} ->
                    delete_2(Key, Bit, CNode);
                {{snode, _, _}=SN, [_]} ->
                    %% The only child shrank to a leaf: lift it one level
                    %% instead of keeping a one-leaf cnode.
                    SN;
                {N, _} ->
                    {cnode, Bitmap, ba_set(Idx, N, Nodes)}
            end
    end;
delete_1(_H, Key, {snode, Key, _}, _L) ->
    delete;
delete_1(_H, _Key, {snode, _, _}, _L) ->
    not_found;
delete_1(_H, Key, {lnode, List}, _L) ->
    case lists:keymember(Key, 1, List) of
        false ->
            not_found;
        true ->
            case lists:filter(fun({K, _}) -> K =/= Key end, List) of
                [] -> delete;
                %% A one-entry bucket is just a leaf.
                [{K1, V1}] -> {snode, K1, V1};
                Rest -> {lnode, Rest}
            end
    end.

%% Drop the child in the slot identified by Bit from a cnode, after that
%% child reported `delete'. The child is removed by slot index, so it works
%% whatever kind of node occupied the slot.
%%   - no children remain: the cnode disappears too (`delete'),
%%   - one leaf remains: the cnode collapses into that leaf,
%%   - otherwise: a cnode with the slot's bit cleared from the bitmap.
delete_2(_Key, Bit, {cnode, Bitmap, Nodes}) ->
    Idx = index(Bit, Bitmap),
    {Before, [_Removed | After]} = lists:split(Idx - 1, Nodes),
    case Before ++ After of
        [] -> delete;
        [{snode, _, _}=SN] -> SN;
        Remaining -> {cnode, (Bitmap bxor Bit), Remaining}
    end.
%% @doc Apply F(Key, Value) to every entry and return a hamt with the same
%% keys and the new values. The trie's shape is untouched because only
%% values change.
%% The previous helper matched 4-tuples `{K, V, Smaller, Larger}', which no
%% hamt node has, and the result was wrapped in a 1-tuple instead of
%% `{hamt, _}'; every call therefore crashed.
-spec map(Function, Hamt1) -> Hamt2 when
      Function :: fun((K :: term(), V1 :: term()) -> V2 :: term()),
      Hamt1 :: hamt(),
      Hamt2 :: hamt().
map(F, {hamt, Node}) when is_function(F, 2) ->
    {hamt, map_1(F, Node)}.

map_1(_F, nil) ->
    nil;
map_1(F, {snode, K, V}) ->
    {snode, K, F(K, V)};
map_1(F, {cnode, Bitmap, Nodes}) ->
    {cnode, Bitmap, [map_1(F, Node) || Node <- Nodes]};
map_1(F, {lnode, Pairs}) ->
    %% Collision buckets hold plain {Key, Value} pairs.
    {lnode, lists:map(fun({K, V}) -> {K, F(K, V)} end, Pairs)}.
%% @doc Fold F(Key, Value, AccIn) -> AccOut over every entry of the hamt,
%% visiting children in slot order, and return the final accumulator.
%% Fixes: the spec declared an arity-2 fun and a hamt result; an empty hamt
%% (`nil' root) had no clause and crashed; the empty-cnode clause had no
%% body; the lnode branch passed the arity-3 fun straight to lists:foldl/3,
%% which calls it with two arguments.
-spec fold(Function, Hamt, Acc) -> AccOut when
      Function :: fun((K :: term(), V :: term(), AccIn :: term()) -> term()),
      Hamt :: hamt(),
      Acc :: any(),
      AccOut :: any().
fold(Function, {hamt, Node}, Acc) ->
    fold_1(Function, Acc, Node).

fold_1(_F, Acc, nil) ->
    Acc;
fold_1(F, Acc, {snode, Key, Value}) ->
    F(Key, Value, Acc);
fold_1(F, Acc, {cnode, _Bitmap, Nodes}) ->
    lists:foldl(fun(Node, A) -> fold_1(F, A, Node) end, Acc, Nodes);
fold_1(F, Acc, {lnode, Pairs}) ->
    %% Collision buckets hold plain {Key, Value} pairs.
    lists:foldl(fun({K, V}, A) -> F(K, V, A) end, Acc, Pairs).
%% Fetch the child at 1-based position Pos of a cnode's child list.
%% The guard pins both the position and the list length to at most 32;
%% anything larger fails with function_clause.
ba_get(Pos, Slots)
  when Pos =< 32, erlang:length(Slots) =< 32 ->
    lists:nth(Pos, Slots).
%% Replace the element at 1-based position I of a child list (at most 32
%% entries) with V and return the new list.
%% The body of the first clause was missing; it puts V in place of the head.
ba_set(1, V, [_|T]=Nodes)
  when erlang:length(Nodes) =< 32 ->
    [V|T];
ba_set(I, V, [H|T]=Nodes)
  when I =< 32, erlang:length(Nodes) =< 32 ->
    [H|ba_set(I-1, V, T)].
%% Insert V so that it ends up at 1-based position I of a child list (at
%% most 32 entries); elements from that position on shift right. Running
%% off the end of the list appends V.
%% The body of the first clause was missing. The two `[H]' clauses that
%% used to follow could never match (a one-element list is already caught
%% by the `[H|T]' clauses) and contradicted the positional meaning, so they
%% are removed.
ba_ins(1, V, [_|_]=Nodes)
  when erlang:length(Nodes) =< 32 ->
    [V|Nodes];
ba_ins(I, V, [H|T]=Nodes)
  when I =< 32, erlang:length(Nodes) =< 32 ->
    [H|ba_ins(I-1, V, T)];
ba_ins(_I, V, []) -> [V].
%% Drop every leaf (snode) whose key is exactly Key from a child list.
%% cnodes and lnodes are always kept; any other element shape fails with
%% function_clause.
ba_del(Key, Nodes) ->
    Keep = fun({snode, K, _}) -> K =/= Key;
              ({cnode, _, _}) -> true;
              ({lnode, _}) -> true
           end,
    lists:filter(Keep, Nodes).
%% Extract the 5-bit group of Hash that starts at bit offset Shift,
%% giving a slot number in 0..31.
mask(Hash, Shift) ->
    Shifted = Hash bsr Shift,
    Shifted band 16#1F.
%% Bitmap bit for the slot that Hash selects at bit offset Shift:
%% a single 1 shifted left by the slot number from mask/2.
bitpos(Hash, Shift) ->
    Slot = mask(Hash, Shift),
    1 bsl Slot.
%% 1-based position in the child list for the slot whose bit is Bit:
%% one more than the number of occupied slots below it in Bitmap.
index(Bit, Bitmap) ->
    LowerSlots = Bitmap band (Bit - 1),
    1 + bitpop:count(LowerSlots).
%% True when every bit set in Bit is also set in Bitmap, i.e. the slot is
%% occupied.
exists(Bit, Bitmap) ->
    Bit =:= (Bit band Bitmap).
hash(Key) when is_binary(Key) ->
%% EUnit generator covering the basics: the empty hamt's representation,
%% is_empty/1 before and after one insertion, and a put/get round trip.
create_a_hamt_test_() ->
    [?_assertEqual({hamt, nil}, hamt:new()),
     ?_assert(hamt:is_empty(hamt:new())),
     ?_assertNot(hamt:is_empty(hamt:put(<<"k">>, <<"v">>, hamt:new()))),
     ?_assertEqual(<<"v">>,
                   hamt:get(<<"k">>, hamt:put(<<"k">>, <<"v">>, hamt:new())))].
put_causes_split_root_snode_test() ->
?assertEqual(hamt:put(<<"k2">>, <<"v2">>, {hamt,{snode,<<"k1">>,<<"v1">>}}),
put_causes_2_splits_test() ->
put_existing_key_replaces_value_test() ->
?assertEqual(hamt:put(<<"k1">>, <<"v'">>,
%% Deleting from an empty hamt yields an empty hamt.
%% ?assertEqual takes (Expected, Actual); the arguments were swapped, which
%% makes a failure report label the two values the wrong way round.
del_from_empty_trie_test() ->
    ?assertEqual({hamt, nil}, hamt:delete(<<"k1">>, {hamt, nil})).
%% Deleting the only key of a one-leaf hamt yields the empty hamt.
%% Arguments are now in ?assertEqual's (Expected, Actual) order.
del_last_key_in_trie_test() ->
    ?assertEqual({hamt, nil},
                 hamt:delete(<<"k1">>, {hamt,{snode,<<"k1">>,<<"v1">>}})).
del_one_of_many_keys_test() ->
%% Building keys 1..5 and deleting key 5 must give exactly the structure
%% obtained by building keys 1..4 directly.
%% Arguments are now in ?assertEqual's (Expected, Actual) order.
del_causes_cascading_cnode_collapse_test() ->
    Expected = hamt:put([{<<X>>, <<X>>} || X <- lists:seq(1,4)], hamt:new()),
    H = hamt:put([{<<X>>, <<X>>} || X <- lists:seq(1,5)], hamt:new()),
    ?assertEqual(Expected, hamt:delete(<<5>>, H)).
%% Insert 10000 distinct keys and check that the root became an interior
%% node. `<<X>>' keeps only the low 8 bits of X, so the previous version
%% inserted just 256 distinct keys over and over; `<<X:32>>' makes every
%% key unique. The test also asserted nothing before.
put_lots_test() ->
    KVPs = [{<<X:32>>, <<X:32>>} || X <- lists:seq(1,10000)],
    ?assertMatch({hamt, {cnode, _, _}}, hamt:put(KVPs, hamt:new())).
%% test() ->
%% test(500).
%% test(NumTimes) ->
%% eqc:quickcheck(eqc:numtests(NumTimes, prop_to_list())).
%% input_list() ->
%% list({list(int()), int()}).
%% prop_to_list() ->
%% ?FORALL(Xs, input_list(),
%% equiv_to_orddict(Xs)).
%% equiv_to_orddict(Xs) ->
%% orddict:from_list(hamt:to_list(Xs)) =:= orddict:from_list(Xs).
%% from_list(L) ->
%% lists:foldl(fun insert_fun/2, empty(), L).
%% insert_fun({Key, Value}, Acc) ->
%% put(Key, Value, Acc).

src/hamt_app.erl Normal file
View file

@ -0,0 +1,16 @@
%% Application callbacks
-export([start/2, stop/1]).
%% ===================================================================
%% Application callbacks
%% ===================================================================
start(_StartType, _StartArgs) ->
stop(_State) ->

src/hamt_sup.erl Normal file
View file

@ -0,0 +1,27 @@
%% API
%% Supervisor callbacks
%% Helper macro for declaring children of supervisor
-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}).
%% ===================================================================
%% API functions
%% ===================================================================
%% Start the supervisor, registered locally under this module's name, with
%% this module as the callback module and an empty argument list.
start_link() ->
    Name = {local, ?MODULE},
    supervisor:start_link(Name, ?MODULE, []).
%% ===================================================================
%% Supervisor callbacks
%% ===================================================================
%% Supervisor callback: one_for_one strategy with the intensity/period
%% pair 5/10 and no child specifications.
init([]) ->
    RestartStrategy = {one_for_one, 5, 10},
    ChildSpecs = [],
    {ok, {RestartStrategy, ChildSpecs}}.