machi/src/machi_chash.erl
2015-04-08 17:32:01 +09:00

461 lines
16 KiB
Erlang

%%%-------------------------------------------------------------------
%%% Copyright (c) 2007-2011 Gemini Mobile Technologies, Inc. All rights reserved.
%%% Copyright (c) 2013-2015 Basho Technologies, Inc. All rights reserved.
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%
%%%-------------------------------------------------------------------
%% @doc Consistent hashing library. Also known as "random slicing".
%%
%% This code was originally from the Hibari DB source code at
%% [https://github.com/hibari]
-module(machi_chash).
%% TODO items:
%%
%% 1. Refactor to use bigints instead of floating point numbers. The
%% ?SMALLEST_SIGNIFICANT_FLOAT_SIZE macro below doesn't allow as
%% much wiggle-room for making really small hashing range
%% definitions.
-define(SMALLEST_SIGNIFICANT_FLOAT_SIZE, 0.1e-12).
-define(SHA_MAX, (1 bsl (20*8))).
%% -compile(export_all).
-export([make_float_map/1, make_float_map/2,
sum_map_weights/1,
make_tree/1,
query_tree/2,
hash_binary_via_float_map/2,
hash_binary_via_float_tree/2,
pretty_with_integers/2,
pretty_with_integers/3]).
-export([make_demo_map1/0, make_demo_map2/0]).
-export([zzz_usage_details/0]). % merely to give EDoc a hint of our intent
-type owner_name() :: term().
%% Owner for a range on the unit interval. We are agnostic about its
%% type.
-type weight() :: non_neg_integer().
%% For this library, a weight is an integer which specifies the
%% capacity of a "owner" relative to other owners. For example, if
%% owner A with a weight of 10, and if owner B has a weight of 20,
%% then B will be assigned twice as much of the unit interval as A.
-type float_map() :: [{owner_name(), float()}].
%% A float map subdivides the unit interval, starting at 0.0, to
%% partitions that are assigned to various owners. The sum of all
%% floats must be exactly 1.0 (or close enough for floating point
%% purposes).
-opaque float_tree() :: gb_trees:tree(float(), owner_name()).
%% We can't use gb_trees:tree() because 'nil' (the empty tree) is
%% never valid in our case. But teaching Dialyzer that is difficult.
-type owner_int_range() :: {owner_name(), non_neg_integer(), non_neg_integer()}.
%% Used when "prettying" a float map.
-type owner_weight() :: {owner_name(), weight()}.
-type owner_weight_list() :: [owner_weight()].
%% A owner_weight_list is a definition of brick assignments over the
%% unit interval [0.0, 1.0]. The sum of all floats must be 1.0. For
%% example, [{{br1,nd1}, 0.25}, {{br2,nd1}, 0.5}, {{br3,nd1}, 0.25}].
-export_type([float_map/0, float_tree/0]).
%% @doc Create a float map, based on a basic owner weight list.
-spec make_float_map(owner_weight_list()) -> float_map().
make_float_map(NewOwnerWeights) ->
make_float_map([], NewOwnerWeights).
%% @doc Create a float map, based on an older float map and a new weight
%% list.
%%
%% The weights in the new weight list may be different than (or the
%% same as) whatever weights were used to make the older float map.
-spec make_float_map(float_map(), owner_weight_list()) -> float_map().
make_float_map([], NewOwnerWeights) ->
Sum = add_all_weights(NewOwnerWeights),
DiffMap = [{Ch, Wt/Sum} || {Ch, Wt} <- NewOwnerWeights],
make_float_map2([{unused, 1.0}], DiffMap, NewOwnerWeights);
make_float_map(OldFloatMap, NewOwnerWeights) ->
NewSum = add_all_weights(NewOwnerWeights),
%% Normalize to unit interval
%% NewOwnerWeights2 = [{Ch, Wt / NewSum} || {Ch, Wt} <- NewOwnerWeights],
%% Reconstruct old owner weights (will be normalized to unit interval)
SumOldFloatsDict =
lists:foldl(fun({Ch, Wt}, OrdDict) ->
orddict:update_counter(Ch, Wt, OrdDict)
end, orddict:new(), OldFloatMap),
OldOwnerWeights = orddict:to_list(SumOldFloatsDict),
OldSum = add_all_weights(OldOwnerWeights),
OldChs = [Ch || {Ch, _} <- OldOwnerWeights],
NewChs = [Ch || {Ch, _} <- NewOwnerWeights],
OldChsOnly = OldChs -- NewChs,
%% Mark any space in by a deleted owner as unused.
OldFloatMap2 = lists:map(
fun({Ch, Wt} = ChWt) ->
case lists:member(Ch, OldChsOnly) of
true ->
{unused, Wt};
false ->
ChWt
end
end, OldFloatMap),
%% Create a diff map of changing owners and added owners
DiffMap = lists:map(fun({Ch, NewWt}) ->
case orddict:find(Ch, SumOldFloatsDict) of
{ok, OldWt} ->
{Ch, (NewWt / NewSum) -
(OldWt / OldSum)};
error ->
{Ch, NewWt / NewSum}
end
end, NewOwnerWeights),
make_float_map2(OldFloatMap2, DiffMap, NewOwnerWeights).
make_float_map2(OldFloatMap, DiffMap, _NewOwnerWeights) ->
FloatMap = apply_diffmap(DiffMap, OldFloatMap),
XX = combine_neighbors(collapse_unused_in_float_map(FloatMap)),
XX.
apply_diffmap(DiffMap, FloatMap) ->
SubtractDiff = [{Ch, abs(Diff)} || {Ch, Diff} <- DiffMap, Diff < 0],
AddDiff = [D || {_Ch, Diff} = D <- DiffMap, Diff > 0],
TmpFloatMap = iter_diffmap_subtract(SubtractDiff, FloatMap),
iter_diffmap_add(AddDiff, TmpFloatMap).
add_all_weights(OwnerWeights) ->
lists:foldl(fun({_Ch, Weight}, Sum) -> Sum + Weight end, 0.0, OwnerWeights).
iter_diffmap_subtract([{Ch, Diff}|T], FloatMap) ->
iter_diffmap_subtract(T, apply_diffmap_subtract(Ch, Diff, FloatMap));
iter_diffmap_subtract([], FloatMap) ->
FloatMap.
iter_diffmap_add([{Ch, Diff}|T], FloatMap) ->
iter_diffmap_add(T, apply_diffmap_add(Ch, Diff, FloatMap));
iter_diffmap_add([], FloatMap) ->
FloatMap.
apply_diffmap_subtract(Ch, Diff, [{Ch, Wt}|T]) ->
if Wt == Diff ->
[{unused, Wt}|T];
Wt > Diff ->
[{Ch, Wt - Diff}, {unused, Diff}|T];
Wt < Diff ->
[{unused, Wt}|apply_diffmap_subtract(Ch, Diff - Wt, T)]
end;
apply_diffmap_subtract(Ch, Diff, [H|T]) ->
[H|apply_diffmap_subtract(Ch, Diff, T)];
apply_diffmap_subtract(_Ch, _Diff, []) ->
[].
apply_diffmap_add(Ch, Diff, [{unused, Wt}|T]) ->
if Wt == Diff ->
[{Ch, Wt}|T];
Wt > Diff ->
[{Ch, Diff}, {unused, Wt - Diff}|T];
Wt < Diff ->
[{Ch, Wt}|apply_diffmap_add(Ch, Diff - Wt, T)]
end;
apply_diffmap_add(Ch, Diff, [H|T]) ->
[H|apply_diffmap_add(Ch, Diff, T)];
apply_diffmap_add(_Ch, _Diff, []) ->
[].
combine_neighbors([{Ch, Wt1}, {Ch, Wt2}|T]) ->
combine_neighbors([{Ch, Wt1 + Wt2}|T]);
combine_neighbors([H|T]) ->
[H|combine_neighbors(T)];
combine_neighbors([]) ->
[].
collapse_unused_in_float_map([{Ch, Wt1}, {unused, Wt2}|T]) ->
collapse_unused_in_float_map([{Ch, Wt1 + Wt2}|T]);
collapse_unused_in_float_map([{unused, _}] = L) ->
L; % Degenerate case only
collapse_unused_in_float_map([H|T]) ->
[H|collapse_unused_in_float_map(T)];
collapse_unused_in_float_map([]) ->
[].
chash_float_map_to_nextfloat_list(FloatMap) when length(FloatMap) > 0 ->
%% QuickCheck found a bug ... need to weed out stuff smaller than
%% ?SMALLEST_SIGNIFICANT_FLOAT_SIZE here.
FM1 = [P || {_X, Y} = P <- FloatMap, Y > ?SMALLEST_SIGNIFICANT_FLOAT_SIZE],
{_Sum, NFs0} = lists:foldl(fun({Name, Amount}, {Sum, List}) ->
{Sum+Amount, [{Sum+Amount, Name}|List]}
end, {0, []}, FM1),
lists:reverse(NFs0).
chash_nextfloat_list_to_gb_tree([]) ->
gb_trees:balance(gb_trees:from_orddict([]));
chash_nextfloat_list_to_gb_tree(NextFloatList) ->
{_FloatPos, Name} = lists:last(NextFloatList),
%% QuickCheck found a bug ... it really helps to add a catch-all item
%% at the far "right" of the list ... 42.0 is much greater than 1.0.
NFs = NextFloatList ++ [{42.0, Name}],
gb_trees:balance(gb_trees:from_orddict(orddict:from_list(NFs))).
-spec chash_gb_next(float(), float_tree()) -> {float(), owner_name()}.
chash_gb_next(X, {_, GbTree}) ->
chash_gb_next1(X, GbTree).
chash_gb_next1(X, {Key, Val, Left, _Right}) when X < Key ->
case chash_gb_next1(X, Left) of
nil ->
{Key, Val};
Res ->
Res
end;
chash_gb_next1(X, {Key, _Val, _Left, Right}) when X >= Key ->
chash_gb_next1(X, Right);
chash_gb_next1(_X, nil) ->
nil.
%% @doc Not used directly, but can give a developer an idea of how well
%% chash_float_map_to_nextfloat_list will do for a given value of Max.
%%
%% For example:
%% <verbatim>
%% NewFloatMap = make_float_map([{unused, 1.0}],
%% [{a,100}, {b, 100}, {c, 10}]),
%% ChashMap = chash_scale_to_int_interval(NewFloatMap, 100),
%% io:format("QQQ: int int = ~p\n", [ChashIntInterval]),
%% -> [{a,1,47},{b,48,94},{c,94,100}]
%% </verbatim>
%%
%% Interpretation: out of the 100 slots:
%% <ul>
%% <li> 'a' uses the slots 1-47 </li>
%% <li> 'b' uses the slots 48-94 </li>
%% <li> 'c' uses the slots 95-100 </li>
%% </ul>
chash_scale_to_int_interval(NewFloatMap, Max) ->
chash_scale_to_int_interval(NewFloatMap, 0, Max).
%% @type nextfloat_list() = list({float(), brick()}). A nextfloat_list
%% differs from a float_map in two respects: 1) nextfloat_list contains
%% tuples with the brick name in 2nd position, 2) the float() at each
%% position I_n > I_m, for all n, m such that n > m.
%% For example, a nextfloat_list of the float_map example above,
%% [{0.25, {br1, nd1}}, {0.75, {br2, nd1}}, {1.0, {br3, nd1}].
chash_scale_to_int_interval([{Ch, _Wt}], Cur, Max) ->
[{Ch, Cur, Max}];
chash_scale_to_int_interval([{Ch, Wt}|T], Cur, Max) ->
Int = trunc(Wt * Max),
[{Ch, Cur + 1, Cur + Int}|chash_scale_to_int_interval(T, Cur + Int, Max)].
%%%%%%%%%%%%%
%% @doc Make a pretty/human-friendly version of a float map that describes
%% integer ranges between 1 and `Scale'.
-spec pretty_with_integers(float_map(), integer()) -> [owner_int_range()].
pretty_with_integers(Map, Scale) ->
chash_scale_to_int_interval(Map, Scale).
%% @doc Make a pretty/human-friendly version of a float map (based
%% upon a float map created from `OldWeights' and `NewWeights') that
%% describes integer ranges between 1 and `Scale'.
-spec pretty_with_integers(owner_weight_list(), owner_weight_list(),integer())->
[owner_int_range()].
pretty_with_integers(OldWeights, NewWeights, Scale) ->
chash_scale_to_int_interval(
make_float_map(make_float_map(OldWeights),
NewWeights),
Scale).
%% @doc Create a float tree, which is the rapid lookup data structure
%% for consistent hash queries.
-spec make_tree(float_map()) -> float_tree().
make_tree(Map) ->
chash_nextfloat_list_to_gb_tree(
chash_float_map_to_nextfloat_list(Map)).
%% @doc Low-level function for querying a float tree: the (floating
%% point) point within the unit interval.
-spec query_tree(float(), float_tree()) -> {float(), owner_name()}.
query_tree(Val, Tree) when is_float(Val), 0.0 =< Val, Val =< 1.0 ->
chash_gb_next(Val, Tree).
%% @doc Create a sample float map.
-spec make_demo_map1() -> float_map().
make_demo_map1() ->
{_, Res} = make_demo_map1_i(),
Res.
make_demo_map1_i() ->
Fail1 = {b, 100},
L1 = [{a, 100}, Fail1, {c, 100}],
L2 = L1 ++ [{d, 100}, {e, 100}],
L3 = L2 -- [Fail1],
L4 = L3 ++ [{giant, 300}],
{L4, lists:foldl(fun(New, Old) -> make_float_map(Old, New) end,
make_float_map(L1), [L2, L3, L4])}.
%% @doc Create a sample float map.
-spec make_demo_map2() -> float_map().
make_demo_map2() ->
{L0, _} = make_demo_map1_i(),
L1 = L0 ++ [{h, 100}],
L2 = L1 ++ [{i, 100}],
L3 = L2 ++ [{j, 100}],
lists:foldl(fun(New, Old) -> make_float_map(Old, New) end,
make_demo_map1(), [L1, L2, L3]).
%% @doc Create a human-friendly summary of a float map.
%%
%% The two parts of the summary are: a per-owner total of the unit
%% interval range(s) owned by each owner, and a total sum of all
%% per-owner ranges (which should be 1.0 but is not enforced).
-spec sum_map_weights(float_map()) ->
{{per_owner, float_map()}, {weight_sum, float()}}.
sum_map_weights(Map) ->
L = sum_map_weights(lists:sort(Map), undefined, 0.0) -- [{undefined,0.0}],
WeightSum = lists:sum([Weight || {_, Weight} <- L]),
{{per_owner, L}, {weight_sum, WeightSum}}.
sum_map_weights([{SZ, Weight}|T], SZ, SZ_total) ->
sum_map_weights(T, SZ, SZ_total + Weight);
sum_map_weights([{SZ, Weight}|T], LastSZ, LastSZ_total) ->
[{LastSZ, LastSZ_total}|sum_map_weights(T, SZ, Weight)];
sum_map_weights([], LastSZ, LastSZ_total) ->
[{LastSZ, LastSZ_total}].
%% @doc Query a float map with a binary (inefficient).
-spec hash_binary_via_float_map(binary(), float_map()) ->
{float(), owner_name()}.
hash_binary_via_float_map(Key, Map) ->
Tree = make_tree(Map),
<<Int:(20*8)/unsigned>> = crypto:hash(sha, Key),
Float = Int / ?SHA_MAX,
query_tree(Float, Tree).
%% @doc Query a float tree with a binary.
-spec hash_binary_via_float_tree(binary(), float_tree()) ->
{float(), owner_name()}.
hash_binary_via_float_tree(Key, Tree) ->
<<Int:(20*8)/unsigned>> = crypto:hash(sha, Key),
Float = Int / ?SHA_MAX,
query_tree(Float, Tree).
%%%%% @doc Various usage examples, see source code below this function
%%%%% for full details.
zzz_usage_details() ->
%% %% Make a map. See the code for make_demo_map1() for the order of
%% %% additions & deletions. Here's a brief summary of the 4 steps.
%% %%
%% %% * 'a' through 'e' are weighted @ 100.
%% %% * 'giant' is weighted @ 300.
%% %% * 'b' is removed at step #3.
%% 40> M1 = machi_chash:make_demo_map1().
%% [{a,0.09285714285714286},
%% {giant,0.10714285714285715},
%% {d,0.026190476190476153},
%% {giant,0.10714285714285715},
%% {a,0.04999999999999999},
%% {giant,0.04999999999999999},
%% {d,0.04999999999999999},
%% {giant,0.050000000000000044},
%% {d,0.06666666666666671},
%% {e,0.009523809523809434},
%% {giant,0.05714285714285716},
%% {c,0.14285714285714285},
%% {giant,0.05714285714285716},
%% {e,0.13333333333333341}]
%% %% Map M1 onto the interval of integers 0-10,1000
%% %%
%% %% output = list({SZ_name::term(), Start::integer(), End::integer()})
%% 41> machi_chash:pretty_with_integers(M1, 10*1000).
%% [{a,1,928},
%% {giant,929,1999},
%% {d,2000,2260},
%% {giant,2261,3331},
%% {a,3332,3830},
%% {giant,3831,4329},
%% {d,4330,4828},
%% {giant,4829,5328},
%% {d,5329,5994},
%% {e,5995,6089},
%% {giant,6090,6660},
%% {c,6661,8088},
%% {giant,8089,8659},
%% {e,8659,10000}]
%% %% Sum up all of the weights, make sure it's what we expect:
%% 55> machi_chash:sum_map_weights(M1).
%% {{per_owner,[{a,0.14285714285714285},
%% {c,0.14285714285714285},
%% {d,0.14285714285714285},
%% {e,0.14285714285714285},
%% {giant,0.42857142857142866}]},
%% {weight_sum,1.0}}
%% %% Make a tree, then query it
%% %% (Hash::float(), tree()) -> {NextLargestBoundary::float(), szone()}
%% 58> T1 = machi_chash:make_tree(M1).
%% 59> machi_chash:query_tree(0.2555, T1).
%% {0.3333333333333333,giant}
%% 60> machi_chash:query_tree(0.3555, T1).
%% {0.3833333333333333,a}
%% 61> machi_chash:query_tree(0.4555, T1).
%% {0.4833333333333333,d}
%% %% How about hashing a bunch of strings and see what happens?
%% 74> Key1 = "Hello, world!".
%% "Hello, world!"
%% 75> [{K, element(2, machi_chash:hash_binary_via_float_map(K, M1))} || K <- [lists:sublist(Key1, X) || X <- lists:seq(1, length(Key1))]].
%% [{"H",giant},
%% {"He",giant},
%% {"Hel",giant},
%% {"Hell",e},
%% {"Hello",e},
%% {"Hello,",giant},
%% {"Hello, ",e},
%% {"Hello, w",e},
%% {"Hello, wo",giant},
%% {"Hello, wor",d},
%% {"Hello, worl",giant},
%% {"Hello, world",e},
%% {"Hello, world!",d}]
ok.