%%%-------------------------------------------------------------------
%%% Copyright (c) 2007-2011 Gemini Mobile Technologies, Inc.  All rights reserved.
%%% Copyright (c) 2013-2015 Basho Technologies, Inc.  All rights reserved.
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%
%%%-------------------------------------------------------------------

%% Consistent hashing library.  Also known as "random slicing".
%% Originally from the Hibari DB source code at https://github.com/hibari
%%
%% TODO items:
%%
%% 1. Refactor to use bigints instead of floating point numbers.  The
%%    ?SMALLEST_SIGNIFICANT_FLOAT_SIZE macro below doesn't allow as
%%    much wiggle-room for making really small hashing range
%%    definitions.

-module(machi_chash).

%% Float map entries that are not larger than this are dropped when a
%% float map is converted to a lookup list.
-define(SMALLEST_SIGNIFICANT_FLOAT_SIZE, 0.1e-12).
%% 2^160: one past the largest value of a 20-byte (SHA-1) digest.
-define(SHA_MAX, (1 bsl (20*8))).

%% -compile(export_all).
-export([make_float_map/1, make_float_map/2,
         sum_map_weights/1,
         make_tree/1,
         query_tree/2,
         hash_binary_via_float_map/2,
         hash_binary_via_float_tree/2,
         pretty_with_integers/2,
         pretty_with_integers/3]).
-export([make_demo_map1/0, make_demo_map2/0]).
-export([zzz_usage_details/0]). % merely to give EDoc a hint of our intent

-type owner_name() :: term().
%% Owner for a range on the unit interval.  We are agnostic about its
%% type.
-type weight() :: non_neg_integer().
%% For this library, a weight is an integer which specifies the
%% capacity of an "owner" relative to other owners.
%% For example, if owner A has a weight of 10 and owner B has a weight
%% of 20, then B will be assigned twice as much of the unit interval
%% as A.
-type float_map() :: [{owner_name(), float()}].
%% A float map subdivides the unit interval, starting at 0.0, into
%% partitions that are assigned to various owners.  The sum of all
%% floats must be exactly 1.0 (or close enough for floating point
%% purposes).
-opaque float_tree() :: gb_trees:tree(float(), owner_name()).
%% 'nil' (the empty tree) is never valid in our case, but teaching
%% Dialyzer that is difficult.
%% NOTE(review): the original wording said "we can't use
%% gb_trees:tree()", yet the declaration above does use it -- confirm
%% which one is intended.
-type owner_int_range() :: {owner_name(), non_neg_integer(), non_neg_integer()}.
%% Used when "prettying" a float map.
-type owner_weight() :: {owner_name(), weight()}.
-type owner_weight_list() :: [owner_weight()].
%% An owner_weight_list pairs each owner with its weight.
%% NOTE(review): the original comment described this as a definition
%% of brick assignments over the unit interval [0.0, 1.0] whose floats
%% must sum to 1.0, with the example
%% [{{br1,nd1}, 0.25}, {{br2,nd1}, 0.5}, {{br3,nd1}, 0.25}].
%% That reads like a float_map(); weight() above is declared as
%% non_neg_integer() -- confirm which description is intended.

-export_type([float_map/0, float_tree/0]).

%% @doc Create a float map, based on a basic owner weight list.
%%
%% Equivalent to calling make_float_map/2 with an empty old float map.

-spec make_float_map(owner_weight_list()) -> float_map().
make_float_map(NewOwnerWeights) ->
    make_float_map([], NewOwnerWeights).

%% @doc Create a float map, based on an older float map and a new weight
%% list.
%%
%% The weights in the new weight list may be different than (or the
%% same as) whatever weights were used to make the older float map.

-spec make_float_map(float_map(), owner_weight_list()) -> float_map().
make_float_map([], NewOwnerWeights) ->
    %% No previous map: start from a single all-unused interval and hand
    %% every owner its normalized share of it.
    Total = add_all_weights(NewOwnerWeights),
    Shares = [{Owner, Weight / Total} || {Owner, Weight} <- NewOwnerWeights],
    make_float_map2([{unused, 1.0}], Shares, NewOwnerWeights);
make_float_map(OldFloatMap, NewOwnerWeights) ->
    NewTotal = add_all_weights(NewOwnerWeights),

    %% Sum the old map's ranges per owner to recover each owner's old
    %% share of the interval.
    OldTotals = lists:foldl(
                  fun({Owner, Share}, Acc) ->
                          orddict:update_counter(Owner, Share, Acc)
                  end, orddict:new(), OldFloatMap),
    OldOwnerTotals = orddict:to_list(OldTotals),
    OldTotal = add_all_weights(OldOwnerTotals),

    %% Owners present in the old map but absent from the new weights.
    Retired = [Owner || {Owner, _} <- OldOwnerTotals]
        -- [Owner || {Owner, _} <- NewOwnerWeights],

    %% Space held by a retired owner becomes unused.
    Vacated = [case lists:member(Owner, Retired) of
                   true  -> {unused, Share};
                   false -> Entry
               end || {Owner, Share} = Entry <- OldFloatMap],

    %% Per-owner change in normalized share; an owner that was not in
    %% the old map gets its whole new share as the change.
    Deltas = [case orddict:find(Owner, OldTotals) of
                  {ok, OldShare} ->
                      {Owner, (Weight / NewTotal) - (OldShare / OldTotal)};
                  error ->
                      {Owner, Weight / NewTotal}
              end || {Owner, Weight} <- NewOwnerWeights],
    make_float_map2(Vacated, Deltas, NewOwnerWeights).

%% Apply the diff map to the old float map, then fold unused space into
%% its left neighbor and merge adjacent ranges that share an owner.
%% The third argument is ignored.
make_float_map2(OldFloatMap, DiffMap, _NewOwnerWeights) ->
    Applied = apply_diffmap(DiffMap, OldFloatMap),
    combine_neighbors(collapse_unused_in_float_map(Applied)).

%% Split the diff map into shrinking owners (negative diffs, passed on
%% as absolute values) and growing owners (positive diffs); zero diffs
%% are dropped.  All subtractions are applied before any addition.
apply_diffmap(DiffMap, FloatMap) ->
    Shrinking = [{Owner, abs(Delta)} || {Owner, Delta} <- DiffMap, Delta < 0],
    Growing = [Entry || {_Owner, Delta} = Entry <- DiffMap, Delta > 0],
    iter_diffmap_add(Growing, iter_diffmap_subtract(Shrinking, FloatMap)).

%% Sum the second element of every {Owner, Weight} pair, starting from
%% 0.0 so that the result is a float.
add_all_weights(OwnerWeights) ->
    add_all_weights(OwnerWeights, 0.0).

add_all_weights([{_Owner, Weight} | Rest], Acc) ->
    add_all_weights(Rest, Acc + Weight);
add_all_weights([], Acc) ->
    Acc.
%% Apply each {Owner, Amount} subtraction, in list order, to the float map.
iter_diffmap_subtract([{Ch, Diff}|T], FloatMap) ->
    iter_diffmap_subtract(T, apply_diffmap_subtract(Ch, Diff, FloatMap));
iter_diffmap_subtract([], FloatMap) ->
    FloatMap.

%% Apply each {Owner, Amount} addition, in list order, to the float map.
iter_diffmap_add([{Ch, Diff}|T], FloatMap) ->
    iter_diffmap_add(T, apply_diffmap_add(Ch, Diff, FloatMap));
iter_diffmap_add([], FloatMap) ->
    FloatMap.

%% Take `Diff' worth of space away from owner `Ch', scanning the map
%% left to right and turning the reclaimed space into `unused' entries.
%% A range larger than the remaining amount is split in two; a smaller
%% range is consumed whole and the scan continues with the remainder.
%% If the end of the map is reached first, the remainder is dropped.
apply_diffmap_subtract(Ch, Diff, [{Ch, Wt}|T]) ->
    if Wt == Diff ->
            [{unused, Wt}|T];
       Wt > Diff ->
            [{Ch, Wt - Diff}, {unused, Diff}|T];
       Wt < Diff ->
            [{unused, Wt}|apply_diffmap_subtract(Ch, Diff - Wt, T)]
    end;
apply_diffmap_subtract(Ch, Diff, [H|T]) ->
    [H|apply_diffmap_subtract(Ch, Diff, T)];
apply_diffmap_subtract(_Ch, _Diff, []) ->
    [].

%% Give `Diff' worth of space to owner `Ch', scanning the map left to
%% right and claiming `unused' entries.  An unused range larger than the
%% remaining amount is split in two; a smaller one is claimed whole and
%% the scan continues with the remainder.  If the end of the map is
%% reached first, the remainder is dropped.
apply_diffmap_add(Ch, Diff, [{unused, Wt}|T]) ->
    if Wt == Diff ->
            [{Ch, Wt}|T];
       Wt > Diff ->
            [{Ch, Diff}, {unused, Wt - Diff}|T];
       Wt < Diff ->
            [{Ch, Wt}|apply_diffmap_add(Ch, Diff - Wt, T)]
    end;
apply_diffmap_add(Ch, Diff, [H|T]) ->
    [H|apply_diffmap_add(Ch, Diff, T)];
apply_diffmap_add(_Ch, _Diff, []) ->
    [].

%% Merge adjacent entries that have the same owner into one entry whose
%% size is the sum of the two.
combine_neighbors([{Ch, Wt1}, {Ch, Wt2}|T]) ->
    combine_neighbors([{Ch, Wt1 + Wt2}|T]);
combine_neighbors([H|T]) ->
    [H|combine_neighbors(T)];
combine_neighbors([]) ->
    [].

%% Fold each `unused' entry into the entry immediately to its left.
%% An `unused' entry with nothing to its left is kept as it is.
collapse_unused_in_float_map([{Ch, Wt1}, {unused, Wt2}|T]) ->
    collapse_unused_in_float_map([{Ch, Wt1 + Wt2}|T]);
collapse_unused_in_float_map([{unused, _}] = L) ->
    L;                                          % Degenerate case only
collapse_unused_in_float_map([H|T]) ->
    [H|collapse_unused_in_float_map(T)];
collapse_unused_in_float_map([]) ->
    [].

%% Convert a non-empty float map into a list of {UpperBound, Owner}
%% pairs, where UpperBound is the running sum of the range sizes up to
%% and including that entry.  An empty map matches no clause.
chash_float_map_to_nextfloat_list([_|_] = FloatMap) ->
    %% QuickCheck found a bug ... need to weed out stuff smaller than
    %% ?SMALLEST_SIGNIFICANT_FLOAT_SIZE here.
    FM1 = [P || {_X, Y} = P <- FloatMap, Y > ?SMALLEST_SIGNIFICANT_FLOAT_SIZE],
    %% mapfoldl emits the pairs in map order while threading the running
    %% sum, so no reverse is needed.
    {NextFloats, _Sum} =
        lists:mapfoldl(fun({Name, Amount}, Sum) ->
                               Next = Sum + Amount,
                               {{Next, Name}, Next}
                       end, 0, FM1),
    NextFloats.
%% Build the lookup tree from a list of {UpperBound, Owner} pairs.
chash_nextfloat_list_to_gb_tree([]) ->
    gb_trees:balance(gb_trees:from_orddict([]));
chash_nextfloat_list_to_gb_tree(NextFloatList) ->
    {_FloatPos, Name} = lists:last(NextFloatList),
    %% QuickCheck found a bug ... it really helps to add a catch-all item
    %% at the far "right" of the list ... 42.0 is much greater than 1.0.
    NFs = NextFloatList ++ [{42.0, Name}],
    gb_trees:balance(gb_trees:from_orddict(orddict:from_list(NFs))).

%% Find the entry with the smallest key that is strictly greater than X.
%% This matches directly on the tree term ({_, Root} with nodes shaped
%% {Key, Val, Left, Right} or `nil') instead of going through the
%% gb_trees API.
-spec chash_gb_next(float(), float_tree()) -> {float(), owner_name()}.
chash_gb_next(X, {_, GbTree}) ->
    chash_gb_next1(X, GbTree).

%% Returns {Key, Val} for the smallest Key greater than X in the
%% subtree, or `nil' when the subtree has no such key.
chash_gb_next1(X, {Key, Val, Left, _Right}) when X < Key ->
    case chash_gb_next1(X, Left) of
        nil ->
            {Key, Val};
        Res ->
            Res
    end;
chash_gb_next1(X, {Key, _Val, _Left, Right}) when X >= Key ->
    chash_gb_next1(X, Right);
chash_gb_next1(_X, nil) ->
    nil.

%% @doc Scale a float map onto the integer slots 1..Max.  It can give a
%% developer an idea of how well chash_float_map_to_nextfloat_list will
%% do for a given value of Max.
%%
%% For example:
%%
%%     NewFloatMap = make_float_map([{unused, 1.0}],
%%                                  [{a,100}, {b, 100}, {c, 10}]),
%%     ChashIntInterval = chash_scale_to_int_interval(NewFloatMap, 100),
%%     io:format("QQQ: int int = ~p\n", [ChashIntInterval]),
%%     -> [{a,1,47},{b,48,94},{c,95,100}]
%%
%% Interpretation: out of the 100 slots, `a' owns slots 1-47, `b' owns
%% slots 48-94 and `c' owns slots 95-100.

chash_scale_to_int_interval(NewFloatMap, Max) ->
    chash_scale_to_int_interval(NewFloatMap, 0, Max).

%% @type nextfloat_list() = list({float(), brick()}).  A nextfloat_list
%% differs from a float_map in two respects: 1) nextfloat_list contains
%% tuples with the brick name in 2nd position, 2) the float() at each
%% position I_n > I_m, for all n, m such that n > m.
%% For example, a nextfloat_list of the float_map example above,
%% [{0.25, {br1, nd1}}, {0.75, {br2, nd1}}, {1.0, {br3, nd1}}].

%% `Cur' is the last slot already handed out.  Every entry but the last
%% gets trunc(Wt * Max) slots; the last entry absorbs whatever remains
%% up to `Max'.  An empty float map matches no clause.
chash_scale_to_int_interval([{Ch, _Wt}], Cur, Max) ->
    %% Start one past the previous range's end so that no slot is
    %% reported for two owners (and a one-entry map starts at slot 1).
    [{Ch, Cur + 1, Max}];
chash_scale_to_int_interval([{Ch, Wt}|T], Cur, Max) ->
    Int = trunc(Wt * Max),
    [{Ch, Cur + 1, Cur + Int}|chash_scale_to_int_interval(T, Cur + Int, Max)].
%%%%%%%%%%%%%

%% @doc Render a float map as human-friendly integer ranges between 1
%% and `Scale'.

-spec pretty_with_integers(float_map(), integer()) -> [owner_int_range()].
pretty_with_integers(Map, Scale) ->
    chash_scale_to_int_interval(Map, Scale).

%% @doc Build a float map from `OldWeights', evolve it with
%% `NewWeights', and render the result as human-friendly integer ranges
%% between 1 and `Scale'.

-spec pretty_with_integers(owner_weight_list(), owner_weight_list(),integer())->
    [owner_int_range()].
pretty_with_integers(OldWeights, NewWeights, Scale) ->
    OldMap = make_float_map(OldWeights),
    NewMap = make_float_map(OldMap, NewWeights),
    chash_scale_to_int_interval(NewMap, Scale).

%% @doc Create a float tree, the fast lookup structure used for
%% consistent hash queries.

-spec make_tree(float_map()) -> float_tree().
make_tree(Map) ->
    NextFloats = chash_float_map_to_nextfloat_list(Map),
    chash_nextfloat_list_to_gb_tree(NextFloats).

%% @doc Low-level query of a float tree with a (floating point) point
%% within the unit interval.

-spec query_tree(float(), float_tree()) -> {float(), owner_name()}.
query_tree(Val, Tree)
  when is_float(Val) andalso Val >= 0.0 andalso Val =< 1.0 ->
    chash_gb_next(Val, Tree).

%% @doc Create a sample float map.

-spec make_demo_map1() -> float_map().
make_demo_map1() ->
    element(2, make_demo_map1_i()).

%% Returns {FinalWeights, FloatMap}: the map is evolved through four
%% weight lists in turn, and the last of those lists is returned
%% alongside the resulting map.
make_demo_map1_i() ->
    Removed = {b, 100},
    Step1 = [{a, 100}, Removed, {c, 100}],
    Step2 = Step1 ++ [{d, 100}, {e, 100}],
    Step3 = Step2 -- [Removed],
    Step4 = Step3 ++ [{giant, 300}],
    Map1 = make_float_map(Step1),
    Map2 = make_float_map(Map1, Step2),
    Map3 = make_float_map(Map2, Step3),
    Map4 = make_float_map(Map3, Step4),
    {Step4, Map4}.

%% @doc Create a sample float map: the first demo map evolved through
%% three more weight lists, each adding one owner.

-spec make_demo_map2() -> float_map().
make_demo_map2() ->
    {Base, _} = make_demo_map1_i(),
    WithH = Base ++ [{h, 100}],
    WithI = WithH ++ [{i, 100}],
    WithJ = WithI ++ [{j, 100}],
    Map0 = make_demo_map1(),
    Map1 = make_float_map(Map0, WithH),
    Map2 = make_float_map(Map1, WithI),
    make_float_map(Map2, WithJ).

%% @doc Create a human-friendly summary of a float map.
%%
%% The two parts of the summary are: a per-owner total of the unit
%% interval range(s) owned by each owner, and a total sum of all
%% per-owner ranges (which should be 1.0 but is not enforced).

-spec sum_map_weights(float_map()) ->
    {{per_owner, float_map()}, {weight_sum, float()}}.
sum_map_weights(Map) ->
    %% The traversal is seeded with {undefined, 0.0}; that seed entry is
    %% removed from the result again here.
    L = sum_map_weights(lists:sort(Map), undefined, 0.0) -- [{undefined,0.0}],
    WeightSum = lists:sum([Weight || {_, Weight} <- L]),
    {{per_owner, L}, {weight_sum, WeightSum}}.

%% Walk the sorted map, accumulating a running total while the owner
%% stays the same and emitting {Owner, Total} whenever it changes.
sum_map_weights([{SZ, Weight}|T], SZ, SZ_total) ->
    sum_map_weights(T, SZ, SZ_total + Weight);
sum_map_weights([{SZ, Weight}|T], LastSZ, LastSZ_total) ->
    [{LastSZ, LastSZ_total}|sum_map_weights(T, SZ, Weight)];
sum_map_weights([], LastSZ, LastSZ_total) ->
    [{LastSZ, LastSZ_total}].

%% @doc Query a float map with a binary (inefficient).
%%
%% A float tree is built from `Map' on every call; build the tree once
%% with make_tree/1 and use hash_binary_via_float_tree/2 when making
%% repeated queries.

-spec hash_binary_via_float_map(binary(), float_map()) ->
    {float(), owner_name()}.
hash_binary_via_float_map(Key, Map) ->
    hash_binary_via_float_tree(Key, make_tree(Map)).

%% @doc Query a float tree with a binary.
%%
%% The SHA-1 digest of `Key' is read as one unsigned integer, divided by
%% ?SHA_MAX to get a float, and that float is looked up with
%% query_tree/2.

-spec hash_binary_via_float_tree(binary(), float_tree()) ->
    {float(), owner_name()}.
hash_binary_via_float_tree(Key, Tree) ->
    %% 20 bytes = 160 bits, the same width used to define ?SHA_MAX.
    <<Int:(20*8)/unsigned>> = crypto:hash(sha, Key),
    Float = Int / ?SHA_MAX,
    query_tree(Float, Tree).

%%%%% @doc Various usage examples, see source code below this function
%%%%% for full details.

zzz_usage_details() ->

    %% %% Make a map.  See the code for make_demo_map1() for the order of
    %% %% additions & deletions.  Here's a brief summary of the 4 steps.
    %% %%
    %% %% * 'a' through 'e' are weighted @ 100.
    %% %% * 'giant' is weighted @ 300.
    %% %% * 'b' is removed at step #3.

    %% 40> M1 = machi_chash:make_demo_map1().
    %% [{a,0.09285714285714286},
    %%  {giant,0.10714285714285715},
    %%  {d,0.026190476190476153},
    %%  {giant,0.10714285714285715},
    %%  {a,0.04999999999999999},
    %%  {giant,0.04999999999999999},
    %%  {d,0.04999999999999999},
    %%  {giant,0.050000000000000044},
    %%  {d,0.06666666666666671},
    %%  {e,0.009523809523809434},
    %%  {giant,0.05714285714285716},
    %%  {c,0.14285714285714285},
    %%  {giant,0.05714285714285716},
    %%  {e,0.13333333333333341}]

    %% %% Map M1 onto the interval of integers 1-10,000
    %% %%
    %% %% output = list({SZ_name::term(), Start::integer(), End::integer()})

    %% 41> machi_chash:pretty_with_integers(M1, 10*1000).
    %% [{a,1,928},
    %%  {giant,929,1999},
    %%  {d,2000,2260},
    %%  {giant,2261,3331},
    %%  {a,3332,3830},
    %%  {giant,3831,4329},
    %%  {d,4330,4828},
    %%  {giant,4829,5328},
    %%  {d,5329,5994},
    %%  {e,5995,6089},
    %%  {giant,6090,6660},
    %%  {c,6661,8088},
    %%  {giant,8089,8659},
    %%  {e,8659,10000}]

    %% %% Sum up all of the weights, make sure it's what we expect:

    %% 55> machi_chash:sum_map_weights(M1).
    %% {{per_owner,[{a,0.14285714285714285},
    %%              {c,0.14285714285714285},
    %%              {d,0.14285714285714285},
    %%              {e,0.14285714285714285},
    %%              {giant,0.42857142857142866}]},
    %%  {weight_sum,1.0}}

    %% %% Make a tree, then query it
    %% %% (Hash::float(), tree()) -> {NextLargestBoundary::float(), szone()}

    %% 58> T1 = machi_chash:make_tree(M1).
    %% 59> machi_chash:query_tree(0.2555, T1).
    %% {0.3333333333333333,giant}
    %% 60> machi_chash:query_tree(0.3555, T1).
    %% {0.3833333333333333,a}
    %% 61> machi_chash:query_tree(0.4555, T1).
    %% {0.4833333333333333,d}

    %% %% How about hashing a bunch of strings and see what happens?

    %% 74> Key1 = "Hello, world!".
    %% "Hello, world!"
    %% 75> [{K, element(2, machi_chash:hash_binary_via_float_map(K, M1))} || K <- [lists:sublist(Key1, X) || X <- lists:seq(1, length(Key1))]].
    %% [{"H",giant},
    %%  {"He",giant},
    %%  {"Hel",giant},
    %%  {"Hell",e},
    %%  {"Hello",e},
    %%  {"Hello,",giant},
    %%  {"Hello, ",e},
    %%  {"Hello, w",e},
    %%  {"Hello, wo",giant},
    %%  {"Hello, wor",d},
    %%  {"Hello, worl",giant},
    %%  {"Hello, world",e},
    %%  {"Hello, world!",d}]

    ok.