%%%-------------------------------------------------------------------
%%% Copyright (c) 2007-2011 Gemini Mobile Technologies, Inc. All rights reserved.
%%% Copyright (c) 2013-2015 Basho Technologies, Inc. All rights reserved.
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%
%%%-------------------------------------------------------------------
%% @doc Consistent hashing library. Also known as "random slicing".
%%
%% This code was originally from the Hibari DB source code at
%% [https://github.com/hibari]
-module(machi_chash).
%% TODO items:
%%
%% 1. Refactor to use bigints instead of floating point numbers. The
%% ?SMALLEST_SIGNIFICANT_FLOAT_SIZE macro below doesn't allow as
%% much wiggle-room for making really small hashing range
%% definitions.
%% Float map entries whose size is not strictly greater than this
%% threshold are dropped by chash_float_map_to_nextfloat_list/1.
-define(SMALLEST_SIGNIFICANT_FLOAT_SIZE, 0.1e-12).
%% 2^160, i.e. one more than the largest unsigned value that fits in
%% 20 bytes.
%% NOTE(review): presumably the size of the SHA-1 hash space -- confirm
%% against the code that uses this macro.
-define(SHA_MAX, (1 bsl (20*8))).
%% -compile(export_all).
-export([make_float_map/1, make_float_map/2,
sum_map_weights/1,
make_tree/1,
query_tree/2,
hash_binary_via_float_map/2,
hash_binary_via_float_tree/2,
pretty_with_integers/2,
pretty_with_integers/3]).
-export([make_demo_map1/0, make_demo_map2/0]).
-export([zzz_usage_details/0]). % merely to give EDoc a hint of our intent
-type owner_name() :: term().
%% Owner for a range on the unit interval. We are agnostic about its
%% type.
-type weight() :: non_neg_integer().
%% For this library, a weight is an integer which specifies the
%% capacity of an "owner" relative to other owners. For example, if
%% owner A has a weight of 10 and owner B has a weight of 20, then B
%% will be assigned twice as much of the unit interval as A.
-type float_map() :: [{owner_name(), float()}].
%% A float map subdivides the unit interval, starting at 0.0, into
%% partitions that are assigned to various owners. The sum of all
%% floats must be exactly 1.0 (or close enough for floating point
%% purposes).
-opaque float_tree() :: gb_trees:tree(float(), owner_name()).
%% We can't use gb_trees:tree() because 'nil' (the empty tree) is
%% never valid in our case. But teaching Dialyzer that is difficult.
%% NOTE(review): the declaration above does use gb_trees:tree/2, so
%% this remark may predate it -- confirm.
-type owner_int_range() :: {owner_name(), non_neg_integer(), non_neg_integer()}.
%% Used when "prettying" a float map.
-type owner_weight() :: {owner_name(), weight()}.
-type owner_weight_list() :: [owner_weight()].
%% An owner_weight_list is a definition of brick assignments over the
%% unit interval [0.0, 1.0]. The sum of all floats must be 1.0. For
%% example, [{{br1,nd1}, 0.25}, {{br2,nd1}, 0.5}, {{br3,nd1}, 0.25}].
%% NOTE(review): the text above reads like a description of
%% float_map(); weight() is declared as non_neg_integer() and
%% make_float_map/2 normalizes the weights by their sum -- confirm the
%% intended wording.
-export_type([float_map/0, float_tree/0]).
%% @doc Build a float map from scratch out of an owner weight list.
%%
%% Equivalent to calling make_float_map/2 with an empty old float map.
-spec make_float_map(owner_weight_list()) -> float_map().
make_float_map(OwnerWeights) ->
    NoPreviousMap = [],
    make_float_map(NoPreviousMap, OwnerWeights).
%% @doc Derive a float map from a previous float map and a new owner
%% weight list.
%%
%% The new weights need not match the weights that produced the old
%% map. Owners that appear only in the old map have their ranges marked
%% `unused'; every owner in the new list receives the difference
%% between its new normalized share and its old normalized share.
-spec make_float_map(float_map(), owner_weight_list()) -> float_map().
make_float_map([], NewOwnerWeights) ->
    %% No previous map: start from a fully unused unit interval and hand
    %% each owner its normalized share.
    Total = add_all_weights(NewOwnerWeights),
    Shares = [{Owner, Weight / Total} || {Owner, Weight} <- NewOwnerWeights],
    make_float_map2([{unused, 1.0}], Shares, NewOwnerWeights);
make_float_map(OldFloatMap, NewOwnerWeights) ->
    NewTotal = add_all_weights(NewOwnerWeights),
    %% Rebuild the old per-owner totals by summing every range each
    %% owner holds in the old map.
    AddRange = fun({Owner, Size}, Totals) ->
                       orddict:update_counter(Owner, Size, Totals)
               end,
    OldTotals = lists:foldl(AddRange, orddict:new(), OldFloatMap),
    OldTotal = add_all_weights(orddict:to_list(OldTotals)),
    %% Owners present in the old map but absent from the new weights.
    Departed = orddict:fetch_keys(OldTotals)
        -- [Owner || {Owner, _} <- NewOwnerWeights],
    %% Space held by a departed owner becomes unused.
    Vacated = [case lists:member(Owner, Departed) of
                   true -> {unused, Size};
                   false -> Entry
               end || {Owner, Size} = Entry <- OldFloatMap],
    %% Per-owner change in normalized share; brand new owners get their
    %% whole share as the difference.
    Deltas = [case orddict:find(Owner, OldTotals) of
                  {ok, OldSize} ->
                      {Owner, (Weight / NewTotal) -
                           (OldSize / OldTotal)};
                  error ->
                      {Owner, Weight / NewTotal}
              end || {Owner, Weight} <- NewOwnerWeights],
    make_float_map2(Vacated, Deltas, NewOwnerWeights).
%% Applies the diff map to the old float map, then folds unused space
%% into the preceding range and merges adjacent ranges that share an
%% owner. The third argument is not used.
make_float_map2(OldFloatMap, DiffMap, _NewOwnerWeights) ->
    Applied = apply_diffmap(DiffMap, OldFloatMap),
    Collapsed = collapse_unused_in_float_map(Applied),
    combine_neighbors(Collapsed).
%% Applies a diff map to a float map: all negative differences are
%% taken away first (as positive amounts), then all positive
%% differences are handed out. Zero differences are ignored.
apply_diffmap(DiffMap, FloatMap) ->
    Shrinking = [{Owner, abs(Delta)} || {Owner, Delta} <- DiffMap, Delta < 0],
    Growing = [Entry || {_Owner, Delta} = Entry <- DiffMap, Delta > 0],
    Shrunk = iter_diffmap_subtract(Shrinking, FloatMap),
    iter_diffmap_add(Growing, Shrunk).
%% Sums the second element of every {Owner, Weight} pair, left to
%% right, starting from 0.0 so the result is always a float.
add_all_weights(OwnerWeights) ->
    add_all_weights(OwnerWeights, 0.0).

add_all_weights([{_Owner, Weight} | Rest], Total) ->
    add_all_weights(Rest, Total + Weight);
add_all_weights([], Total) ->
    Total.
%% Applies each {Owner, Amount} reduction, in list order, to the float
%% map via apply_diffmap_subtract/3 and returns the resulting map.
iter_diffmap_subtract(Reductions, FloatMap) ->
    lists:foldl(fun({Owner, Amount}, Map) ->
                        apply_diffmap_subtract(Owner, Amount, Map)
                end, FloatMap, Reductions).
%% Applies each {Owner, Amount} increase, in list order, to the float
%% map via apply_diffmap_add/3 and returns the resulting map.
iter_diffmap_add(Increases, FloatMap) ->
    lists:foldl(fun({Owner, Amount}, Map) ->
                        apply_diffmap_add(Owner, Amount, Map)
                end, FloatMap, Increases).
%% Takes Diff away from owner Ch, scanning the float map left to right
%% and relabelling the freed space as `unused'.
%%
%% A range of Ch that is exactly Diff wide is freed completely; a wider
%% range keeps its leading part and frees Diff; a narrower range is
%% freed completely and the remainder is taken from later ranges. If
%% the map runs out before Diff is consumed, the remainder is dropped.
apply_diffmap_subtract(Ch, Diff, [{Ch, Wt} | T]) when Wt == Diff ->
    [{unused, Wt} | T];
apply_diffmap_subtract(Ch, Diff, [{Ch, Wt} | T]) when Wt > Diff ->
    [{Ch, Wt - Diff}, {unused, Diff} | T];
apply_diffmap_subtract(Ch, Diff, [{Ch, Wt} | T]) when Wt < Diff ->
    Remaining = Diff - Wt,
    [{unused, Wt} | apply_diffmap_subtract(Ch, Remaining, T)];
apply_diffmap_subtract(Ch, Diff, [Other | T]) ->
    %% Not a range of Ch: keep it and continue with the rest.
    [Other | apply_diffmap_subtract(Ch, Diff, T)];
apply_diffmap_subtract(_Ch, _Diff, []) ->
    [].
%% Gives Diff worth of `unused' space to owner Ch, scanning the float
%% map left to right.
%%
%% An unused range that is exactly Diff wide is taken over completely;
%% a wider one is split so that Ch gets the leading Diff; a narrower
%% one is taken over completely and the remainder is looked for in
%% later unused ranges. If the map runs out before Diff is placed, the
%% remainder is dropped.
apply_diffmap_add(Ch, Diff, [{unused, Wt} | T]) when Wt == Diff ->
    [{Ch, Wt} | T];
apply_diffmap_add(Ch, Diff, [{unused, Wt} | T]) when Wt > Diff ->
    [{Ch, Diff}, {unused, Wt - Diff} | T];
apply_diffmap_add(Ch, Diff, [{unused, Wt} | T]) when Wt < Diff ->
    Remaining = Diff - Wt,
    [{Ch, Wt} | apply_diffmap_add(Ch, Remaining, T)];
apply_diffmap_add(Ch, Diff, [Other | T]) ->
    %% Not unused space: keep it and continue with the rest.
    [Other | apply_diffmap_add(Ch, Diff, T)];
apply_diffmap_add(_Ch, _Diff, []) ->
    [].
%% Merges runs of adjacent ranges that belong to the same owner into a
%% single range whose size is the sum of the run, preserving order.
combine_neighbors(FloatMap) ->
    Merge = fun({Owner, Size}, [{Owner, SoFar} | Done]) ->
                    %% Same owner as the range just emitted: widen it.
                    [{Owner, SoFar + Size} | Done];
               (Entry, Done) ->
                    [Entry | Done]
            end,
    lists:reverse(lists:foldl(Merge, [], FloatMap)).
%% Folds every `unused' range into the range immediately before it, so
%% the preceding owner absorbs the free space. An `unused' range with
%% nothing before it (e.g. the degenerate map [{unused, 1.0}]) is kept
%% as is.
collapse_unused_in_float_map(FloatMap) ->
    Absorb = fun({unused, Free}, [{Owner, Size} | Done]) ->
                     [{Owner, Size + Free} | Done];
                (Entry, Done) ->
                     [Entry | Done]
             end,
    lists:reverse(lists:foldl(Absorb, [], FloatMap)).
%% Converts a non-empty float map into a "next float" list: a list of
%% {UpperBound, Owner} pairs, in the same order as the map, where
%% UpperBound is the running sum of the range sizes up to and including
%% that owner's range.
%%
%% QuickCheck found a bug ... ranges that are not strictly larger than
%% ?SMALLEST_SIGNIFICANT_FLOAT_SIZE are weeded out before summing.
%%
%% An empty float map is rejected with a function_clause error. The
%% head matches on [_|_] instead of guarding on length/1, so the
%% emptiness check does not traverse the whole list.
chash_float_map_to_nextfloat_list([_ | _] = FloatMap) ->
    Significant = [P || {_Owner, Size} = P <- FloatMap,
                        Size > ?SMALLEST_SIGNIFICANT_FLOAT_SIZE],
    {NextFloats, _Total} =
        lists:mapfoldl(fun({Owner, Size}, Sum) ->
                               Next = Sum + Size,
                               {{Next, Owner}, Next}
                       end, 0, Significant),
    NextFloats.
%% Builds a balanced gb_tree keyed by float position from a "next
%% float" list of {Position, Owner} pairs. An empty list yields an
%% empty tree.
%%
%% QuickCheck found a bug ... it really helps to add a catch-all item
%% at the far "right" of the list, so the last owner is stored a second
%% time under the key 42.0, which is much greater than 1.0.
chash_nextfloat_list_to_gb_tree([]) ->
    EmptyTree = gb_trees:from_orddict([]),
    gb_trees:balance(EmptyTree);
chash_nextfloat_list_to_gb_tree(NextFloatList) ->
    {_LastPos, LastOwner} = lists:last(NextFloatList),
    CatchAll = {42.0, LastOwner},
    Sorted = orddict:from_list(NextFloatList ++ [CatchAll]),
    gb_trees:balance(gb_trees:from_orddict(Sorted)).
%% Looks up the entry with the smallest key that is strictly greater
%% than X. The tree is taken apart as a {Size, RootNode} pair and the
%% search walks the nodes directly.
-spec chash_gb_next(float(), float_tree()) -> {float(), owner_name()}.
chash_gb_next(X, {_Size, Root}) ->
    chash_gb_next1(X, Root).

%% Returns {Key, Val} for the smallest key greater than X in the
%% subtree, or nil when the subtree holds no such key.
chash_gb_next1(_X, nil) ->
    nil;
chash_gb_next1(X, {Key, Val, Left, Right}) ->
    case X < Key of
        true ->
            %% This node qualifies, but a closer key may sit to the left.
            case chash_gb_next1(X, Left) of
                nil -> {Key, Val};
                Closer -> Closer
            end;
        false ->
            %% Key =< X: only the right subtree can hold larger keys.
            chash_gb_next1(X, Right)
    end.
%% @doc Not used directly, but can give a developer an idea of how well
%% chash_float_map_to_nextfloat_list will do for a given value of Max.
%%
%% For example:
%%