From b1a3ab5e7431304aaf71166edffdb5bb7daa725f Mon Sep 17 00:00:00 2001 From: Gregory Burd Date: Sun, 24 Feb 2013 16:29:54 -0500 Subject: [PATCH] Added an erlang implementation of the murmur3 hash function which is only a bit slower than phash2 but may generate hash values that are more uniformly distributed in our trie structure. --- src/murmur3.erl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/murmur3.erl diff --git a/src/murmur3.erl b/src/murmur3.erl new file mode 100644 index 0000000..12b3a73 --- /dev/null +++ b/src/murmur3.erl @@ -0,0 +1,68 @@ +%% -*- coding: utf-8 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright (C) 2013 Gregory Burd. All Rights Reserved. +%% +%% The contents of this file are subject to the Mozilla Public License, +%% Version 2, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Mozilla Public License along with this software. If not, it can be +%% retrieved online at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% + +%% Inspired by: +%% https://github.com/PeterScott/murmur3/blob/master/murmur3.c +%% https://raw.github.com/rdamodharan/redbloom/master/src/murmur3.erl + +-module(murmur3). +-export([hash/1, hash/2, hash/3]). + +-define(MASK32, 16#FFFFFFFF). + +rotl32(Num, R) -> ((Num bsl R) bor (Num bsr (32 - R))) band ?MASK32. + +hash32_mmix(K1) -> + K2 = (K1 * 16#CC9E2D51) band ?MASK32, + K3 = rotl32(K2, 15), + (K3 * 16#1B873593) band ?MASK32. + +hash32_fmix(H) -> + H2 = ((H bxor (H bsr 16)) * 16#85EBCA6B) band ?MASK32, + H3 = ((H2 bxor (H2 bsr 13)) * 16#C2B2AE35) band ?MASK32, + H3 bxor (H3 bsr 16). + +hash32_tail_mix(K1, Hash) -> + Hash bxor hash32_mmix(K1). + +% 4-byte blocks +hash32_body(<>, Hash) -> + K2 = hash32_mmix(K1), + Hash2 = Hash bxor K2, + Hash3 = rotl32(Hash2, 13), + Hash4 = (Hash3 * 5 + 16#E6546B64) band ?MASK32, + hash32_body(Rest, Hash4); +% Tail of the data (1-,2-,3- byte blocks) +hash32_body(<>, Hash) -> hash32_tail_mix(K1, Hash); +hash32_body(<>, Hash) -> hash32_tail_mix(K1, Hash); +hash32_body(<>, Hash) -> hash32_tail_mix(K1, Hash); +hash32_body(<<>>, Hash) -> Hash. + +hash32_impl(Data, Seed) -> + Len = byte_size(Data), + hash32_fmix(hash32_body(Data, Seed) bxor Len). + +hash32(Data, Seed) when is_binary(Data) -> hash32_impl(Data, Seed); +hash32(Data, Seed) -> hash32(term_to_binary(Data), Seed). + +hash32(Data) -> hash32(Data, 16#9E3779B9). % Seed is "golden ratio" FWIW. + +hash(Data) -> hash32(Data). +hash(32, Data) -> hash32(Data). +hash(32, Data, Seed) -> hash32(Data, Seed).