Vowpal Wabbit
hash.h
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
// MurmurHash3, by Austin Appleby
//
// Originals at:
// http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
// http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.h
//
// Notes:
// 1) This code assumes we can read a 4-byte value from any address
//    without crashing (i.e., unaligned access is supported). This is
//    not a problem on Intel/x86/AMD64 machines (including new Macs).
// 2) It produces different results on little-endian and big-endian machines.
//
// Adapted for VW and contributed by Ariel Faigon.
//

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
//-----------------------------------------------------------------------------
#pragma once

#include "future_compat.h"

#include <sys/types.h>
#include <cstdint>
// All modern compilers will optimize this to the rotate intrinsic.
constexpr inline uint32_t rotl32(uint32_t x, int8_t r) noexcept
{
  return (x << r) | (x >> (32 - r));
}

namespace MURMUR_HASH_3
{
  //-----------------------------------------------------------------------------
  // Finalization mix - force all bits of a hash block to avalanche
  VW_STD14_CONSTEXPR static inline uint32_t fmix(uint32_t h) noexcept
  {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
  }
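
  // A quick illustration of the avalanche behavior (ours, not from the
  // original source): fmix(0) == 0, while fmix(1) scrambles that single set
  // bit across the whole word, so adjacent inputs differ in roughly half of
  // their output bits.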

  //-----------------------------------------------------------------------------
  // Block read - if your platform needs to do endian-swapping or can only
  // handle aligned reads, do the conversion here
  constexpr static inline uint32_t getblock(const uint32_t* p, int i) noexcept
  {
    return p[i];
  }
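
  // A sketch of the conversion the comment above alludes to: on a big-endian
  // or alignment-strict platform, a byte-order-independent read could
  // assemble the word manually (illustrative only; getblock_le is not part
  // of this header):
  //
  //   static inline uint32_t getblock_le(const uint8_t* p, int i) noexcept
  //   {
  //     const uint8_t* b = p + i * 4;
  //     return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
  //         ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
  //   }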
}  // namespace MURMUR_HASH_3

VW_STD14_CONSTEXPR inline uint64_t uniform_hash(const void* key, size_t len, uint64_t seed)
{
  const uint8_t* data = (const uint8_t*)key;
  const int nblocks = (int)len / 4;

  uint32_t h1 = (uint32_t)seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  // --- body
  const uint32_t* blocks = (const uint32_t*)(data + nblocks * 4);

  for (int i = -nblocks; i; i++)
  {
    uint32_t k1 = MURMUR_HASH_3::getblock(blocks, i);

    k1 *= c1;
    k1 = rotl32(k1, 15);
    k1 *= c2;

    h1 ^= k1;
    h1 = rotl32(h1, 13);
    h1 = h1 * 5 + 0xe6546b64;
  }

  // --- tail
  const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);

  uint32_t k1 = 0;

  // The 'fall through' comments below silence the implicit-fallthrough warning introduced in GCC 7.
  // Once we move to C++17 these should be replaced with the [[fallthrough]] attribute.
  switch (len & 3u)
  {
    case 3:
      k1 ^= tail[2] << 16;
      // fall through
    case 2:
      k1 ^= tail[1] << 8;
      // fall through
    case 1:
      k1 ^= tail[0];
      k1 *= c1;
      k1 = rotl32(k1, 15);
      k1 *= c2;
      h1 ^= k1;
      break;
    default:
      break;
  }

  // --- finalization
  h1 ^= (uint32_t)len;

  return MURMUR_HASH_3::fmix(h1);
}
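
// Example usage (a minimal sketch; hash_example and the seed value 0 are
// ours, not part of this header beyond uniform_hash itself):
//
//   #include "hash.h"
//   #include <cstring>
//
//   inline uint64_t hash_example(const char* feature_name)
//   {
//     // Hash the raw bytes of the string, excluding the terminating NUL.
//     return uniform_hash(feature_name, std::strlen(feature_name), 0);
//   }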