Vowpal Wabbit
v_hashmap.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #pragma once
7 #include <cstdio>
8 #include <iostream>
9 #include <cstdlib>
10 #include <cstring>
11 #include "v_array.h"
12 
13 template <class K, class V>
14 class v_hashmap
15 {
16  public:
17  struct hash_elem
18  {
19  bool occupied;
20  K key;
21  V val;
22  uint64_t hash;
23  };
24 
25  bool (*equivalent)(void*, const K&, const K&);
26  bool (*equivalent_no_data)(const K&, const K&);
27  // size_t (*hash)(K);
30  size_t last_position;
31  size_t num_occupants;
32  void* eq_data;
33  // size_t num_linear_steps, num_clear, total_size_at_clears;
34 
35  size_t base_size() { return dat.end_array - dat.begin(); }
36 
37  void set_default_value(const V& def) { default_value = def; }
38 
39  void init_dat(size_t min_size, const V& def, bool (*eq)(void*, const K&, const K&), void* eq_dat = nullptr)
40  {
41  if (min_size < 1023)
42  min_size = 1023;
43  dat.resize(min_size, true); // resize sets to 0 ==> occupied=false
44 
45  default_value = def;
46  equivalent = eq;
47  equivalent_no_data = nullptr;
48  eq_data = eq_dat;
49 
50  last_position = 0;
51  num_occupants = 0;
52  }
53 
54  void init(size_t min_size, const V& def, bool (*eq)(const K&, const K&))
55  {
56  if (min_size < 1023)
57  min_size = 1023;
58  dat.resize(min_size); // resize sets to 0 ==> occupied=false
59 
60  default_value = def;
61  equivalent = nullptr;
62  equivalent_no_data = eq;
63  eq_data = nullptr;
64 
65  last_position = 0;
66  num_occupants = 0;
67  }
68 
69  void init(size_t min_size, bool (*eq)(const K&, const K&))
70  {
71  if (min_size < 1023)
72  min_size = 1023;
73  dat.resize(min_size); // resize sets to 0 ==> occupied=false
74 
75  equivalent = nullptr;
76  equivalent_no_data = eq;
77  eq_data = nullptr;
78 
79  last_position = 0;
80  num_occupants = 0;
81  }
82 
83  v_hashmap(size_t min_size, const V& def, bool (*eq)(void*, const K&, const K&), void* eq_dat = nullptr)
84  {
85  init_dat(min_size, def, eq, eq_dat);
86  }
87  v_hashmap(size_t min_size, V& def, bool (*eq)(const K&, const K&)) { init(min_size, def, eq); }
88  v_hashmap() { init(1023, nullptr); }
89 
90  void set_equivalent(bool (*eq)(void*, const K&, const K&), void* eq_dat = nullptr)
91  {
92  equivalent = eq;
93  eq_data = eq_dat;
94  equivalent_no_data = nullptr;
95  }
96  void set_equivalent(bool (*eq)(const K&, const K&))
97  {
98  equivalent_no_data = eq;
99  eq_data = nullptr;
100  equivalent = nullptr;
101  }
102 
103  void delete_v() { dat.delete_v(); }
104 
106 
107  void clear()
108  {
109  if (num_occupants == 0)
110  return;
111  memset(dat.begin(), 0, base_size() * sizeof(hash_elem));
112  last_position = 0;
113  num_occupants = 0;
114  }
115 
116  void* iterator_next(void* prev)
117  {
118  hash_elem* e = (hash_elem*)prev;
119  if (e == nullptr)
120  return nullptr;
121  e++;
122  while (e != dat.end_array)
123  {
124  if (e->occupied)
125  return e;
126  e++;
127  }
128  return nullptr;
129  }
130 
131  void* iterator()
132  {
133  hash_elem* e = dat.begin();
134  while (e != dat.end_array)
135  {
136  if (e->occupied)
137  return e;
138  e++;
139  }
140  return nullptr;
141  }
142 
143  V* iterator_get_value(void* el)
144  {
145  hash_elem* e = (hash_elem*)el;
146  return &e->val;
147  }
148 
149  void iter(void (*func)(K, V))
150  { // for (size_t lp=0; lp<base_size(); lp++) {
151  for (hash_elem* e = dat.begin(); e != dat.end_array; e++)
152  { // hash_elem* e = dat.begin()+lp;
153  if (e->occupied)
154  { // printf(" [lp=%d\tocc=%d\thash=%llu]\n", lp, e->occupied, e->hash);
155  func(e->key, e->val);
156  }
157  }
158  }
159 
160  void put_after_get_nogrow(const K& key, uint64_t hash, const V& val)
161  { // printf("++[lp=%d\tocc=%d\thash=%llu]\n", last_position, dat[last_position].occupied, hash);
162  dat[last_position].occupied = true;
163  dat[last_position].key = key;
164  dat[last_position].val = val;
165  dat[last_position].hash = hash;
166  }
167 
168  void double_size()
169  { // printf("doubling size!\n");
170  // remember the old occupants
172  tmp.resize(num_occupants + 10);
173  for (hash_elem* e = dat.begin(); e != dat.end_array; e++)
174  if (e->occupied)
175  tmp.push_back(*e);
176 
177  // double the size and clear
178  // std::cerr<<"doubling to "<<(base_size()*2) << " units == " << (base_size()*2*sizeof(hash_elem)) << " bytes / " <<
179  // ((size_t)-1)<<std::endl;
180  dat.resize(base_size() * 2);
181  memset(dat.begin(), 0, base_size() * sizeof(hash_elem));
182 
183  // re-insert occupants
184  for (auto& e : tmp)
185  {
186  get(e.key, e.hash);
187  // std::cerr << "reinserting " << e->key << " at " << last_position << std::endl;
188  put_after_get_nogrow(e.key, e.hash, e.val);
189  }
190  tmp.delete_v();
191  }
192 
193  bool is_equivalent(const K& key, const K& key2)
194  {
195  if ((equivalent == nullptr) && (equivalent_no_data == nullptr))
196  return true;
197  else if (equivalent != nullptr)
198  return equivalent(eq_data, key, key2);
199  else
200  return equivalent_no_data(key, key2);
201  }
202 
203  V& get(const K& key, uint64_t hash)
204  {
205  size_t sz = base_size();
206  size_t first_position = hash % sz;
207  last_position = first_position;
208  while (true)
209  { // if there's nothing there, obviously we don't contain it
210  if (!dat[last_position].occupied)
211  return default_value;
212 
213  // there's something there: maybe it's us
214  if ((dat[last_position].hash == hash) && is_equivalent(key, dat[last_position].key))
215  return dat[last_position].val;
216 
217  // there's something there that's NOT us -- advance pointer
218  // cerr << "+";
219  // num_linear_steps++;
220  last_position++;
221  if (last_position >= sz)
222  last_position = 0;
223 
224  // check to make sure we haven't cycled around -- this is a bug!
225  if (last_position == first_position)
226  THROW("error: v_hashmap did not grow enough!");
227  }
228  }
229 
230  bool contains(const K& key, size_t hash)
231  {
232  size_t sz = base_size();
233  size_t first_position = hash % sz;
234  last_position = first_position;
235  while (true)
236  { // if there's nothing there, obviously we don't contain it
237  if (!dat[last_position].occupied)
238  return false;
239 
240  // there's something there: maybe it's us
241  if ((dat[last_position].hash == hash) && is_equivalent(key, dat[last_position].key))
242  return true;
243 
244  // there's something there that's NOT us -- advance pointer
245  last_position++;
246  if (last_position >= sz)
247  last_position = 0;
248 
249  // check to make sure we haven't cycled around -- this is a bug!
250  if (last_position == first_position)
251  THROW("error: v_hashmap did not grow enough!");
252  }
253  }
254 
255  // only call put_after_get(key, hash, val) if you've already
256  // run get(key, hash). if you haven't already run get, then
257  // you should use put() rather than put_after_get(). these
258  // both will overwrite previous values, if they exist.
259  void put_after_get(const K& key, uint64_t hash, const V& val)
260  {
261  if (!dat[last_position].occupied)
262  {
263  num_occupants++;
264  if (num_occupants * 4 >= base_size()) // grow when we're a quarter full
265  {
266  double_size();
267  get(key, hash); // probably should change last_position-- this is the lazy man's way to do it
268  }
269  }
270 
271  // now actually insert it
272  put_after_get_nogrow(key, hash, val);
273  }
274 
275  void put(const K& key, uint64_t hash, const V& val)
276  {
277  get(key, hash);
278  put_after_get(key, hash, val);
279  }
280 
281  size_t size() { return num_occupants; }
282 };
283 
284 void test_v_hashmap();
void resize(size_t length)
Definition: v_array.h:69
v_hashmap()
Definition: v_hashmap.h:88
v_hashmap(size_t min_size, const V &def, bool(*eq)(void *, const K &, const K &), void *eq_dat=nullptr)
Definition: v_hashmap.h:83
bool(* equivalent)(void *, const K &, const K &)
Definition: v_hashmap.h:25
size_t last_position
Definition: v_hashmap.h:30
V * iterator_get_value(void *el)
Definition: v_hashmap.h:143
~v_hashmap()
Definition: v_hashmap.h:105
void put_after_get(const K &key, uint64_t hash, const V &val)
Definition: v_hashmap.h:259
void test_v_hashmap()
v_array< hash_elem > dat
Definition: v_hashmap.h:29
void * iterator()
Definition: v_hashmap.h:131
size_t num_occupants
Definition: v_hashmap.h:31
void * iterator_next(void *prev)
Definition: v_hashmap.h:116
T *& begin()
Definition: v_array.h:42
void set_equivalent(bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:96
V default_value
Definition: v_hashmap.h:28
void set_default_value(const V &def)
Definition: v_hashmap.h:37
void push_back(const T &new_ele)
Definition: v_array.h:107
void init(size_t min_size, bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:69
void clear()
Definition: v_hashmap.h:107
bool is_equivalent(const K &key, const K &key2)
Definition: v_hashmap.h:193
bool(* equivalent_no_data)(const K &, const K &)
Definition: v_hashmap.h:26
void put_after_get_nogrow(const K &key, uint64_t hash, const V &val)
Definition: v_hashmap.h:160
size_t base_size()
Definition: v_hashmap.h:35
void * eq_data
Definition: v_hashmap.h:32
void double_size()
Definition: v_hashmap.h:168
void init(size_t min_size, const V &def, bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:54
void set_equivalent(bool(*eq)(void *, const K &, const K &), void *eq_dat=nullptr)
Definition: v_hashmap.h:90
void delete_v()
Definition: v_hashmap.h:103
bool contains(const K &key, size_t hash)
Definition: v_hashmap.h:230
v_hashmap(size_t min_size, V &def, bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:87
void delete_v()
Definition: v_array.h:98
void put(const K &key, uint64_t hash, const V &val)
Definition: v_hashmap.h:275
#define THROW(args)
Definition: vw_exception.h:181
void iter(void(*func)(K, V))
Definition: v_hashmap.h:149
size_t size()
Definition: v_hashmap.h:281
void init_dat(size_t min_size, const V &def, bool(*eq)(void *, const K &, const K &), void *eq_dat=nullptr)
Definition: v_hashmap.h:39
T * end_array
Definition: v_array.h:38