Vowpal Wabbit
explore_internal.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include "hash.h"
4 
5 // get the error code defined in master
6 #include "explore.h"
7 
8 #include <cstdint>
9 #include <stdexcept>
10 #include <algorithm>
11 #include <numeric>
12 #include <cstring>
13 #include <cmath>
14 
15 namespace exploration
16 {
17  const uint64_t a = 0xeece66d5deece66dULL;
18  const uint64_t c = 2147483647;
19 
20  const int bias = 127 << 23u;
21 
22  union int_float
23  {
24  int32_t i;
25  float f;
26  };
27 
28  // uniform random between 0 and 1
29  inline float uniform_random_merand48(uint64_t initial)
30  {
31  initial = a * initial + c;
32  int_float temp;
33  temp.i = ((initial >> 25) & 0x7FFFFF) | bias;
34  return temp.f - 1;
35  }
36 
37  template<typename It>
38  int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last, std::random_access_iterator_tag /* pdf_tag */)
39  {
40  if (pdf_last < pdf_first)
42 
43  size_t num_actions = pdf_last - pdf_first;
44  if (num_actions == 0)
46 
47  if (top_action >= num_actions)
48  top_action = (uint32_t)num_actions - 1;
49 
50  float prob = epsilon / (float)num_actions;
51 
52  for (It d = pdf_first; d != pdf_last; ++d)
53  *d = prob;
54 
55  *(pdf_first + top_action) += 1.f - epsilon;
56 
57  return S_EXPLORATION_OK;
58  }
59 
60  template<typename It>
61  int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
62  {
63  typedef typename std::iterator_traits<It>::iterator_category pdf_category;
64  return generate_epsilon_greedy(epsilon, top_action, pdf_first, pdf_last, pdf_category());
65  }
66 
67  template<typename InputIt, typename OutputIt>
68  int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, std::input_iterator_tag /* scores_tag */, OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag /* pdf_tag */)
69  {
70  if (scores_last < scores_first || pdf_last < pdf_first)
72 
73  size_t num_actions_scores = scores_last - scores_first;
74  size_t num_actions_pdf = pdf_last - pdf_first;
75 
76  if (num_actions_scores != num_actions_pdf)
77  {
78  // fallback to the minimum
79  scores_last = scores_first + std::min(num_actions_scores, num_actions_pdf);
80  OutputIt pdf_new_last = pdf_first + std::min(num_actions_scores, num_actions_pdf);
81 
82  // zero out pdf
83  for (OutputIt d = pdf_new_last; d != pdf_last; ++d)
84  *d = 0;
85 
86  pdf_last = pdf_new_last;
87  }
88 
89  if (pdf_last - pdf_first == 0)
91 
92  float norm = 0.;
93  float max_score = lambda > 0 ? *std::max_element(scores_first, scores_last)
94  : *std::min_element(scores_first, scores_last);
95 
96  InputIt s = scores_first;
97  for (OutputIt d = pdf_first; d != pdf_last && s != scores_last; ++d, ++s)
98  {
99  float prob = exp(lambda*(*s - max_score));
100  norm += prob;
101 
102  *d = prob;
103  }
104 
105  // normalize
106  for (OutputIt d = pdf_first; d != pdf_last; ++d)
107  *d /= norm;
108 
109  return S_EXPLORATION_OK;
110  }
111 
112  template<typename InputIt, typename OutputIt>
113  int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
114  {
115  typedef typename std::iterator_traits<InputIt>::iterator_category scores_category;
116  typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
117 
118  return generate_softmax(lambda, scores_first, scores_last, scores_category(), pdf_first, pdf_last, pdf_category());
119  }
120 
121  template<typename InputIt, typename OutputIt>
122  int generate_bag(InputIt top_actions_first, InputIt top_actions_last, std::input_iterator_tag /* top_actions_tag */, OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag /* pdf_tag */)
123  {
124  // iterators don't support <= in general
125  if (pdf_first == pdf_last || pdf_last < pdf_first)
127 
128  float num_models = (float)std::accumulate(top_actions_first, top_actions_last, 0.);
129  if (num_models <= 1e-6)
130  {
131  // based on above checks we have at least 1 element in pdf
132  *pdf_first = 1;
133  for (OutputIt d = pdf_first + 1; d != pdf_last; ++d)
134  *d = 0;
135 
136  return S_EXPLORATION_OK;
137  }
138 
139  // divide late to improve numeric stability
140  InputIt t_a = top_actions_first;
141  float normalizer = 1.f / num_models;
142  for (OutputIt d = pdf_first; d != pdf_last && t_a != top_actions_last; ++d, ++t_a)
143  *d = *t_a * normalizer;
144 
145  return S_EXPLORATION_OK;
146  }
147 
148  template<typename InputIt, typename OutputIt>
149  int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
150  {
151  typedef typename std::iterator_traits<InputIt>::iterator_category top_actions_category;
152  typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
153 
154  return generate_bag(top_actions_first, top_actions_last, top_actions_category(), pdf_first, pdf_last, pdf_category());
155  }
156 
157  template<typename It>
158  int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last, std::random_access_iterator_tag /* pdf_tag */)
159  {
160  // iterators don't support <= in general
161  if (pdf_first == pdf_last || pdf_last < pdf_first)
163 
164  size_t num_actions = pdf_last - pdf_first;
165 
166  if (minimum_uniform > 0.999) // uniform exploration
167  {
168  size_t support_size = num_actions;
169  if (!update_zero_elements)
170  {
171  for (It d = pdf_first; d != pdf_last; ++d)
172  if (*d == 0)
173  support_size--;
174  }
175 
176  for (It d = pdf_first; d != pdf_last; ++d)
177  if (update_zero_elements || *d > 0)
178  *d = 1.f / support_size;
179 
180  return S_EXPLORATION_OK;
181  }
182 
183  minimum_uniform /= num_actions;
184  float touched_mass = 0.;
185  float untouched_mass = 0.;
186  uint16_t num_actions_touched = 0;
187 
188  for (It d = pdf_first; d != pdf_last; ++d)
189  {
190  auto& prob = *d;
191  if ((prob > 0 || (prob == 0 && update_zero_elements)) && prob <= minimum_uniform)
192  {
193  touched_mass += minimum_uniform;
194  prob = minimum_uniform;
195  ++num_actions_touched;
196  }
197  else
198  untouched_mass += prob;
199  }
200 
201  if (touched_mass > 0.)
202  {
203  if (touched_mass > 0.999)
204  {
205  minimum_uniform = (1.f - untouched_mass) / (float)num_actions_touched;
206  for (It d = pdf_first; d != pdf_last; ++d)
207  {
208  auto& prob = *d;
209  if ((prob > 0 || (prob == 0 && update_zero_elements)) && prob <= minimum_uniform)
210  prob = minimum_uniform;
211  }
212  }
213  else
214  {
215  float ratio = (1.f - touched_mass) / untouched_mass;
216  for (It d = pdf_first; d != pdf_last; ++d)
217  if (*d > minimum_uniform)
218  *d *= ratio;
219  }
220  }
221 
222  return S_EXPLORATION_OK;
223  }
224 
225  template<typename It>
226  int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
227  {
228  typedef typename std::iterator_traits<It>::iterator_category pdf_category;
229 
230  return enforce_minimum_probability(minimum_uniform, update_zero_elements, pdf_first, pdf_last, pdf_category());
231  }
232 
233  // Warning: `seed` must be sufficiently random for the PRNG to produce uniform random values. Using sequential seeds will result in a very biased distribution.
234  // If unsure how to update seed between calls, merand48 (in rand48.h) can be used to inplace mutate it.
235  template<typename It>
236  int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t& chosen_index, std::input_iterator_tag /* pdf_category */)
237  {
238  if (pdf_first == pdf_last || pdf_last < pdf_first)
240  // Create a discrete_distribution based on the returned weights. This class handles the
241  // case where the sum of the weights is < or > 1, by normalizing agains the sum.
242  float total = 0.f;
243  for (It pdf = pdf_first; pdf != pdf_last; ++pdf)
244  {
245  if (*pdf < 0)
246  *pdf = 0;
247 
248  total += *pdf;
249  }
250 
251  // assume the first is the best
252  if (total == 0)
253  {
254  chosen_index = 0;
255  *pdf_first = 1;
256  return S_EXPLORATION_OK;
257  }
258 
259  float draw = total * uniform_random_merand48(seed);
260  if (draw > total) //make very sure that draw can not be greater than total.
261  draw = total;
262 
263  bool index_found = false; //found chosen action
264  float sum = 0.f;
265  uint32_t i = 0;
266  for (It pdf = pdf_first; pdf != pdf_last; ++pdf, ++i)
267  {
268  sum += *pdf;
269  if (!index_found && sum > draw)
270  {
271  chosen_index = i;
272  index_found = true;
273  }
274  *pdf /= total;
275  }
276 
277  if(!index_found)
278  chosen_index = i - 1;
279 
280  return S_EXPLORATION_OK;
281  }
282 
283  // Warning: `seed` must be sufficiently random for the PRNG to produce uniform random values. Using sequential seeds will result in a very biased distribution.
284  // If unsure how to update seed between calls, merand48 (in rand48.h) can be used to inplace mutate it.
285  template<typename It>
286  int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t& chosen_index)
287  {
288  typedef typename std::iterator_traits<It>::iterator_category pdf_category;
289  return sample_after_normalizing(seed, pdf_first, pdf_last, chosen_index, pdf_category());
290  }
291 
292  // Warning: `seed` must be sufficiently random for the PRNG to produce uniform random values. Using sequential seeds will result in a very biased distribution.
293  // If unsure how to update seed between calls, merand48 (in rand48.h) can be used to inplace mutate it.
294  template<typename It>
295  int sample_after_normalizing(const char* seed, It pdf_first, It pdf_last, uint32_t& chosen_index, std::random_access_iterator_tag pdf_category)
296  {
297  uint64_t seed_hash = uniform_hash(seed, strlen(seed), 0);
298  return sample_after_normalizing(seed_hash, pdf_first, pdf_last, chosen_index, pdf_category);
299  }
300 
301  // Warning: `seed` must be sufficiently random for the PRNG to produce uniform random values. Using sequential seeds will result in a very biased distribution.
302  // If unsure how to update seed between calls, merand48 (in rand48.h) can be used to inplace mutate it.
303  template<typename It>
304  int sample_after_normalizing(const char* seed, It pdf_first, It pdf_last, uint32_t& chosen_index)
305  {
306  typedef typename std::iterator_traits<It>::iterator_category pdf_category;
307  return sample_after_normalizing(seed, pdf_first, pdf_last, chosen_index, pdf_category());
308  }
309 
310  template<typename ActionIt>
311  int swap_chosen(ActionIt action_first, ActionIt action_last, std::forward_iterator_tag /* action_category */, uint32_t chosen_index)
312  {
313  if ( action_last < action_first )
315 
316  size_t action_size = action_last - action_first;
317 
318  if ( action_size == 0 )
320 
321  if ( chosen_index >= action_size )
323 
324  // swap top element with chosen one
325  if ( chosen_index != 0 ) {
326  std::iter_swap(action_first, action_first + chosen_index);
327  }
328 
329  return S_EXPLORATION_OK;
330  }
331 
332  template<typename ActionsIt>
333  int swap_chosen(ActionsIt action_first, ActionsIt action_last, uint32_t chosen_index) {
334  typedef typename std::iterator_traits<ActionsIt>::iterator_category actionit_category;
335  return swap_chosen(action_first, action_last, actionit_category(), chosen_index);
336  }
337 } // end-of-namespace
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
void accumulate(vw &all, parameters &weights, size_t offset)
Definition: accumulate.cc:20
const int bias
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
const uint64_t a
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
Swap the first value with the chosen index.
#define E_EXPLORATION_BAD_RANGE
Definition: explore.h:4
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
const uint64_t c
#define S_EXPLORATION_OK
Definition: explore.h:3
float uniform_random_merand48(uint64_t initial)