17 const uint64_t
a = 0xeece66d5deece66dULL;
18 const uint64_t
c = 2147483647;
20 const int bias = 127 << 23u;
31 initial = a * initial +
c;
33 temp.
i = ((initial >> 25) & 0x7FFFFF) |
bias;
38 int generate_epsilon_greedy(
float epsilon, uint32_t top_action, It pdf_first, It pdf_last, std::random_access_iterator_tag )
40 if (pdf_last < pdf_first)
43 size_t num_actions = pdf_last - pdf_first;
47 if (top_action >= num_actions)
48 top_action = (uint32_t)num_actions - 1;
50 float prob = epsilon / (float)num_actions;
52 for (It d = pdf_first; d != pdf_last; ++d)
55 *(pdf_first + top_action) += 1.
f - epsilon;
63 typedef typename std::iterator_traits<It>::iterator_category pdf_category;
67 template<
typename InputIt,
typename OutputIt>
68 int generate_softmax(
float lambda, InputIt scores_first, InputIt scores_last, std::input_iterator_tag , OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag )
70 if (scores_last < scores_first || pdf_last < pdf_first)
73 size_t num_actions_scores = scores_last - scores_first;
74 size_t num_actions_pdf = pdf_last - pdf_first;
76 if (num_actions_scores != num_actions_pdf)
79 scores_last = scores_first + std::min(num_actions_scores, num_actions_pdf);
80 OutputIt pdf_new_last = pdf_first + std::min(num_actions_scores, num_actions_pdf);
83 for (OutputIt d = pdf_new_last; d != pdf_last; ++d)
86 pdf_last = pdf_new_last;
89 if (pdf_last - pdf_first == 0)
93 float max_score = lambda > 0 ? *std::max_element(scores_first, scores_last)
94 : *std::min_element(scores_first, scores_last);
96 InputIt s = scores_first;
97 for (OutputIt d = pdf_first; d != pdf_last && s != scores_last; ++d, ++s)
99 float prob = exp(lambda*(*s - max_score));
106 for (OutputIt d = pdf_first; d != pdf_last; ++d)
112 template<
typename InputIt,
typename OutputIt>
113 int generate_softmax(
float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
115 typedef typename std::iterator_traits<InputIt>::iterator_category scores_category;
116 typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
118 return generate_softmax(lambda, scores_first, scores_last, scores_category(), pdf_first, pdf_last, pdf_category());
121 template<
typename InputIt,
typename OutputIt>
122 int generate_bag(InputIt top_actions_first, InputIt top_actions_last, std::input_iterator_tag , OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag )
125 if (pdf_first == pdf_last || pdf_last < pdf_first)
128 float num_models = (float)
std::accumulate(top_actions_first, top_actions_last, 0.);
129 if (num_models <= 1e-6)
133 for (OutputIt d = pdf_first + 1; d != pdf_last; ++d)
140 InputIt t_a = top_actions_first;
141 float normalizer = 1.f / num_models;
142 for (OutputIt d = pdf_first; d != pdf_last && t_a != top_actions_last; ++d, ++t_a)
143 *d = *t_a * normalizer;
148 template<
typename InputIt,
typename OutputIt>
149 int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
151 typedef typename std::iterator_traits<InputIt>::iterator_category top_actions_category;
152 typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
154 return generate_bag(top_actions_first, top_actions_last, top_actions_category(), pdf_first, pdf_last, pdf_category());
157 template<
typename It>
161 if (pdf_first == pdf_last || pdf_last < pdf_first)
164 size_t num_actions = pdf_last - pdf_first;
166 if (minimum_uniform > 0.999)
168 size_t support_size = num_actions;
169 if (!update_zero_elements)
171 for (It d = pdf_first; d != pdf_last; ++d)
176 for (It d = pdf_first; d != pdf_last; ++d)
177 if (update_zero_elements || *d > 0)
178 *d = 1.f / support_size;
183 minimum_uniform /= num_actions;
184 float touched_mass = 0.;
185 float untouched_mass = 0.;
186 uint16_t num_actions_touched = 0;
188 for (It d = pdf_first; d != pdf_last; ++d)
191 if ((
prob > 0 || (
prob == 0 && update_zero_elements)) &&
prob <= minimum_uniform)
193 touched_mass += minimum_uniform;
194 prob = minimum_uniform;
195 ++num_actions_touched;
198 untouched_mass +=
prob;
201 if (touched_mass > 0.)
203 if (touched_mass > 0.999)
205 minimum_uniform = (1.f - untouched_mass) / (
float)num_actions_touched;
206 for (It d = pdf_first; d != pdf_last; ++d)
209 if ((
prob > 0 || (
prob == 0 && update_zero_elements)) &&
prob <= minimum_uniform)
210 prob = minimum_uniform;
215 float ratio = (1.f - touched_mass) / untouched_mass;
216 for (It d = pdf_first; d != pdf_last; ++d)
217 if (*d > minimum_uniform)
225 template<
typename It>
228 typedef typename std::iterator_traits<It>::iterator_category pdf_category;
235 template<
typename It>
238 if (pdf_first == pdf_last || pdf_last < pdf_first)
243 for (It pdf = pdf_first; pdf != pdf_last; ++pdf)
263 bool index_found =
false;
266 for (It pdf = pdf_first; pdf != pdf_last; ++pdf, ++
i)
269 if (!index_found && sum > draw)
278 chosen_index = i - 1;
285 template<
typename It>
288 typedef typename std::iterator_traits<It>::iterator_category pdf_category;
294 template<
typename It>
295 int sample_after_normalizing(
const char* seed, It pdf_first, It pdf_last, uint32_t& chosen_index, std::random_access_iterator_tag pdf_category)
297 uint64_t seed_hash =
uniform_hash(seed, strlen(seed), 0);
303 template<
typename It>
306 typedef typename std::iterator_traits<It>::iterator_category pdf_category;
310 template<
typename ActionIt>
311 int swap_chosen(ActionIt action_first, ActionIt action_last, std::forward_iterator_tag , uint32_t chosen_index)
313 if ( action_last < action_first )
316 size_t action_size = action_last - action_first;
318 if ( action_size == 0 )
321 if ( chosen_index >= action_size )
325 if ( chosen_index != 0 ) {
326 std::iter_swap(action_first, action_first + chosen_index);
332 template<
typename ActionsIt>
333 int swap_chosen(ActionsIt action_first, ActionsIt action_last, uint32_t chosen_index) {
334 typedef typename std::iterator_traits<ActionsIt>::iterator_category actionit_category;
335 return swap_chosen(action_first, action_last, actionit_category(), chosen_index);
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
void accumulate(vw &all, parameters &weights, size_t offset)
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
Swap the first value with the chosen index.
#define E_EXPLORATION_BAD_RANGE
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
float uniform_random_merand48(uint64_t initial)