Vowpal Wabbit
explore.h
Go to the documentation of this file.
1 #pragma once
2 
// Integer status codes for the exploration API.
// NOTE(review): presumably these are the values returned by the int-returning
// functions declared in namespace exploration below; no use of them is visible
// in this header, so confirm against explore_internal.h.

// Success (value 0).
3 #define S_EXPLORATION_OK 0
// Error code 1. Judging by the name, signals an invalid range argument - TODO confirm.
4 #define E_EXPLORATION_BAD_RANGE 1
// Error code 2. Judging by the name, signals that a pdf and a ranking differ in size - TODO confirm.
5 #define E_EXPLORATION_PDF_RANKING_SIZE_MISMATCH 2
6 
7 #include "explore_internal.h"
8 
// Declarations of the exploration API. Only signatures appear here; the
// definitions are not visible in this listing (presumably provided via
// explore_internal.h - confirm). Every function returns int.
// NOTE(review): the return value is presumably one of the S_EXPLORATION_* /
// E_EXPLORATION_* codes defined at the top of this file - confirm.
9 namespace exploration {
    // Generates epsilon-greedy style exploration distribution.
    // The result is written through the pdf_first/pdf_last iterator pair
    // (presumably a half-open range [pdf_first, pdf_last) - TODO confirm).
20  template<typename It>
21  int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last);
22 
    // Generates softmax style exploration distribution.
    // Takes an input iterator pair over scores and an output iterator pair for the pdf.
35  template<typename InputIt, typename OutputIt>
36  int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last);
37 
    // Generates an exploration distribution according to votes on actions.
    // Takes an input iterator pair over top_actions and an output iterator pair for the pdf.
49  template<typename InputIt, typename OutputIt>
50  int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last);
51 
    // Updates the pdf to ensure each action is explored with at least
    // minimum_uniform/num_actions.
    // NOTE(review): update_zero_elements presumably controls whether entries that
    // are currently zero are raised as well - confirm against the implementation.
62  template<typename It>
63  int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last);
64 
    // Sample an index from the provided pdf. If the pdf is not normalized it will
    // be updated in-place (the description available on this page is truncated
    // after that point). The sampled index is returned through chosen_index.
    // This overload takes the seed as a uint64_t.
75  template<typename It>
76  int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t& chosen_index);
77 
    // Overload of sample_after_normalizing that takes the seed as a const char*
    // instead of a uint64_t; the remaining parameters are identical.
    // NOTE(review): how the string is turned into a numeric seed is not shown here.
88  template<typename It>
89  int sample_after_normalizing(const char* seed, It pdf_first, It pdf_last, uint32_t& chosen_index);
90 
    // Swap the first value with the chosen index, i.e. (presumably) exchanges the
    // element at action_first with the element at position chosen_index - confirm.
100  template<typename ActionIt>
101  int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index);
102 }
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Samples an index from the provided pdf. If the pdf is not normalized, it will be updated in-place...
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
Swaps the first value with the value at the chosen index.
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.