// Status codes for the exploration routines declared in this header.
// NOTE(review): the per-code meanings below are inferred from the macro names
// only; confirm against the implementation before relying on them.
// Success.
#define S_EXPLORATION_OK 0
// Presumably: an iterator range argument was invalid.
#define E_EXPLORATION_BAD_RANGE 1
// Presumably: the pdf and ranking ranges differ in length.
#define E_EXPLORATION_PDF_RANKING_SIZE_MISMATCH 2

/**
 * @brief Generates softmax style exploration distribution.
 *
 * @tparam InputIt   Iterator type of the score range.
 * @tparam OutputIt  Iterator type of the pdf range.
 * @param lambda        Softmax parameter (its exact role is not visible in
 *                      this declaration; TODO confirm against the definition).
 * @param scores_first  Start of the score range.
 * @param scores_last   End of the score range.
 * @param pdf_first     Start of the pdf range.
 * @param pdf_last      End of the pdf range.
 * @return An int status; presumably one of the S_/E_EXPLORATION_* codes
 *         (TODO confirm against the definition).
 */
template <typename InputIt, typename OutputIt>
int generate_softmax(
    float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last);
/**
 * @brief Generates an exploration distribution according to votes on actions.
 *
 * @tparam InputIt   Iterator type of the top-actions range.
 * @tparam OutputIt  Iterator type of the pdf range.
 * @param top_actions_first  Start of the top-actions (vote) range.
 * @param top_actions_last   End of the top-actions (vote) range.
 * @param pdf_first          Start of the pdf range.
 * @param pdf_last           End of the pdf range.
 * @return An int status; presumably one of the S_/E_EXPLORATION_* codes
 *         (TODO confirm against the definition).
 */
template <typename InputIt, typename OutputIt>
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last);
/**
 * @brief Swap the first value with the chosen index.
 *
 * @tparam ActionIt  Iterator type of the action range.
 * @param action_first  Start of the action range.
 * @param action_last   End of the action range.
 * @param chosen_index  Index of the chosen element within the range.
 * @return An int status; presumably one of the S_/E_EXPLORATION_* codes
 *         (TODO confirm against the definition).
 */
template <typename ActionIt>
int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index);
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized, it will be updated in-place...
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates a softmax-style exploration distribution.
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates an epsilon-greedy-style exploration distribution.
int swap_chosen(ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
Swaps the first value with the value at the chosen index.
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with probability at least minimum_uniform/num_actions.