Vowpal Wabbit
Classes | Functions | Variables
exploration Namespace Reference

Classes

union  int_float
 

Functions

template<typename It >
int generate_epsilon_greedy (float epsilon, uint32_t top_action, It pmf_first, It pmf_last)
 Generates epsilon-greedy style exploration distribution. More...
 
template<typename InputIt , typename OutputIt >
int generate_softmax (float lambda, InputIt scores_first, InputIt scores_last, OutputIt pmf_first, OutputIt pmf_last)
 Generates softmax style exploration distribution. More...
 
template<typename InputIt , typename OutputIt >
int generate_bag (InputIt top_actions_first, InputIt top_actions_last, OutputIt pmf_first, OutputIt pmf_last)
 Generates an exploration distribution according to votes on actions. More...
 
template<typename It >
int enforce_minimum_probability (float minimum_uniform, bool update_zero_elements, It pmf_first, It pmf_last)
 Updates the pmf to ensure each action is explored with at least minimum_uniform/num_actions. More...
 
template<typename It >
int sample_after_normalizing (uint64_t seed, It pmf_first, It pmf_last, uint32_t &chosen_index)
 Sample an index from the provided pmf. If the pmf is not normalized it will be updated in-place. More...
 
template<typename It >
int sample_after_normalizing (const char *seed, It pmf_first, It pmf_last, uint32_t &chosen_index)
 Sample an index from the provided pmf. If the pmf is not normalized it will be updated in-place. More...
 
template<typename ActionIt >
int swap_chosen (ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
 Swap the first value with the chosen index. More...
 
template<typename It >
int sample_pdf (uint64_t *p_seed, It pdf_first, It pdf_last, float &chosen_value, float &pdf_value)
 Sample a continuous value from the provided pdf. More...
 
float uniform_random_merand48_advance (uint64_t &initial)
 
float uniform_random_merand48 (uint64_t initial)
 
template<typename It >
int generate_epsilon_greedy (float epsilon, uint32_t top_action, It pmf_first, It pmf_last, std::random_access_iterator_tag)
 
template<typename InputIt , typename OutputIt >
int generate_softmax (float lambda, InputIt scores_first, InputIt scores_last, std::input_iterator_tag, OutputIt pmf_first, OutputIt pmf_last, std::random_access_iterator_tag)
 
template<typename InputIt , typename OutputIt >
int generate_bag (InputIt top_actions_first, InputIt top_actions_last, std::input_iterator_tag, OutputIt pmf_first, OutputIt pmf_last, std::random_access_iterator_tag)
 
template<typename It >
int enforce_minimum_probability (float minimum_uniform, bool update_zero_elements, It pmf_first, It pmf_last, std::random_access_iterator_tag)
 
template<typename It >
int sample_after_normalizing (uint64_t seed, It pmf_first, It pmf_last, uint32_t &chosen_index, std::input_iterator_tag)
 
template<typename It >
int sample_after_normalizing (const char *seed, It pmf_first, It pmf_last, uint32_t &chosen_index, std::random_access_iterator_tag pmf_category)
 
template<typename ActionIt >
int swap_chosen (ActionIt action_first, ActionIt action_last, std::forward_iterator_tag, uint32_t chosen_index)
 
template<typename ActionsIt >
int swap_chosen (ActionsIt action_first, ActionsIt action_last, uint32_t chosen_index)
 
template<typename It >
int sample_scores (uint64_t *p_seed, It scores_first, It scores_last, uint32_t &chosen_index, std::random_access_iterator_tag)
 
template<typename It >
int sample_pdf (uint64_t *p_seed, It pdf_first, It pdf_last, float &chosen_value, float &pdf_value, std::random_access_iterator_tag)
 Sample a continuous value from the provided pdf. More...
 

Variables

constexpr uint64_t CONSTANT_A = 0xeece66d5deece66dULL
 
constexpr uint64_t CONSTANT_C = 2147483647
 
constexpr int BIAS = 127 << 23u
 

Function Documentation

template<typename It >
int exploration::enforce_minimum_probability ( float  minimum_uniform,
bool  update_zero_elements,
It  pmf_first,
It  pmf_last 
)

Updates the pmf to ensure each action is explored with at least minimum_uniform/num_actions.

Template Parameters
It: Iterator type of the pmf. Must be a RandomAccessIterator.
Parameters
minimum_uniform: The minimum amount of uniform distribution to impose on the pmf.
update_zero_elements: If true, elements with zero probability are updated; otherwise those actions will be unchanged.
pmf_first: Iterator pointing to the pre-allocated beginning of the pmf to be updated by this function.
pmf_last: Iterator pointing to the pre-allocated end of the pmf to be updated by this function.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::enforce_minimum_probability ( float  minimum_uniform,
bool  update_zero_elements,
It  pmf_first,
It  pmf_last,
std::random_access_iterator_tag   
)
template<typename InputIt , typename OutputIt >
int exploration::generate_bag ( InputIt  top_actions_first,
InputIt  top_actions_last,
OutputIt  pmf_first,
OutputIt  pmf_last 
)

Generates an exploration distribution according to votes on actions.

Template Parameters
InputIt: Iterator type of the input actions. Must be an InputIterator.
OutputIt: Iterator type of the pre-allocated pmf. Must be a RandomAccessIterator.
Parameters
top_actions_first: Iterator pointing to the beginning of the top actions.
top_actions_last: Iterator pointing to the end of the top actions.
pmf_first: Iterator pointing to the pre-allocated beginning of the pmf to be generated by this function.
pmf_last: Iterator pointing to the pre-allocated end of the pmf to be generated by this function.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename InputIt , typename OutputIt >
int exploration::generate_bag ( InputIt  top_actions_first,
InputIt  top_actions_last,
std::input_iterator_tag  ,
OutputIt  pmf_first,
OutputIt  pmf_last,
std::random_access_iterator_tag   
)
template<typename It >
int exploration::generate_epsilon_greedy ( float  epsilon,
uint32_t  top_action,
It  pmf_first,
It  pmf_last 
)

Generates epsilon-greedy style exploration distribution.

Template Parameters
It: Iterator type of the pre-allocated pmf. Must be a RandomAccessIterator.
Parameters
epsilon: Minimum probability used to explore among options. Each action is explored with at least epsilon/num_actions.
top_action: Index of the exploit action. This action will get probability mass of 1-epsilon + (epsilon/num_actions).
pmf_first: Iterator pointing to the pre-allocated beginning of the pmf to be generated by this function.
pmf_last: Iterator pointing to the pre-allocated end of the pmf to be generated by this function.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::generate_epsilon_greedy ( float  epsilon,
uint32_t  top_action,
It  pmf_first,
It  pmf_last,
std::random_access_iterator_tag   
)
template<typename InputIt , typename OutputIt >
int exploration::generate_softmax ( float  lambda,
InputIt  scores_first,
InputIt  scores_last,
OutputIt  pmf_first,
OutputIt  pmf_last 
)

Generates softmax style exploration distribution.

Template Parameters
InputIt: Iterator type of the input scores. Must be an InputIterator.
OutputIt: Iterator type of the pre-allocated pmf. Must be a RandomAccessIterator.
Parameters
lambda: Lambda parameter of softmax.
scores_first: Iterator pointing to the beginning of the scores.
scores_last: Iterator pointing to the end of the scores.
pmf_first: Iterator pointing to the pre-allocated beginning of the pmf to be generated by this function.
pmf_last: Iterator pointing to the pre-allocated end of the pmf to be generated by this function.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename InputIt , typename OutputIt >
int exploration::generate_softmax ( float  lambda,
InputIt  scores_first,
InputIt  scores_last,
std::input_iterator_tag  ,
OutputIt  pmf_first,
OutputIt  pmf_last,
std::random_access_iterator_tag   
)
template<typename It >
int exploration::sample_after_normalizing ( uint64_t  seed,
It  pmf_first,
It  pmf_last,
uint32_t &  chosen_index 
)

Sample an index from the provided pmf. If the pmf is not normalized it will be updated in-place.

Template Parameters
It: Iterator type of the pmf. Must be a RandomAccessIterator.
Parameters
seed: The seed for the pseudo-random generator.
pmf_first: Iterator pointing to the beginning of the pmf.
pmf_last: Iterator pointing to the end of the pmf.
chosen_index: returns the chosen index.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::sample_after_normalizing ( const char *  seed,
It  pmf_first,
It  pmf_last,
uint32_t &  chosen_index 
)

Sample an index from the provided pmf. If the pmf is not normalized it will be updated in-place.

Template Parameters
It: Iterator type of the pmf. Must be a RandomAccessIterator.
Parameters
seed: The seed for the pseudo-random generator. Will be hashed using MURMUR hash.
pmf_first: Iterator pointing to the beginning of the pmf.
pmf_last: Iterator pointing to the end of the pmf.
chosen_index: returns the chosen index.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::sample_after_normalizing ( uint64_t  seed,
It  pmf_first,
It  pmf_last,
uint32_t &  chosen_index,
std::input_iterator_tag   
)
template<typename It >
int exploration::sample_after_normalizing ( const char *  seed,
It  pmf_first,
It  pmf_last,
uint32_t &  chosen_index,
std::random_access_iterator_tag  pmf_category 
)
template<typename It >
int exploration::sample_pdf ( uint64_t *  p_seed,
It  pdf_first,
It  pdf_last,
float &  chosen_value,
float &  pdf_value 
)

Sample a continuous value from the provided pdf.

Template Parameters
It: Iterator type of the pdf. Must be a RandomAccessIterator.
Parameters
p_seed: The seed for the pseudo-random generator. Will be hashed using MURMUR hash. The seed state will be advanced.
pdf_first: Iterator pointing to the beginning of the pdf.
pdf_last: Iterator pointing to the end of the pdf.
chosen_value: returns the sampled continuous value.
pdf_value: returns the probability density at the sampled location.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::sample_pdf ( uint64_t *  p_seed,
It  pdf_first,
It  pdf_last,
float &  chosen_value,
float &  pdf_value,
std::random_access_iterator_tag   
)

Sample a continuous value from the provided pdf.

Template Parameters
It: Iterator type of the pdf. Must be a RandomAccessIterator.
Parameters
p_seed: The seed for the pseudo-random generator. Will be hashed using MURMUR hash. The seed state will be advanced.
pdf_first: Iterator pointing to the beginning of the pdf.
pdf_last: Iterator pointing to the end of the pdf.
chosen_value: returns the sampled continuous value.
pdf_value: returns the probability density at the sampled location.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename It >
int exploration::sample_scores ( uint64_t *  p_seed,
It  scores_first,
It  scores_last,
uint32_t &  chosen_index,
std::random_access_iterator_tag   
)
template<typename ActionIt >
int exploration::swap_chosen ( ActionIt  action_first,
ActionIt  action_last,
uint32_t  chosen_index 
)

Swap the first value with the chosen index.

Template Parameters
ActionIt: Iterator type of the action. Must be a ForwardIterator.
Parameters
action_first: Iterator pointing to the beginning of the pdf.
action_last: Iterator pointing to the end of the pdf.
chosen_index: The index value that should be swapped with the first element.
Returns
int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.
template<typename ActionIt >
int exploration::swap_chosen ( ActionIt  action_first,
ActionIt  action_last,
std::forward_iterator_tag  ,
uint32_t  chosen_index 
)
template<typename ActionsIt >
int exploration::swap_chosen ( ActionsIt  action_first,
ActionsIt  action_last,
uint32_t  chosen_index 
)
float exploration::uniform_random_merand48 ( uint64_t  initial)
inline
float exploration::uniform_random_merand48_advance ( uint64_t &  initial)
inline

Variable Documentation

constexpr int exploration::BIAS = 127 << 23u
constexpr uint64_t exploration::CONSTANT_A = 0xeece66d5deece66dULL
constexpr uint64_t exploration::CONSTANT_C = 2147483647