Classes
union	int_float

Functions
template<typename It >
int	generate_epsilon_greedy (float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
	Generates epsilon-greedy style exploration distribution. More...

template<typename InputIt , typename OutputIt >
int	generate_softmax (float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
	Generates softmax style exploration distribution. More...

template<typename InputIt , typename OutputIt >
int	generate_bag (InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
	Generates an exploration distribution according to votes on actions. More...

template<typename It >
int	enforce_minimum_probability (float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
	Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions. More...

template<typename It >
int	sample_after_normalizing (uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
	Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place. More...

template<typename It >
int	sample_after_normalizing (const char *seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
	Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place. More...

template<typename ActionIt >
int	swap_chosen (ActionIt action_first, ActionIt action_last, uint32_t chosen_index)
	Swap the first value with the chosen index. More...

float	uniform_random_merand48 (uint64_t initial)

template<typename It >
int	generate_epsilon_greedy (float epsilon, uint32_t top_action, It pdf_first, It pdf_last, std::random_access_iterator_tag)

template<typename InputIt , typename OutputIt >
int	generate_softmax (float lambda, InputIt scores_first, InputIt scores_last, std::input_iterator_tag, OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag)

template<typename InputIt , typename OutputIt >
int	generate_bag (InputIt top_actions_first, InputIt top_actions_last, std::input_iterator_tag, OutputIt pdf_first, OutputIt pdf_last, std::random_access_iterator_tag)

template<typename It >
int	enforce_minimum_probability (float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last, std::random_access_iterator_tag)

template<typename It >
int	sample_after_normalizing (uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index, std::input_iterator_tag)

template<typename It >
int	sample_after_normalizing (const char *seed, It pdf_first, It pdf_last, uint32_t &chosen_index, std::random_access_iterator_tag pdf_category)

template<typename ActionIt >
int	swap_chosen (ActionIt action_first, ActionIt action_last, std::forward_iterator_tag, uint32_t chosen_index)

template<typename ActionsIt >
int	swap_chosen (ActionsIt action_first, ActionsIt action_last, uint32_t chosen_index)

Variables
const uint64_t	a = 0xeece66d5deece66dULL

const uint64_t	c = 2147483647

const int	bias = 127 << 23u

Function Documentation

◆ enforce_minimum_probability() [1/2]

template<typename It >

int exploration::enforce_minimum_probability	(	float	minimum_uniform,
		bool	update_zero_elements,
		It	pdf_first,
		It	pdf_last
	)

Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.

Template Parameters

It	Iterator type of the pdf. Must be a RandomAccessIterator.

Parameters

minimum_uniform	The minimum amount of uniform distribution to impose on the pdf.
update_zero_elements	If true elements with zero probability are updated, otherwise those actions will be unchanged.
pdf_first	Iterator pointing to the pre-allocated beginning of the pdf to be updated in-place by this function.
pdf_last	Iterator pointing to the pre-allocated end of the pdf to be updated in-place by this function.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 226 of file explore_internal.h.

Referenced by CB_EXPLORE::get_cover_probabilities(), vw_slim::vw_predict< W >::predict(), VW::cb_explore_adf::softmax::cb_explore_adf_softmax::predict_or_learn_impl(), VW::cb_explore_adf::first::cb_explore_adf_first::predict_or_learn_impl(), VW::cb_explore_adf::bag::cb_explore_adf_bag::predict_or_learn_impl(), VW::cb_explore_adf::cover::cb_explore_adf_cover::predict_or_learn_impl(), VW::cb_explore_adf::regcb::cb_explore_adf_regcb::predict_or_learn_impl(), and TEST().

   {
       typedef typename std::iterator_traits<It>::iterator_category pdf_category;
 
       return enforce_minimum_probability(minimum_uniform, update_zero_elements, pdf_first, pdf_last, pdf_category());
   }

◆ enforce_minimum_probability() [2/2]

template<typename It >

int exploration::enforce_minimum_probability	(	float	minimum_uniform,
		bool	update_zero_elements,
		It	pdf_first,
		It	pdf_last,
		std::random_access_iterator_tag
	)

Definition at line 158 of file explore_internal.h.

References E_EXPLORATION_BAD_RANGE, prediction_type::prob, and S_EXPLORATION_OK.

   {
     // iterators don't support <= in general
     if (pdf_first == pdf_last || pdf_last < pdf_first)
       return E_EXPLORATION_BAD_RANGE;
 
       size_t num_actions = pdf_last - pdf_first;
 
     if (minimum_uniform > 0.999) // uniform exploration
     {
       size_t support_size = num_actions;
       if (!update_zero_elements)
       {
         for (It d = pdf_first; d != pdf_last; ++d)
           if (*d == 0)
             support_size--;
       }
 
         for (It d = pdf_first; d != pdf_last; ++d)
         if (update_zero_elements || *d > 0)
           *d = 1.f / support_size;
 
       return S_EXPLORATION_OK;
     }
 
     minimum_uniform /= num_actions;
     float touched_mass = 0.;
     float untouched_mass = 0.;
     uint16_t num_actions_touched = 0;
 
       for (It d = pdf_first; d != pdf_last; ++d)
     {
       auto& prob = *d;
       if ((prob > 0 || (prob == 0 && update_zero_elements)) && prob <= minimum_uniform)
       {
         touched_mass += minimum_uniform;
         prob = minimum_uniform;
         ++num_actions_touched;
       }
       else
         untouched_mass += prob;
     }
 
     if (touched_mass > 0.)
     {
       if (touched_mass > 0.999)
       {
         minimum_uniform = (1.f - untouched_mass) / (float)num_actions_touched;
         for (It d = pdf_first; d != pdf_last; ++d)
         {
           auto& prob = *d;
           if ((prob > 0 || (prob == 0 && update_zero_elements)) && prob <= minimum_uniform)
             prob = minimum_uniform;
         }
       }
       else
       {
         float ratio = (1.f - touched_mass) / untouched_mass;
         for (It d = pdf_first; d != pdf_last; ++d)
           if (*d > minimum_uniform)
             *d *= ratio;
       }
     }
 
     return S_EXPLORATION_OK;
   }

◆ generate_bag() [1/2]

template<typename InputIt , typename OutputIt >

int exploration::generate_bag	(	InputIt	top_actions_first,
		InputIt	top_actions_last,
		OutputIt	pdf_first,
		OutputIt	pdf_last
	)

Generates an exploration distribution according to votes on actions.

Template Parameters

InputIt	Iterator type of the input actions. Must be an InputIterator.
OutputIt	Iterator type of the pre-allocated pdf. Must be a RandomAccessIterator.

Parameters

top_actions_first	Iterator pointing to the beginning of the top actions.
top_actions_last	Iterator pointing to the end of the top actions.
pdf_first	Iterator pointing to the pre-allocated beginning of the pdf to be generated by this function.
pdf_last	Iterator pointing to the pre-allocated end of the pdf to be generated by this function.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 149 of file explore_internal.h.

Referenced by vw_slim::vw_predict< W >::predict(), VW::cb_explore_adf::bag::cb_explore_adf_bag::predict_or_learn_impl(), and TEST().

   {
     typedef typename std::iterator_traits<InputIt>::iterator_category top_actions_category;
     typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
 
     return generate_bag(top_actions_first, top_actions_last, top_actions_category(), pdf_first, pdf_last, pdf_category());
   }

◆ generate_bag() [2/2]

template<typename InputIt , typename OutputIt >

int exploration::generate_bag	(	InputIt	top_actions_first,
		InputIt	top_actions_last,
		std::input_iterator_tag	,
		OutputIt	pdf_first,
		OutputIt	pdf_last,
		std::random_access_iterator_tag
	)

Definition at line 122 of file explore_internal.h.

References accumulate(), E_EXPLORATION_BAD_RANGE, and S_EXPLORATION_OK.

   {
     // iterators don't support <= in general
     if (pdf_first == pdf_last || pdf_last < pdf_first)
       return E_EXPLORATION_BAD_RANGE;
 
     float num_models = (float)std::accumulate(top_actions_first, top_actions_last, 0.);
     if (num_models <= 1e-6)
     {
       // based on above checks we have at least 1 element in pdf
       *pdf_first = 1;
       for (OutputIt d = pdf_first + 1; d != pdf_last; ++d)
         *d = 0;
 
       return S_EXPLORATION_OK;
     }
 
     // divide late to improve numeric stability
     InputIt t_a = top_actions_first;
     float normalizer = 1.f / num_models;
     for (OutputIt d = pdf_first; d != pdf_last && t_a != top_actions_last; ++d, ++t_a)
       *d = *t_a * normalizer;
 
     return S_EXPLORATION_OK;
   }

◆ generate_epsilon_greedy() [1/2]

template<typename It >

int exploration::generate_epsilon_greedy	(	float	epsilon,
		uint32_t	top_action,
		It	pdf_first,
		It	pdf_last
	)

Generates epsilon-greedy style exploration distribution.

Template Parameters

It	Iterator type of the pre-allocated pdf. Must be a RandomAccessIterator.

Parameters

epsilon	Minimum probability used to explore among options. Each action is explored with at least epsilon/num_actions.
top_action	Index of the exploit action. This action will get a probability mass of 1-epsilon + (epsilon/num_actions).
pdf_first	Iterator pointing to the pre-allocated beginning of the pdf to be generated by this function.
pdf_last	Iterator pointing to the pre-allocated end of the pdf to be generated by this function.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 61 of file explore_internal.h.

Referenced by vw_slim::vw_predict< W >::predict(), CB_EXPLORE::predict_or_learn_greedy(), and TEST().

   {
     typedef typename std::iterator_traits<It>::iterator_category pdf_category;
     return generate_epsilon_greedy(epsilon, top_action, pdf_first, pdf_last, pdf_category());
   }

◆ generate_epsilon_greedy() [2/2]

template<typename It >

int exploration::generate_epsilon_greedy	(	float	epsilon,
		uint32_t	top_action,
		It	pdf_first,
		It	pdf_last,
		std::random_access_iterator_tag
	)

Definition at line 38 of file explore_internal.h.

References E_EXPLORATION_BAD_RANGE, exploration::int_float::f, prediction_type::prob, and S_EXPLORATION_OK.

   {
     if (pdf_last < pdf_first)
       return E_EXPLORATION_BAD_RANGE;
 
     size_t num_actions = pdf_last - pdf_first;
     if (num_actions == 0)
       return E_EXPLORATION_BAD_RANGE;
 
     if (top_action >= num_actions)
       top_action = (uint32_t)num_actions - 1;
 
     float prob = epsilon / (float)num_actions;
 
     for (It d = pdf_first; d != pdf_last; ++d)
       *d = prob;
 
     *(pdf_first + top_action) += 1.f - epsilon;
 
     return S_EXPLORATION_OK;
   }

◆ generate_softmax() [1/2]

template<typename InputIt , typename OutputIt >

int exploration::generate_softmax	(	float	lambda,
		InputIt	scores_first,
		InputIt	scores_last,
		OutputIt	pdf_first,
		OutputIt	pdf_last
	)

Generates softmax style exploration distribution.

Template Parameters

InputIt	Iterator type of the input scores. Must be an InputIterator.
OutputIt	Iterator type of the pre-allocated pdf. Must be a RandomAccessIterator.

Parameters

lambda	Lambda parameter of softmax.
scores_first	Iterator pointing to beginning of the scores.
scores_last	Iterator pointing to end of the scores.
pdf_first	Iterator pointing to the pre-allocated beginning of the pdf to be generated by this function.
pdf_last	Iterator pointing to the pre-allocated end of the pdf to be generated by this function.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 113 of file explore_internal.h.

Referenced by CB_ADF::cb_adf::learn_SM(), vw_slim::vw_predict< W >::predict(), VW::cb_explore_adf::softmax::cb_explore_adf_softmax::predict_or_learn_impl(), and TEST().

   {
     typedef typename std::iterator_traits<InputIt>::iterator_category scores_category;
     typedef typename std::iterator_traits<OutputIt>::iterator_category pdf_category;
 
     return generate_softmax(lambda, scores_first, scores_last, scores_category(), pdf_first, pdf_last, pdf_category());
   }

◆ generate_softmax() [2/2]

template<typename InputIt , typename OutputIt >

int exploration::generate_softmax	(	float	lambda,
		InputIt	scores_first,
		InputIt	scores_last,
		std::input_iterator_tag	,
		OutputIt	pdf_first,
		OutputIt	pdf_last,
		std::random_access_iterator_tag
	)

Definition at line 68 of file explore_internal.h.

References E_EXPLORATION_BAD_RANGE, prediction_type::prob, and S_EXPLORATION_OK.

   {
     if (scores_last < scores_first || pdf_last < pdf_first)
       return E_EXPLORATION_BAD_RANGE;
 
     size_t num_actions_scores = scores_last - scores_first;
     size_t num_actions_pdf = pdf_last - pdf_first;
 
     if (num_actions_scores != num_actions_pdf)
     {
       // fallback to the minimum
       scores_last = scores_first + std::min(num_actions_scores, num_actions_pdf);
       OutputIt pdf_new_last = pdf_first + std::min(num_actions_scores, num_actions_pdf);
 
       // zero out pdf
       for (OutputIt d = pdf_new_last; d != pdf_last; ++d)
         *d = 0;
 
       pdf_last = pdf_new_last;
     }
 
     if (pdf_last - pdf_first == 0)
       return E_EXPLORATION_BAD_RANGE;
 
     float norm = 0.;
     float max_score = lambda > 0 ? *std::max_element(scores_first, scores_last)
                                  : *std::min_element(scores_first, scores_last);
 
     InputIt s = scores_first;
     for (OutputIt d = pdf_first; d != pdf_last && s != scores_last; ++d, ++s)
     {
       float prob = exp(lambda*(*s - max_score));
       norm += prob;
 
       *d = prob;
     }
 
     // normalize
     for (OutputIt d = pdf_first; d != pdf_last; ++d)
       *d /= norm;
 
     return S_EXPLORATION_OK;
   }

◆ sample_after_normalizing() [1/4]

template<typename It >

int exploration::sample_after_normalizing	(	uint64_t	seed,
		It	pdf_first,
		It	pdf_last,
		uint32_t &	chosen_index
	)

Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place.

Template Parameters

It	Iterator type of the pdf. Must be a RandomAccessIterator.

Parameters

seed	The seed for the pseudo-random generator.
pdf_first	Iterator pointing to the beginning of the pdf.
pdf_last	Iterator pointing to the end of the pdf.
chosen_index	returns the chosen index.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 286 of file explore_internal.h.

Referenced by do_actual_learning_ldf(), VW::cb_sample_data::learn_or_predict(), vw_slim::vw_predict< W >::predict(), predict_bandit_adf(), predict_or_learn(), predict_or_learn_adf(), sample_after_normalizing(), and TEST().

   {
       typedef typename std::iterator_traits<It>::iterator_category pdf_category;
     return sample_after_normalizing(seed, pdf_first, pdf_last, chosen_index, pdf_category());
   }

◆ sample_after_normalizing() [2/4]

template<typename It >

int exploration::sample_after_normalizing	(	const char *	seed,
		It	pdf_first,
		It	pdf_last,
		uint32_t &	chosen_index
	)

Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place.

Template Parameters

It	Iterator type of the pdf. Must be a RandomAccessIterator.

Parameters

seed	The seed for the pseudo-random generator. Will be hashed using MURMUR hash.
pdf_first	Iterator pointing to the beginning of the pdf.
pdf_last	Iterator pointing to the end of the pdf.
chosen_index	returns the chosen index.

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Definition at line 304 of file explore_internal.h.

References sample_after_normalizing().

   {
       typedef typename std::iterator_traits<It>::iterator_category pdf_category;
     return sample_after_normalizing(seed, pdf_first, pdf_last, chosen_index, pdf_category());
   }

◆ sample_after_normalizing() [3/4]

template<typename It >

int exploration::sample_after_normalizing	(	uint64_t	seed,
		It	pdf_first,
		It	pdf_last,
		uint32_t &	chosen_index,
		std::input_iterator_tag
	)

Definition at line 236 of file explore_internal.h.

References E_EXPLORATION_BAD_RANGE, exploration::int_float::i, S_EXPLORATION_OK, and uniform_random_merand48().

   {
     // Reject empty or reversed ranges.
     if (pdf_first == pdf_last || pdf_last < pdf_first)
       return E_EXPLORATION_BAD_RANGE;
     // First pass: clamp negative entries to zero and sum up the weights. The weights
     // do not have to sum to 1: the draw below is scaled by their total, and the
     // second pass normalizes every entry against that total.
     float total = 0.f;
     for (It pdf = pdf_first; pdf != pdf_last; ++pdf)
     {
       if (*pdf < 0)
         *pdf = 0;
 
       total += *pdf;
     }
 
     // No mass anywhere: assume the first action is the best, choose it and
     // give it probability 1.
     if (total == 0)
     {
       chosen_index = 0;
       *pdf_first = 1;
       return S_EXPLORATION_OK;
     }
 
     // Scale the pseudo-random value derived from the seed to the total weight.
     float draw = total * uniform_random_merand48(seed);
     if (draw > total) // make very sure that draw can not be greater than total.
       draw = total;
 
     // Second pass: the chosen action is the first one whose running sum exceeds
     // the draw. The loop keeps going after the hit because every entry still has
     // to be normalized in place.
     bool index_found = false; // found chosen action
     float sum = 0.f;
     uint32_t i = 0;
     for (It pdf = pdf_first; pdf != pdf_last; ++pdf, ++i)
     {
       sum += *pdf;
       if (!index_found && sum > draw)
       {
         chosen_index = i;
         index_found = true;
       }
       *pdf /= total;
     }
 
     // The running sum never exceeded the draw: fall back to the last action.
     if(!index_found)
       chosen_index = i - 1;
 
     return S_EXPLORATION_OK;
   }

◆ sample_after_normalizing() [4/4]

template<typename It >

int exploration::sample_after_normalizing	(	const char *	seed,
		It	pdf_first,
		It	pdf_last,
		uint32_t &	chosen_index,
		std::random_access_iterator_tag	pdf_category
	)

Definition at line 295 of file explore_internal.h.

References sample_after_normalizing(), and uniform_hash().

   {
     uint64_t seed_hash = uniform_hash(seed, strlen(seed), 0);
     return sample_after_normalizing(seed_hash, pdf_first, pdf_last, chosen_index, pdf_category);
   }

◆ swap_chosen() [1/3]

template<typename ActionIt >

int exploration::swap_chosen	(	ActionIt	action_first,
		ActionIt	action_last,
		uint32_t	chosen_index
	)

Swap the first value with the chosen index.

Template Parameters

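ActionIt	Iterator type of the actions. Dispatched on forward_iterator_tag, but the implementation uses '<', '-' and '+' on the iterators, so a RandomAccessIterator is required in practice.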

Parameters

action_first	Iterator pointing to the beginning of the action range.
action_last	Iterator pointing to the end of the action range.
chosen_index	The index value that should be swapped with the first element

Returns: int returns 0 on success, otherwise an error code as defined by E_EXPLORATION_*.

Referenced by VW::cb_sample_data::learn_or_predict(), and swap_chosen().

◆ swap_chosen() [2/3]

template<typename ActionIt >

int exploration::swap_chosen	(	ActionIt	action_first,
		ActionIt	action_last,
		std::forward_iterator_tag	,
		uint32_t	chosen_index
	)

Definition at line 311 of file explore_internal.h.

References E_EXPLORATION_BAD_RANGE, and S_EXPLORATION_OK.

   {
     if ( action_last < action_first )
       return E_EXPLORATION_BAD_RANGE;
 
     size_t action_size = action_last - action_first;
 
     if ( action_size == 0 )
       return E_EXPLORATION_BAD_RANGE;
 
     if ( chosen_index >= action_size )
       return E_EXPLORATION_BAD_RANGE;
 
     // swap top element with chosen one
     if ( chosen_index != 0 ) {
       std::iter_swap(action_first, action_first + chosen_index);
     }
 
     return S_EXPLORATION_OK;
   }

◆ swap_chosen() [3/3]

template<typename ActionsIt >

int exploration::swap_chosen	(	ActionsIt	action_first,
		ActionsIt	action_last,
		uint32_t	chosen_index
	)

Definition at line 333 of file explore_internal.h.

References swap_chosen().

                                                                                         {
     typedef typename std::iterator_traits<ActionsIt>::iterator_category actionit_category;
     return swap_chosen(action_first, action_last, actionit_category(), chosen_index);
   }

◆ uniform_random_merand48()

float exploration::uniform_random_merand48 ( uint64_t initial )

inline

Definition at line 29 of file explore_internal.h.

References bias, c, exploration::int_float::f, and exploration::int_float::i.

Referenced by sample_after_normalizing(), and TYPED_TEST_P().

   {
     initial = a * initial + c;
     int_float temp;
     temp.i = ((initial >> 25) & 0x7FFFFF) | bias;
     return temp.f - 1;
   }

Variable Documentation

◆ a

const uint64_t exploration::a = 0xeece66d5deece66dULL

Definition at line 17 of file explore_internal.h.

◆ bias

const int exploration::bias = 127 << 23u

Definition at line 20 of file explore_internal.h.

Referenced by uniform_random_merand48().

◆ c

const uint64_t exploration::c = 2147483647

Definition at line 18 of file explore_internal.h.

Referenced by uniform_random_merand48().

Classes

Functions

Variables

Function Documentation

◆ enforce_minimum_probability() [1/2]

◆ enforce_minimum_probability() [2/2]

◆ generate_bag() [1/2]

◆ generate_bag() [2/2]

◆ generate_epsilon_greedy() [1/2]

◆ generate_epsilon_greedy() [2/2]

◆ generate_softmax() [1/2]

◆ generate_softmax() [2/2]

◆ sample_after_normalizing() [1/4]

◆ sample_after_normalizing() [2/4]

◆ sample_after_normalizing() [3/4]

◆ sample_after_normalizing() [4/4]

◆ swap_chosen() [1/3]

◆ swap_chosen() [2/3]

◆ swap_chosen() [3/3]

◆ uniform_random_merand48()

Variable Documentation

◆ a

◆ bias

◆ c