15 namespace cb_explore_adf
34 template <
bool is_learn>
40 template <
bool is_learn>
43 LEARNER::multiline_learn_or_predict<is_learn>(base, examples, examples[0]->ft_offset);
55 bool cb_explore_adf_option =
false;
63 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
64 .
add(
make_option(
"epsilon", epsilon).keep().help(
"epsilon-greedy exploration"))
65 .
add(
make_option(
"softmax", softmax).keep().help(
"softmax exploration"))
66 .
add(
make_option(
"lambda", lambda).keep().default_value(1.
f).help(
"parameter for softmax"));
69 if (!cb_explore_adf_option || !softmax)
78 options.
insert(
"cb_adf",
"");
84 size_t problem_multiplier = 1;
91 auto data = scoped_calloc_or_throw<explore_type>(epsilon, lambda);
void(* delete_prediction)(void *)
cb_explore_adf_softmax(float epsilon, float lambda)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
label_type::label_type_t label_type
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
score_iterator begin_scores(action_scores &a_s)
score_iterator end_scores(action_scores &a_s)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
virtual bool was_supplied(const std::string &key)=0
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
virtual void insert(const std::string &key, const std::string &value)=0
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
~cb_explore_adf_softmax()=default
void learn(LEARNER::multi_learner &base, multi_ex &examples)
typed_option< T > make_option(std::string name, T &location)
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
void predict(LEARNER::multi_learner &base, multi_ex &examples)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
void learn(bfgs &b, base_learner &base, example &ec)
multi_learner * as_multiline(learner< T, E > *l)