20 namespace cb_explore_adf
39 float epsilon,
size_t bag_size,
bool greedify,
bool first_only, std::shared_ptr<rand_state> random_state);
47 template <
bool is_learn>
52 float epsilon,
size_t bag_size,
bool greedify,
bool first_only, std::shared_ptr<rand_state> random_state)
57 template <
bool is_learn>
62 uint32_t num_actions = (uint32_t)examples.
size();
70 for (uint32_t i = 0; i < num_actions; i++)
_scores.push_back(0.f);
78 if (is_learn && count > 0)
79 LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset, i);
81 LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset, i);
83 assert(preds.
size() == num_actions);
84 for (
auto e : preds)
_scores[e.action] += e.score;
89 for (
size_t i = 0; i < tied_actions; ++i)
_top_actions[preds[i].
action] += 1.
f / tied_actions;
94 for (uint32_t j = 1; j < count; j++)
95 LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset, i);
109 for (
size_t i = 0; i < num_actions; i++) preds[i] =
_action_probs[i];
117 bool cb_explore_adf_option =
false;
120 bool greedify =
false;
121 bool first_only =
false;
126 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
127 .
add(
make_option(
"epsilon", epsilon).keep().help(
"epsilon-greedy exploration"))
128 .
add(
make_option(
"bag", bag_size).keep().help(
"bagging-based exploration"))
129 .
add(
make_option(
"greedify", greedify).keep().help(
"always update first policy once in bagging"))
130 .
add(
make_option(
"first_only", first_only).keep().help(
"Only explore the first action in a tie-breaking event"));
133 if (!cb_explore_adf_option || !options.
was_supplied(
"bag"))
139 options.
insert(
"cb_adf",
"");
144 size_t problem_multiplier = bag_size;
150 auto data = scoped_calloc_or_throw<explore_type>(epsilon, bag_size, greedify, first_only, all.
get_random_state());
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
uint32_t weight_gen(std::shared_ptr< rand_state > &state)
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
void(* delete_prediction)(void *)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
void predict(LEARNER::multi_learner &base, multi_ex &examples)
label_type::label_type_t label_type
cb_explore_adf_bag(float epsilon, size_t bag_size, bool greedify, bool first_only, std::shared_ptr< rand_state > random_state)
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
void learn(LEARNER::multi_learner &base, multi_ex &examples)
score_iterator begin_scores(action_scores &a_s)
size_t fill_tied(v_array< ACTION_SCORE::action_score > &preds)
std::shared_ptr< rand_state > get_random_state()
score_iterator end_scores(action_scores &a_s)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
void push_back(const T &new_ele)
std::vector< float > _scores
virtual bool was_supplied(const std::string &key)=0
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
std::shared_ptr< rand_state > _random_state
virtual void insert(const std::string &key, const std::string &value)=0
std::vector< float > _top_actions
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
typed_option< T > make_option(std::string name, T &location)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
void sort_action_probs(v_array< ACTION_SCORE::action_score > &probs, const std::vector< float > &scores)
v_array< ACTION_SCORE::action_score > _action_probs
void learn(bfgs &b, base_learner &base, example &ec)
multi_learner * as_multiline(learner< T, E > *l)