18 namespace cb_explore_adf
50 template <
bool is_learn>
62 template <
bool is_learn>
77 LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset);
82 LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset);
85 const uint32_t num_actions = (uint32_t)preds.
size();
87 float additive_probability = 1.f / (float)
_cover_size;
88 const float min_prob = (std::min)(1.
f / num_actions, 1.
f / (
float)std::sqrt(
_counter * num_actions));
92 for (uint32_t i = 0; i < num_actions; i++)
_scores.push_back(preds[i].score);
97 for (
size_t i = 0; i < tied_actions; ++i)
101 _action_probs[preds[0].action].score += additive_probability;
103 float norm = min_prob * num_actions + (additive_probability - min_prob);
115 for (uint32_t j = 0; j < num_actions; j++)
121 GEN_CS::call_cs_ldf<true>(
125 GEN_CS::call_cs_ldf<false>(
128 for (uint32_t i = 0; i < num_actions; i++)
_scores[i] += preds[i].score;
132 const float add_prob = additive_probability / tied_actions;
133 for (
size_t i = 0; i < tied_actions; ++i)
144 uint32_t
action = preds[0].action;
146 norm += (std::max)(0.
f, additive_probability - (min_prob -
_action_probs[action].score));
148 norm += additive_probability;
157 for (
size_t i = 0; i < num_actions; i++) preds[i] =
_action_probs[i];
178 bool cb_explore_adf_option =
false;
179 std::string type_string =
"mtr";
180 size_t cover_size = 0;
183 bool first_only =
false;
189 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
190 .
add(
make_option(
"cover", cover_size).keep().help(
"Online cover based exploration"))
191 .
add(
make_option(
"psi", psi).keep().default_value(1.0
f).help(
"disagreement parameter for cover"))
192 .
add(
make_option(
"nounif", nounif).keep().help(
"do not explore uniformly on zero-probability actions in cover"))
193 .
add(
make_option(
"first_only", first_only).keep().help(
"Only explore the first action in a tie-breaking event"))
196 .help(
"contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
199 if (!cb_explore_adf_option || !options.
was_supplied(
"cover"))
205 options.
insert(
"cb_type", type_string);
212 options.
insert(
"cb_adf",
"");
219 if (type_string.compare(
"dr") == 0)
221 else if (type_string.compare(
"ips") == 0)
223 else if (type_string.compare(
"mtr") == 0)
225 all.
trace_message <<
"warning: currently, mtr is only used for the first policy in cover, other policies use dr" 231 all.
trace_message <<
"warning: cb_type must be in {'ips','dr','mtr'}; resetting to mtr." << std::endl;
232 options.
replace(
"cb_type",
"mtr");
237 size_t problem_multiplier = cover_size + 1;
244 auto data = scoped_calloc_or_throw<explore_type>(
void predict(LEARNER::multi_learner &base, multi_ex &examples)
v_array< COST_SENSITIVE::label > _prepped_cs_labels
LEARNER::base_learner * cost_sensitive
void learn(LEARNER::multi_learner &base, multi_ex &examples)
void(* delete_prediction)(void *)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
COST_SENSITIVE::label _cs_labels
virtual void replace(const std::string &key, const std::string &value)=0
label_type::label_type_t label_type
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
COST_SENSITIVE::label _cs_labels_2
score_iterator begin_scores(action_scores &a_s)
CB::cb_class get_observed_cost(multi_ex &examples)
size_t fill_tied(v_array< ACTION_SCORE::action_score > &preds)
score_iterator end_scores(action_scores &a_s)
v_array< ACTION_SCORE::action_score > _action_probs
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
void push_back(const T &new_ele)
virtual bool was_supplied(const std::string &key)=0
LEARNER::multi_learner * _cs_ldf_learner
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
std::vector< float > _scores
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
void gen_cs_example_ips(multi_ex &examples, COST_SENSITIVE::label &cs_labels, float clip_p)
GEN_CS::cb_to_cs_adf _gen_cs
LEARNER::single_learner * scorer
virtual void insert(const std::string &key, const std::string &value)=0
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
typed_option< T > make_option(std::string name, T &location)
LEARNER::base_learner * setup(config::options_i &options, vw &all)
v_array< CB::label > _cb_labels
LEARNER::single_learner * scorer
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
cb_explore_adf_cover(size_t cover_size, float psi, bool nounif, bool first_only, LEARNER::multi_learner *cs_ldf_learner, LEARNER::single_learner *scorer, size_t cb_type)
void sort_action_probs(v_array< ACTION_SCORE::action_score > &probs, const std::vector< float > &scores)
void learn(bfgs &b, base_learner &base, example &ec)
multi_learner * as_multiline(learner< T, E > *l)
COST_SENSITIVE::label pred_scores