232 bool cb_explore_adf_option =
false;
234 const std::string mtr =
"mtr";
235 std::string type_string(mtr);
236 bool regcbopt =
false;
238 bool first_only =
false;
239 float min_cb_cost = 0.;
240 float max_cb_cost = 0.;
241 config::option_group_definition new_options(
"Contextual Bandit Exploration with Action Dependent Features");
243 .add(
make_option(
"cb_explore_adf", cb_explore_adf_option)
245 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
246 .add(
make_option(
"regcb", regcb).keep().help(
"RegCB-elim exploration"))
247 .add(
make_option(
"regcbopt", regcbopt).keep().help(
"RegCB optimistic exploration"))
248 .add(
make_option(
"mellowness", c0).keep().default_value(0.1
f).help(
"RegCB mellowness parameter c_0. Default 0.1"))
249 .add(
make_option(
"cb_min_cost", min_cb_cost).keep().default_value(0.
f).help(
"lower bound on cost"))
250 .add(
make_option(
"cb_max_cost", max_cb_cost).keep().default_value(1.
f).help(
"upper bound on cost"))
251 .add(
make_option(
"first_only", first_only).keep().help(
"Only explore the first action in a tie-breaking event"))
254 .help(
"contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
263 options.
insert(
"cb_adf",
"");
265 if (type_string != mtr)
267 all.
trace_message <<
"warning: bad cb_type, RegCB only supports mtr; resetting to mtr." << std::endl;
268 options.
replace(
"cb_type", mtr);
274 size_t problem_multiplier = 1;
280 using explore_type = cb_explore_adf_base<cb_explore_adf_regcb>;
281 auto data = scoped_calloc_or_throw<explore_type>(regcbopt, c0, first_only, min_cb_cost, max_cb_cost);
void(* delete_prediction)(void *)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
virtual void replace(const std::string &key, const std::string &value)=0
label_type::label_type_t label_type
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
virtual bool was_supplied(const std::string &key)=0
virtual void insert(const std::string &key, const std::string &value)=0
typed_option< T > make_option(std::string name, T &location)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
void learn(bfgs &b, base_learner &base, example &ec)
multi_learner * as_multiline(learner< T, E > *l)