178 bool cb_explore_adf_option =
false;
179 std::string type_string =
"mtr";
180 size_t cover_size = 0;
183 bool first_only =
false;
185 config::option_group_definition new_options(
"Contextual Bandit Exploration with Action Dependent Features");
187 .add(
make_option(
"cb_explore_adf", cb_explore_adf_option)
189 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
190 .add(
make_option(
"cover", cover_size).keep().help(
"Online cover based exploration"))
191 .add(
make_option(
"psi", psi).keep().default_value(1.0
f).help(
"disagreement parameter for cover"))
192 .add(
make_option(
"nounif", nounif).keep().help(
"do not explore uniformly on zero-probability actions in cover"))
193 .add(
make_option(
"first_only", first_only).keep().help(
"Only explore the first action in a tie-breaking event"))
196 .help(
"contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
197 options.add_and_parse(new_options);
199 if (!cb_explore_adf_option || !options.was_supplied(
"cover"))
203 if (!options.was_supplied(
"cb_type"))
205 options.insert(
"cb_type", type_string);
206 options.add_and_parse(new_options);
210 if (!options.was_supplied(
"cb_adf"))
212 options.insert(
"cb_adf",
"");
219 if (type_string.compare(
"dr") == 0)
221 else if (type_string.compare(
"ips") == 0)
223 else if (type_string.compare(
"mtr") == 0)
225 all.
trace_message <<
"warning: currently, mtr is only used for the first policy in cover, other policies use dr" 231 all.
trace_message <<
"warning: cb_type must be in {'ips','dr','mtr'}; resetting to mtr." << std::endl;
232 options.replace(
"cb_type",
"mtr");
237 size_t problem_multiplier = cover_size + 1;
243 using explore_type = cb_explore_adf_base<cb_explore_adf_cover>;
244 auto data = scoped_calloc_or_throw<explore_type>(
LEARNER::base_learner * cost_sensitive
void(* delete_prediction)(void *)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
label_type::label_type_t label_type
base_learner * make_base(learner< T, E > &base)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
LEARNER::single_learner * scorer
typed_option< T > make_option(std::string name, T &location)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
void learn(bfgs &b, base_learner &base, example &ec)
multi_learner * as_multiline(learner< T, E > *l)