Classes
struct	cb_explore_adf_cover

Functions
LEARNER::base_learner *	setup (config::options_i &options, vw &all)

Function Documentation

◆ setup()

LEARNER::base_learner* VW::cb_explore_adf::cover::setup(config::options_i& options, vw& all)

Definition at line 174 of file cb_explore_adf_cover.cc.

References prediction_type::action_probs, VW::config::option_group_definition::add(), VW::config::options_i::add_and_parse(), LEARNER::as_multiline(), label_type::cb, CB::cb_label, CB_TYPE_DR, CB_TYPE_IPS, CB_TYPE_MTR, vw::cost_sensitive, ACTION_SCORE::delete_action_scores(), vw::delete_prediction, f, finish_multiline_example(), LEARNER::init_learner(), VW::config::options_i::insert(), vw::label_type, learn(), parser::lp, LEARNER::make_base(), VW::config::make_option(), vw::p, predict(), VW::config::options_i::replace(), vw::scorer, setup_base(), vw::trace_message, and VW::config::options_i::was_supplied().

Referenced by parse_reductions().

 {
   using config::make_option;
 
   bool cb_explore_adf_option = false;
   std::string type_string = "mtr";
   size_t cover_size = 0;
   float psi = 0.;
   bool nounif = false;
   bool first_only = false;
 
   config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
   new_options
       .add(make_option("cb_explore_adf", cb_explore_adf_option)
                .keep()
                .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
       .add(make_option("cover", cover_size).keep().help("Online cover based exploration"))
       .add(make_option("psi", psi).keep().default_value(1.0f).help("disagreement parameter for cover"))
       .add(make_option("nounif", nounif).keep().help("do not explore uniformly on zero-probability actions in cover"))
       .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"))
       .add(make_option("cb_type", type_string)
                .keep()
                .help("contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
   options.add_and_parse(new_options);
 
   if (!cb_explore_adf_option || !options.was_supplied("cover"))
     return nullptr;
 
   // Ensure serialization of cb_type in all cases.
   if (!options.was_supplied("cb_type"))
   {
     options.insert("cb_type", type_string);
     options.add_and_parse(new_options);
   }
 
   // Ensure serialization of cb_adf in all cases.
   if (!options.was_supplied("cb_adf"))
   {
     options.insert("cb_adf", "");
   }
 
   all.delete_prediction = ACTION_SCORE::delete_action_scores;
 
   // Set cb_type
   size_t cb_type_enum;
   if (type_string.compare("dr") == 0)
     cb_type_enum = CB_TYPE_DR;
   else if (type_string.compare("ips") == 0)
     cb_type_enum = CB_TYPE_IPS;
   else if (type_string.compare("mtr") == 0)
   {
     all.trace_message << "warning: currently, mtr is only used for the first policy in cover, other policies use dr"
                       << std::endl;
     cb_type_enum = CB_TYPE_MTR;
   }
   else
   {
     all.trace_message << "warning: cb_type must be in {'ips','dr','mtr'}; resetting to mtr." << std::endl;
     options.replace("cb_type", "mtr");
     cb_type_enum = CB_TYPE_MTR;
   }
 
   // Set explore_type
   size_t problem_multiplier = cover_size + 1;
 
   LEARNER::multi_learner* base = LEARNER::as_multiline(setup_base(options, all));
   all.p->lp = CB::cb_label;
   all.label_type = label_type::cb;
 
   using explore_type = cb_explore_adf_base<cb_explore_adf_cover>;
   auto data = scoped_calloc_or_throw<explore_type>(
       cover_size, psi, nounif, first_only, as_multiline(all.cost_sensitive), all.scorer, cb_type_enum);
 
   LEARNER::learner<explore_type, multi_ex>& l = init_learner(
       data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type::action_probs);
 
   l.set_finish_example(explore_type::finish_multiline_example);
   return make_base(l);
 }

Classes

Functions

Function Documentation

◆ setup()