bool cb_explore_adf_option = false;
float epsilon = 0.f;
size_t bag_size = 0;
bool greedify = false;
bool first_only = false;
config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
new_options
    .add(make_option("cb_explore_adf", cb_explore_adf_option)
             .keep()
             .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
    .add(make_option("epsilon", epsilon).keep().help("epsilon-greedy exploration"))
    .add(make_option("bag", bag_size).keep().help("bagging-based exploration"))
    .add(make_option("greedify", greedify).keep().help("always update first policy once in bagging"))
    .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"));
options.add_and_parse(new_options);
if (!cb_explore_adf_option || !options.was_supplied("bag"))
  return nullptr;

// Ensure serialization of cb_adf in all cases.
if (!options.was_supplied("cb_adf"))
{
  options.insert("cb_adf", "");
}
size_t problem_multiplier = bag_size;

using explore_type = cb_explore_adf_base<cb_explore_adf_bag>;
auto data = scoped_calloc_or_throw<explore_type>(epsilon, bag_size, greedify, first_only, all.get_random_state());
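// Completion sketch (assumption, not verbatim from this file): a cb_explore_adf-style
// setup() typically finishes by building the multiline base reduction and wrapping it
// in a learner that makes `problem_multiplier` (= bag_size) predictions per example.
// The calls below follow the signatures referenced on this page (as_multiline,
// setup_base, init_learner, make_base); the member names explore_type::learn,
// explore_type::predict, explore_type::finish_multiline_example and the action_probs
// prediction type are assumptions, not taken from this listing.
LEARNER::multi_learner* base = LEARNER::as_multiline(setup_base(options, all));
LEARNER::learner<explore_type, multi_ex>& l = LEARNER::init_learner(data, base,
    explore_type::learn, explore_type::predict, problem_multiplier,
    prediction_type::action_probs);
l.set_finish_example(explore_type::finish_multiline_example);
return make_base(l);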