Functions
LEARNER::base_learner *	warm_cb_setup (VW::config::options_i &options, vw &all)

Function Documentation

◆ warm_cb_setup()

LEARNER::base_learner* warm_cb_setup(VW::config::options_i& options, vw& all)

Definition at line 552 of file warm_cb.cc.

References ABS_CENTRAL, VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), LEARNER::as_multiline(), vw::delete_prediction, f, finish(), vw::get_random_state(), init_adf_data(), LEARNER::init_cost_sensitive_learner(), LEARNER::init_multiclass_learner(), VW::config::options_i::insert(), LEARNER::make_base(), VW::config::make_option(), vw::p, LEARNER::learner< T, E >::set_finish(), setup_base(), THROW, prediction_type::to_string(), UAR, uniform_hash(), and VW::config::options_i::was_supplied().

Referenced by parse_reductions().

 {
   uint32_t num_actions = 0;
   auto data = scoped_calloc_or_throw<warm_cb>();
   bool use_cs;
 
   option_group_definition new_options("Make Multiclass into Warm-starting Contextual Bandit");
 
   new_options
       .add(make_option("warm_cb", num_actions)
                .keep()
                .help("Convert multiclass on <k> classes into a contextual bandit problem"))
       .add(make_option("warm_cb_cs", use_cs)
                .help("consume cost-sensitive classification examples instead of multiclass"))
       .add(make_option("loss0", data->loss0).default_value(0.f).help("loss for correct label"))
       .add(make_option("loss1", data->loss1).default_value(1.f).help("loss for incorrect label"))
       .add(make_option("warm_start", data->ws_period)
                .default_value(0U)
                .help("number of training examples for warm start phase"))
       .add(make_option("epsilon", data->epsilon).keep().help("epsilon-greedy exploration"))
       .add(make_option("interaction", data->inter_period)
                .default_value(UINT32_MAX)
                .help("number of examples for the interactive contextual bandit learning phase"))
       .add(make_option("warm_start_update", data->upd_ws).help("indicator of warm start updates"))
       .add(make_option("interaction_update", data->upd_inter).help("indicator of interaction updates"))
       .add(make_option("corrupt_type_warm_start", data->cor_type_ws)
                .default_value(UAR)
                .help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: "
                      "replacing with overwriting label)"))
       .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws)
                .default_value(0.f)
                .help("probability of label corruption in the warm start phase"))
       .add(make_option("choices_lambda", data->choices_lambda)
                .default_value(1U)
                .help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between "
                      "the two sources)"))
       .add(make_option("lambda_scheme", data->lambda_scheme)
                .default_value(ABS_CENTRAL)
                .help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min "
                      "lambda=0, max lambda=1, 3: center lambda=epsilon/(1+epsilon), 4: center "
                      "lambda=epsilon/(1+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are "
                      "generated using a doubling scheme"))
       .add(make_option("overwrite_label", data->overwrite_label)
                .default_value(1U)
                .help("the label used by type 3 corruptions (overwriting)"))
       .add(make_option("sim_bandit", data->sim_bandit)
                .help("simulate contextual bandit updates on warm start examples"));
 
   options.add_and_parse(new_options);
 
   if (use_cs && (options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start")))
   {
     THROW("label corruption on cost-sensitive examples not currently supported");
   }
 
   if (!options.was_supplied("warm_cb"))
   {
     return nullptr;
   }
 
   data->app_seed = uniform_hash("vw", 2, 0);
   data->a_s = v_init<action_score>();
   data->all = &all;
   data->_random_state = all.get_random_state();
   data->use_cs = use_cs;
 
   init_adf_data(*data.get(), num_actions);
 
   options.insert("cb_min_cost", std::to_string(data->loss0));
   options.insert("cb_max_cost", std::to_string(data->loss1));
 
   if (options.was_supplied("baseline"))
   {
     std::stringstream ss;
     ss << std::max(std::abs(data->loss0), std::abs(data->loss1)) / (data->loss1 - data->loss0);
     options.insert("lr_multiplier", ss.str());
   }
 
   learner<warm_cb, example>* l;
 
   multi_learner* base = as_multiline(setup_base(options, all));
   // Note: the current version of warm start CB can only support epsilon-greedy exploration
   // We need to wait for the epsilon value to be passed from the base
   // cb_explore learner, if there is one
 
   if (!options.was_supplied("epsilon"))
   {
     std::cerr << "Warning: no epsilon (greedy parameter) specified; resetting to 0.05" << std::endl;
     data->epsilon = 0.05f;
   }
 
   if (use_cs)
     l = &init_cost_sensitive_learner(
         data, base, predict_or_learn_adf<true, true>, predict_or_learn_adf<false, true>, all.p, data->choices_lambda);
   else
     l = &init_multiclass_learner(
         data, base, predict_or_learn_adf<true, false>, predict_or_learn_adf<false, false>, all.p, data->choices_lambda);
 
   l->set_finish(finish);
   all.delete_prediction = nullptr;
 
   return make_base(*l);
 }

Functions

Function Documentation

◆ warm_cb_setup()