cpp/8.8.1/cb__explore__adf__regcb_8cc_source.html

 #include "cb_explore_adf_regcb.h"
 #include "reductions.h"
 #include "cb_adf.h"
 #include "rand48.h"
 #include "bs.h"
 #include "gen_cs_example.h"
 #include "cb_explore.h"
 #include "explore.h"
 #include "action_score.h"
 #include "cb.h"
 #include <vector>
 #include <algorithm>
 #include <cmath>

 // All exploration algorithms return a vector of (id, probability) tuples, sorted in order of scores. Each
 // probability is the chance with which the corresponding action should be moved to the top of the list.

 // Upper bound on the number of bisection steps performed by cb_explore_adf_regcb::binary_search.
 #define B_SEARCH_MAX_ITER 20

 namespace VW
 {
 namespace cb_explore_adf
 {
 namespace regcb
 {
 // RegCB exploration for contextual bandits with action dependent features.
 // Holds the algorithm's settings, a running count of learn calls and the scratch buffers used
 // while computing a lower/upper cost bound for every action.
 struct cb_explore_adf_regcb
 {
  private:
   // Incremented once per learn call; enters the confidence threshold computed at predict time.
   size_t _counter;
   bool _regcbopt;  // use optimistic variant of RegCB
   float _c0;       // mellowness parameter for RegCB
   // Optimistic variant only: when false, every action tying with the lowest lower bound is
   // selected in addition to the first one found.
   bool _first_only;
   // Range that the per-action cost bounds are clamped to; its width also scales the threshold.
   float _min_cb_cost;
   float _max_cb_cost;

   // Per-action lower and upper cost bounds, filled in by get_cost_ranges.
   std::vector<float> _min_costs;
   std::vector<float> _max_costs;

   // for backing up cb example data when computing sensitivities
   std::vector<ACTION_SCORE::action_scores> _ex_as;
   std::vector<v_array<CB::cb_class>> _ex_costs;

  public:
   cb_explore_adf_regcb(bool regcbopt, float c0, bool first_only, float min_cb_cost, float max_cb_cost);
   ~cb_explore_adf_regcb() = default;

   // Should be called through cb_explore_adf_base for pre/post-processing
   // predict and learn are thin wrappers selecting the is_learn template argument.
   void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<false>(base, examples); }
   void learn(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<true>(base, examples); }

  private:
   // Shared implementation of predict (is_learn == false) and learn (is_learn == true).
   template <bool is_learn>
   void predict_or_learn_impl(LEARNER::multi_learner& base, multi_ex& examples);

   // Fills _min_costs (and _max_costs unless min_only is set) for every action of the example.
   void get_cost_ranges(float delta, LEARNER::multi_learner& base, multi_ex& examples, bool min_only);
   // Bisection for the importance weight used by get_cost_ranges; tol is the stopping tolerance.
   float binary_search(float fhat, float delta, float sens, float tol = 1e-6);
 };

 // Stores the RegCB settings and starts the learn-call counter at zero.
 //
 //   regcbopt     - true selects the optimistic variant, false the elimination variant
 //   c0           - mellowness parameter scaling the confidence threshold
 //   first_only   - in the optimistic variant, do not also select actions tying with the best one
 //   min_cb_cost  - lower end of the range cost bounds are clamped to
 //   max_cb_cost  - upper end of the range cost bounds are clamped to
 //
 // _counter is incremented on every learn call and read when the threshold is computed, so it
 // must have a defined starting value. It is set explicitly here instead of relying on how the
 // object's storage happens to be allocated.
 cb_explore_adf_regcb::cb_explore_adf_regcb(
     bool regcbopt, float c0, bool first_only, float min_cb_cost, float max_cb_cost)
     : _counter(0)
     , _regcbopt(regcbopt)
     , _c0(c0)
     , _first_only(first_only)
     , _min_cb_cost(min_cb_cost)
     , _max_cb_cost(max_cb_cost)
 {
 }

 // TODO: same as cs_active.cc, move to shared place
 //
 // Bisects the interval [0, min(fhat / sens, FLT_MAX)] on the sign of
 //     w * (fhat^2 - (fhat - sens * w)^2) - delta
 // and returns the lower end of the final bracket.
 //
 // If the upper end of the interval already satisfies maxw * fhat * fhat <= delta it is returned
 // directly. Otherwise at most B_SEARCH_MAX_ITER halvings are performed, stopping early once the
 // residual magnitude or the bracket width drops to tol or below.
 float cb_explore_adf_regcb::binary_search(float fhat, float delta, float sens, float tol)
 {
   const float w_cap = (std::min)(fhat / sens, FLT_MAX);

   // Nothing to search for when even the largest weight stays within the budget.
   if (w_cap * fhat * fhat <= delta)
   {
     return w_cap;
   }

   float lo = 0;
   float hi = w_cap;

   for (int step = 0; step < B_SEARCH_MAX_ITER; ++step)
   {
     const float mid = (hi + lo) / 2.f;
     const float excess = mid * (fhat * fhat - (fhat - sens * mid) * (fhat - sens * mid)) - delta;

     // Positive residual: mid is too large, shrink from above; otherwise raise the lower end.
     if (excess > 0)
     {
       hi = mid;
     }
     else
     {
       lo = mid;
     }

     const bool converged = fabs(excess) <= tol || hi - lo <= tol;
     if (converged)
     {
       break;
     }
   }

   return lo;
 }

 // Computes, for every action, a lower cost bound (_min_costs) and - unless min_only is set - an
 // upper cost bound (_max_costs), each clamped to [_min_cb_cost, _max_cb_cost].
 //
 //   delta     - threshold handed to binary_search when picking the importance weight
 //   base      - learner queried through sensitivity() for every action example
 //   examples  - multi-line example; examples[0]->pred.a_s must already hold the action scores
 //   min_only  - when true the upper-bound pass is skipped and _max_costs is only resized
 //
 // The examples' pred.a_s and l.cb.costs are saved on entry and written back on exit because the
 // function stores into pred.scalar and l.simple.label in between.
 // NOTE(review): presumably those fields share storage with a_s / cb.costs - confirm against the
 // prediction and label type definitions.
 void cb_explore_adf_regcb::get_cost_ranges(float delta, LEARNER::multi_learner& base, multi_ex& examples, bool min_only)
 {
   const size_t num_actions = examples[0]->pred.a_s.size();
   _min_costs.resize(num_actions);
   _max_costs.resize(num_actions);

   _ex_as.clear();
   _ex_costs.clear();

   // backup cb example data
   for (const auto& ex : examples)
   {
     _ex_as.push_back(ex->pred.a_s);
     _ex_costs.push_back(ex->l.cb.costs);
   }

   // set regressor predictions
   // Each entry of examples[0]'s saved action-score list names an action; that action's example
   // receives the entry's score as its scalar prediction.
   for (const auto& as : _ex_as[0])
   {
     examples[as.action]->pred.scalar = as.score;
   }

   const float cmin = _min_cb_cost;
   const float cmax = _max_cb_cost;

   for (size_t a = 0; a < num_actions; ++a)
   {
     example* ec = examples[a];
     // Lower bound: query the sensitivity with a label placed one unit below the cost range.
     // NOTE(review): sensitivity() is opaque here; presumably it reports how far the prediction
     // moves per unit of importance weight toward the label - confirm against the base learner.
     ec->l.simple.label = cmin - 1;
     float sens = base.sensitivity(*ec);
     float w = 0;  // importance weight

     // A prediction already below the range, or an unusable sensitivity, pins the bound to cmin.
     if (ec->pred.scalar < cmin || std::isnan(sens) || std::isinf(sens))
       _min_costs[a] = cmin;
     else
     {
       // First argument is the gap between the prediction and the label set above (cmin - 1).
       w = binary_search(ec->pred.scalar - cmin + 1, delta, sens);
       _min_costs[a] = (std::max)(ec->pred.scalar - sens * w, cmin);
       if (_min_costs[a] > cmax)
         _min_costs[a] = cmax;
     }

     if (!min_only)
     {
       // Upper bound: mirror image of the pass above with a label one unit above the cost range.
       ec->l.simple.label = cmax + 1;
       sens = base.sensitivity(*ec);
       if (ec->pred.scalar > cmax || std::isnan(sens) || std::isinf(sens))
       {
         _max_costs[a] = cmax;
       }
       else
       {
         w = binary_search(cmax + 1 - ec->pred.scalar, delta, sens);
         _max_costs[a] = (std::min)(ec->pred.scalar + sens * w, cmax);
         if (_max_costs[a] < cmin)
           _max_costs[a] = cmin;
       }
     }
   }

   // reset cb example data
   for (size_t i = 0; i < examples.size(); ++i)
   {
     examples[i]->pred.a_s = _ex_as[i];
     examples[i]->l.cb.costs = _ex_costs[i];
   }
 }

 // Runs the base learner over the multi-line example and, on the predict path, replaces the
 // action scores in examples[0]->pred.a_s with exploration weights.
 //
 // is_learn == true : sets the probability of every single-cost label to 1 (no importance
 //                    weighting), calls learn on the base and increments the learn-call counter.
 //                    The predictions are not modified by this function afterwards.
 // is_learn == false: calls predict on the base, derives per-action cost bounds through
 //                    get_cost_ranges and overwrites the scores:
 //                      - optimistic variant: 1 for the action with the smallest lower bound
 //                        (and for ties with it unless _first_only is set), 0 for the rest;
 //                      - elimination variant: uniform over the actions whose lower bound does
 //                        not exceed the smallest upper bound, 0 for the rest.
 template <bool is_learn>
 void cb_explore_adf_regcb::predict_or_learn_impl(LEARNER::multi_learner& base, multi_ex& examples)
 {
   if (is_learn)
   {
     // NOTE(review): the loop stops one short of examples.size(); confirm that the final example
     // can never be the one carrying the observed cost.
     for (size_t i = 0; i < examples.size() - 1; ++i)
     {
       CB::label& ld = examples[i]->l.cb;
       if (ld.costs.size() == 1)
         ld.costs[0].probability = 1.f;  // no importance weighting
     }

     LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset);
     ++_counter;
   }
   else
     LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset);

   if (!is_learn)
   {
     v_array<ACTION_SCORE::action_score>& preds = examples[0]->pred.a_s;
     const uint32_t num_actions = static_cast<uint32_t>(preds.size());

     const float max_range = _max_cb_cost - _min_cb_cost;
     // threshold on empirical loss difference
     // NOTE(review): while no learn call has happened yet _counter is still at its starting value,
     // so the logarithm's argument can be zero - confirm that this is the intended start-up
     // behaviour.
     const float delta = _c0 * log(static_cast<float>(num_actions * _counter)) * pow(max_range, 2);

     get_cost_ranges(delta, base, examples, /*min_only=*/_regcbopt);

     if (_regcbopt)  // optimistic variant
     {
       float min_cost = FLT_MAX;
       size_t a_opt = 0;  // optimistic action
       for (size_t a = 0; a < num_actions; ++a)
       {
         if (_min_costs[a] < min_cost)
         {
           min_cost = _min_costs[a];
           a_opt = a;
         }
       }
       for (size_t i = 0; i < preds.size(); ++i)
       {
         if (preds[i].action == a_opt || (!_first_only && _min_costs[preds[i].action] == min_cost))
           preds[i].score = 1;
         else
           preds[i].score = 0;
       }
     }
     else  // elimination variant
     {
       float min_max_cost = FLT_MAX;
       for (size_t a = 0; a < num_actions; ++a)
         if (_max_costs[a] < min_max_cost)
           min_max_cost = _max_costs[a];

       // Mark the surviving actions first ...
       for (size_t i = 0; i < preds.size(); ++i)
       {
         if (_min_costs[preds[i].action] <= min_max_cost)
           preds[i].score = 1;
         else
           preds[i].score = 0;
       }

       // ... then explore uniformly on support. One pass over the finished 0/1 vector is enough:
       // normalising inside the loop above rescanned every score on each iteration, and only the
       // last of those calls determined the result.
       exploration::enforce_minimum_probability(
           1.0, /*update_zero_elements=*/false, begin_scores(preds), end_scores(preds));
     }
   }
 }

 // Registers the RegCB command line options and, when --cb_explore_adf is combined with --regcb
 // or --regcbopt, builds the RegCB exploration learner on top of the base learner stack.
 // Returns nullptr when this reduction was not requested.
 LEARNER::base_learner* setup(VW::config::options_i& options, vw& all)
 {
   using config::make_option;

   // Targets the option parser writes into.
   const std::string mtr = "mtr";
   std::string cb_type(mtr);
   bool adf_enabled = false;
   bool elim_flag = false;
   bool opt_flag = false;
   bool tie_first_only = false;
   float mellowness = 0.;
   float cost_lo = 0.;
   float cost_hi = 0.;

   config::option_group_definition group("Contextual Bandit Exploration with Action Dependent Features");
   group.add(
       make_option("cb_explore_adf", adf_enabled)
           .keep()
           .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"));
   group.add(make_option("regcb", elim_flag).keep().help("RegCB-elim exploration"));
   group.add(make_option("regcbopt", opt_flag).keep().help("RegCB optimistic exploration"));
   group.add(make_option("mellowness", mellowness)
                 .keep()
                 .default_value(0.1f)
                 .help("RegCB mellowness parameter c_0. Default 0.1"));
   group.add(make_option("cb_min_cost", cost_lo).keep().default_value(0.f).help("lower bound on cost"));
   group.add(make_option("cb_max_cost", cost_hi).keep().default_value(1.f).help("upper bound on cost"));
   group.add(
       make_option("first_only", tie_first_only).keep().help("Only explore the first action in a tie-breaking event"));
   group.add(make_option("cb_type", cb_type)
                 .keep()
                 .help("contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
   options.add_and_parse(group);

   // Not our reduction unless ADF exploration and one of the RegCB flavours were both requested.
   if (!adf_enabled)
   {
     return nullptr;
   }
   if (!options.was_supplied("regcb") && !options.was_supplied("regcbopt"))
   {
     return nullptr;
   }

   // cb_adf has to end up in the option set no matter how we were invoked.
   if (!options.was_supplied("cb_adf"))
   {
     options.insert("cb_adf", "");
   }

   // Anything other than mtr is overridden, with a warning.
   if (cb_type != mtr)
   {
     all.trace_message << "warning: bad cb_type, RegCB only supports mtr; resetting to mtr." << std::endl;
     options.replace("cb_type", mtr);
   }

   all.delete_prediction = ACTION_SCORE::delete_action_scores;

   size_t problem_multiplier = 1;

   // Build the stack underneath first, then install the cb label parser and label type.
   LEARNER::multi_learner* base = as_multiline(setup_base(options, all));
   all.p->lp = CB::cb_label;
   all.label_type = label_type::cb;

   using explore_type = cb_explore_adf_base<cb_explore_adf_regcb>;
   auto data = scoped_calloc_or_throw<explore_type>(opt_flag, mellowness, tie_first_only, cost_lo, cost_hi);
   auto& regcb_learner = LEARNER::init_learner(
       data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type::action_probs);

   regcb_learner.set_finish_example(explore_type::finish_multiline_example);
   return make_base(regcb_learner);
 }

 }  // namespace regcb
 }  // namespace cb_explore_adf
 }  // namespace VW
VW::cb_explore_adf::regcb::cb_explore_adf_regcb::predict_or_learn_impl
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
Definition: cb_explore_adf_regcb.cc:161

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_max_costs
std::vector< float > _max_costs
Definition: cb_explore_adf_regcb.cc:37

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::binary_search
float binary_search(float fhat, float delta, float sens, float tol=1e-6)
Definition: cb_explore_adf_regcb.cc:66

vw::delete_prediction
void(* delete_prediction)(void *)
Definition: global_data.h:485

polyprediction::scalar
float scalar
Definition: example.h:45

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::predict
void predict(LEARNER::multi_learner &base, multi_ex &examples)
Definition: cb_explore_adf_regcb.cc:48

explore.h

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_counter
size_t _counter
Definition: cb_explore_adf_regcb.cc:29

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::get_cost_ranges
void get_cost_ranges(float delta, LEARNER::multi_learner &base, multi_ex &examples, bool min_only)
Definition: cb_explore_adf_regcb.cc:92

finish_multiline_example
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
Definition: cbify.cc:373

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_first_only
bool _first_only
Definition: cb_explore_adf_regcb.cc:32

VW::config::options_i::replace
virtual void replace(const std::string &key, const std::string &value)=0

action_score.h

vw::label_type
label_type::label_type_t label_type
Definition: global_data.h:550

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_min_costs
std::vector< float > _min_costs
Definition: cb_explore_adf_regcb.cc:36

CB::label::costs
v_array< cb_class > costs
Definition: cb.h:27

LEARNER::make_base
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462

VW::config::option_group_definition
Definition: options.h:85

action
uint32_t action
Definition: search.h:19

VW::config::options_i::add_and_parse
virtual void add_and_parse(const option_group_definition &group)=0

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_max_cb_cost
float _max_cb_cost
Definition: cb_explore_adf_regcb.cc:34

label_data::label
float label
Definition: simple_label.h:14

polylabel::simple
label_data simple
Definition: example.h:28

VW::cb_explore_adf::regcb::setup
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
Definition: cb_explore_adf_regcb.cc:229

LEARNER::learner
Definition: cb_explore.h:11

v_array::size
size_t size() const
Definition: v_array.h:68

ACTION_SCORE::begin_scores
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43

vw
Definition: global_data.h:369

vw::p
parser * p
Definition: global_data.h:377

ACTION_SCORE::end_scores
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45

LEARNER::init_learner
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369

ACTION_SCORE::delete_action_scores
void delete_action_scores(void *v)
Definition: action_score.cc:29

vw::trace_message
vw_ostream trace_message
Definition: global_data.h:424

VW::config::options_i::was_supplied
virtual bool was_supplied(const std::string &key)=0

VW::config::options_i
Definition: options.h:107

rand48.h

example
Definition: example.h:54

exploration::enforce_minimum_probability
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
Definition: explore_internal.h:226

cb_explore_adf_regcb.h

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_ex_costs
std::vector< v_array< CB::cb_class > > _ex_costs
Definition: cb_explore_adf_regcb.cc:41

reductions.h

cb_explore.h

VW::config::options_i::insert
virtual void insert(const std::string &key, const std::string &value)=0

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::cb_explore_adf_regcb
cb_explore_adf_regcb(bool regcbopt, float c0, bool first_only, float min_cb_cost, float max_cb_cost)
Definition: cb_explore_adf_regcb.cc:59

LEARNER::learner::sensitivity
float sensitivity(example &ec, size_t i=0)
Definition: learner.h:242

VW::config::option_group_definition::add
option_group_definition & add(T &&op)
Definition: options.h:90

cb_adf.h

multi_ex
std::vector< example * > multi_ex
Definition: example.h:122

CB::cb_label
label_parser cb_label
Definition: cb.cc:167

example::l
polylabel l
Definition: example.h:57

a
constexpr uint64_t a
Definition: rand48.cc:11

VW::cb_explore_adf::regcb::cb_explore_adf_regcb
Definition: cb_explore_adf_regcb.cc:26

VW::config::make_option
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_c0
float _c0
Definition: cb_explore_adf_regcb.cc:31

prediction_type::action_probs
Definition: learner.h:23

CB::label
Definition: cb.h:25

VW::cb_explore_adf::cb_explore_adf_base
Definition: cb_explore_adf_common.h:64

VW
Definition: autolink.cc:11

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::learn
void learn(LEARNER::multi_learner &base, multi_ex &examples)
Definition: cb_explore_adf_regcb.cc:49

cb.h

setup_base
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222

label_type::cb
Definition: global_data.h:346

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_min_cb_cost
float _min_cb_cost
Definition: cb_explore_adf_regcb.cc:33

predict
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956

example::pred
polyprediction pred
Definition: example.h:60

bs.h

B_SEARCH_MAX_ITER
#define B_SEARCH_MAX_ITER
Definition: cb_explore_adf_regcb.cc:18

learn
void learn(bfgs &b, base_learner &base, example &ec)
Definition: bfgs.cc:965

v_array< ACTION_SCORE::action_score >

gen_cs_example.h

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_ex_as
std::vector< ACTION_SCORE::action_scores > _ex_as
Definition: cb_explore_adf_regcb.cc:40

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::_regcbopt
bool _regcbopt
Definition: cb_explore_adf_regcb.cc:30

VW::cb_explore_adf::regcb::cb_explore_adf_regcb::~cb_explore_adf_regcb
~cb_explore_adf_regcb()=default

f
float f
Definition: cache.cc:40

LEARNER::as_multiline
multi_learner * as_multiline(learner< T, E > *l)
Definition: learner.h:468

parser::lp
label_parser lp
Definition: parser.h:102