Vowpal Wabbit
cb_explore_adf_cover.cc
Go to the documentation of this file.
1 #include "cb_explore_adf_cover.h"
2 #include "reductions.h"
3 #include "cb_adf.h"
4 #include "rand48.h"
5 #include "bs.h"
6 #include "gen_cs_example.h"
7 #include "cb_explore.h"
8 #include "explore.h"
9 #include <vector>
10 #include <algorithm>
11 #include <cmath>
12 
13 // All exploration algorithms return a vector of id, probability tuples, sorted in order of scores. The probabilities
14 // are the probability with which each action should be replaced to the top of the list.
15 
16 namespace VW
17 {
18 namespace cb_explore_adf
19 {
20 namespace cover
21 {
23 {
24  private:
25  size_t _cover_size;
26  float _psi;
27  bool _nounif;
29  size_t _counter;
32 
34  std::vector<float> _scores;
39 
40  public:
41  cb_explore_adf_cover(size_t cover_size, float psi, bool nounif, bool first_only,
42  LEARNER::multi_learner* cs_ldf_learner, LEARNER::single_learner* scorer, size_t cb_type);
44 
45  // Should be called through cb_explore_adf_base for pre/post-processing
46  void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<false>(base, examples); }
47  void learn(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<true>(base, examples); }
48 
49  private:
50  template <bool is_learn>
52 };
53 
54 cb_explore_adf_cover::cb_explore_adf_cover(size_t cover_size, float psi, bool nounif, bool first_only,
55  LEARNER::multi_learner* cs_ldf_learner, LEARNER::single_learner* scorer, size_t cb_type)
56  : _cover_size(cover_size), _psi(psi), _nounif(nounif), _first_only(first_only), _cs_ldf_learner(cs_ldf_learner)
57 {
58  _gen_cs.cb_type = cb_type;
59  _gen_cs.scorer = scorer;
60 }
61 
62 template <bool is_learn>
64 {
65  // Redundant with the call in cb_explore_adf_base, but encapsulation means we need to do this again here
67 
68  // Randomize over predictions from a base set of predictors
69  // Use cost sensitive oracle to cover actions to form distribution.
70  const bool is_mtr = _gen_cs.cb_type == CB_TYPE_MTR;
71  if (is_learn)
72  {
73  if (is_mtr) // use DR estimates for non-ERM policies in MTR
74  GEN_CS::gen_cs_example_dr<true>(_gen_cs, examples, _cs_labels);
75  else
76  GEN_CS::gen_cs_example<false>(_gen_cs, examples, _cs_labels);
77  LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset);
78  }
79  else
80  {
82  LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset);
83  }
84  v_array<ACTION_SCORE::action_score>& preds = examples[0]->pred.a_s;
85  const uint32_t num_actions = (uint32_t)preds.size();
86 
87  float additive_probability = 1.f / (float)_cover_size;
88  const float min_prob = (std::min)(1.f / num_actions, 1.f / (float)std::sqrt(_counter * num_actions));
90  for (uint32_t i = 0; i < num_actions; i++) _action_probs.push_back({i, 0.});
91  _scores.clear();
92  for (uint32_t i = 0; i < num_actions; i++) _scores.push_back(preds[i].score);
93 
94  if (!_first_only)
95  {
96  size_t tied_actions = fill_tied(preds);
97  for (size_t i = 0; i < tied_actions; ++i)
98  _action_probs[preds[i].action].score += additive_probability / tied_actions;
99  }
100  else
101  _action_probs[preds[0].action].score += additive_probability;
102 
103  float norm = min_prob * num_actions + (additive_probability - min_prob);
104  for (size_t i = 1; i < _cover_size; i++)
105  {
106  // Create costs of each action based on online cover
107  if (is_learn)
108  {
109  _cs_labels_2.costs.clear();
110  /* Cover's learn policy is similar to bag in that we have multiple learners
111  * The main difference here is that Cover's learners interact with each other.
112  * The following code generates cost = cost + penalty where a penalty is applied
113  * to any action a previous policy has already selected
114  */
115  for (uint32_t j = 0; j < num_actions; j++)
116  {
117  float pseudo_cost =
118  _cs_labels.costs[j].x - _psi * min_prob / ((std::max)(_action_probs[j].score, min_prob) / norm);
119  _cs_labels_2.costs.push_back({pseudo_cost, j, 0., 0.});
120  }
121  GEN_CS::call_cs_ldf<true>(
122  *(_cs_ldf_learner), examples, _cb_labels, _cs_labels_2, _prepped_cs_labels, examples[0]->ft_offset, i + 1);
123  }
124  else
125  GEN_CS::call_cs_ldf<false>(
126  *(_cs_ldf_learner), examples, _cb_labels, _cs_labels, _prepped_cs_labels, examples[0]->ft_offset, i + 1);
127 
128  for (uint32_t i = 0; i < num_actions; i++) _scores[i] += preds[i].score;
129  if (!_first_only)
130  {
131  size_t tied_actions = fill_tied(preds);
132  const float add_prob = additive_probability / tied_actions;
133  for (size_t i = 0; i < tied_actions; ++i)
134  {
135  if (_action_probs[preds[i].action].score < min_prob)
136  norm += (std::max)(0.f, add_prob - (min_prob - _action_probs[preds[i].action].score));
137  else
138  norm += add_prob;
139  _action_probs[preds[i].action].score += add_prob;
140  }
141  }
142  else
143  {
144  uint32_t action = preds[0].action;
145  if (_action_probs[action].score < min_prob)
146  norm += (std::max)(0.f, additive_probability - (min_prob - _action_probs[action].score));
147  else
148  norm += additive_probability;
149  _action_probs[action].score += additive_probability;
150  }
151  }
152 
154  min_prob * num_actions, !_nounif, begin_scores(_action_probs), end_scores(_action_probs));
155 
157  for (size_t i = 0; i < num_actions; i++) preds[i] = _action_probs[i];
158 
159  if (is_learn)
160  ++_counter;
161 }
162 
164 {
166  for (size_t i = 0; i < _prepped_cs_labels.size(); i++) _prepped_cs_labels[i].costs.delete_v();
168  _cs_labels_2.costs.delete_v();
169  _cs_labels.costs.delete_v();
171  _gen_cs.pred_scores.costs.delete_v();
172 }
173 
175 {
176  using config::make_option;
177 
178  bool cb_explore_adf_option = false;
179  std::string type_string = "mtr";
180  size_t cover_size = 0;
181  float psi = 0.;
182  bool nounif = false;
183  bool first_only = false;
184 
185  config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
186  new_options
187  .add(make_option("cb_explore_adf", cb_explore_adf_option)
188  .keep()
189  .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
190  .add(make_option("cover", cover_size).keep().help("Online cover based exploration"))
191  .add(make_option("psi", psi).keep().default_value(1.0f).help("disagreement parameter for cover"))
192  .add(make_option("nounif", nounif).keep().help("do not explore uniformly on zero-probability actions in cover"))
193  .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"))
194  .add(make_option("cb_type", type_string)
195  .keep()
196  .help("contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
197  options.add_and_parse(new_options);
198 
199  if (!cb_explore_adf_option || !options.was_supplied("cover"))
200  return nullptr;
201 
202  // Ensure serialization of cb_type in all cases.
203  if (!options.was_supplied("cb_type"))
204  {
205  options.insert("cb_type", type_string);
206  options.add_and_parse(new_options);
207  }
208 
209  // Ensure serialization of cb_adf in all cases.
210  if (!options.was_supplied("cb_adf"))
211  {
212  options.insert("cb_adf", "");
213  }
214 
216 
217  // Set cb_type
218  size_t cb_type_enum;
219  if (type_string.compare("dr") == 0)
220  cb_type_enum = CB_TYPE_DR;
221  else if (type_string.compare("ips") == 0)
222  cb_type_enum = CB_TYPE_IPS;
223  else if (type_string.compare("mtr") == 0)
224  {
225  all.trace_message << "warning: currently, mtr is only used for the first policy in cover, other policies use dr"
226  << std::endl;
227  cb_type_enum = CB_TYPE_MTR;
228  }
229  else
230  {
231  all.trace_message << "warning: cb_type must be in {'ips','dr','mtr'}; resetting to mtr." << std::endl;
232  options.replace("cb_type", "mtr");
233  cb_type_enum = CB_TYPE_MTR;
234  }
235 
236  // Set explore_type
237  size_t problem_multiplier = cover_size + 1;
238 
240  all.p->lp = CB::cb_label;
242 
243  using explore_type = cb_explore_adf_base<cb_explore_adf_cover>;
244  auto data = scoped_calloc_or_throw<explore_type>(
245  cover_size, psi, nounif, first_only, as_multiline(all.cost_sensitive), all.scorer, cb_type_enum);
246 
249 
250  l.set_finish_example(explore_type::finish_multiline_example);
251  return make_base(l);
252 }
253 } // namespace cover
254 } // namespace cb_explore_adf
255 } // namespace VW
void predict(LEARNER::multi_learner &base, multi_ex &examples)
CB::cb_class known_cost
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: scorer.cc:8
#define CB_TYPE_IPS
Definition: cb_algs.h:15
LEARNER::base_learner * cost_sensitive
Definition: global_data.h:385
void learn(LEARNER::multi_learner &base, multi_ex &examples)
void(* delete_prediction)(void *)
Definition: global_data.h:485
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
Definition: cbify.cc:373
virtual void replace(const std::string &key, const std::string &value)=0
label_type::label_type_t label_type
Definition: global_data.h:550
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
uint32_t action
Definition: search.h:19
virtual void add_and_parse(const option_group_definition &group)=0
#define CB_TYPE_DR
Definition: cb_algs.h:13
size_t size() const
Definition: v_array.h:68
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43
CB::cb_class get_observed_cost(multi_ex &examples)
Definition: cb_adf.cc:99
parser * p
Definition: global_data.h:377
size_t fill_tied(v_array< ACTION_SCORE::action_score > &preds)
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45
v_array< ACTION_SCORE::action_score > _action_probs
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
void delete_action_scores(void *v)
Definition: action_score.cc:29
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
void gen_cs_example_ips(multi_ex &examples, COST_SENSITIVE::label &cs_labels, float clip_p)
LEARNER::single_learner * scorer
Definition: global_data.h:384
virtual void insert(const std::string &key, const std::string &value)=0
option_group_definition & add(T &&op)
Definition: options.h:90
std::vector< example * > multi_ex
Definition: example.h:122
label_parser cb_label
Definition: cb.cc:167
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
LEARNER::base_learner * setup(config::options_i &options, vw &all)
Definition: autolink.cc:11
LEARNER::single_learner * scorer
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
void delete_v()
Definition: v_array.h:98
cb_explore_adf_cover(size_t cover_size, float psi, bool nounif, bool first_only, LEARNER::multi_learner *cs_ldf_learner, LEARNER::single_learner *scorer, size_t cb_type)
void sort_action_probs(v_array< ACTION_SCORE::action_score > &probs, const std::vector< float > &scores)
v_array< wclass > costs
void learn(bfgs &b, base_learner &base, example &ec)
Definition: bfgs.cc:965
float f
Definition: cache.cc:40
multi_learner * as_multiline(learner< T, E > *l)
Definition: learner.h:468
COST_SENSITIVE::label pred_scores
label_parser lp
Definition: parser.h:102
#define CB_TYPE_MTR
Definition: cb_algs.h:16