Vowpal Wabbit
cb_explore_adf_bag.cc
Go to the documentation of this file.
1 #include "cb_explore_adf_bag.h"
2 
4 #include "reductions.h"
5 #include "cb_adf.h"
6 #include "rand48.h"
7 #include "bs.h"
8 #include "gen_cs_example.h"
9 #include "cb_explore.h"
10 #include "explore.h"
11 #include <vector>
12 #include <algorithm>
13 #include <cmath>
14 
15 // All exploration algorithms return a vector of (id, probability) tuples, sorted in order of scores. Each probability
16 // is the probability with which the corresponding action should be promoted to the top of the list.
17 
18 namespace VW
19 {
20 namespace cb_explore_adf
21 {
22 namespace bag
23 {
// NOTE(review): incomplete extract — the dump dropped original lines 24 (the
// class/struct header), 30 and 33 (members; presumably `bool _first_only;` and
// the `_action_probs` array, both of which predict_or_learn_impl references —
// TODO confirm against the real file), 38/40 (the constructor declaration is
// split, only its second line survives) and 48 (predict_or_learn_impl's
// declaration line). Do not edit this fragment without the original source.
25 {
26  private:
 // Exploration configuration captured at construction time (see the ctor's
 // member-init list below in the file).
27  float _epsilon;
28  size_t _bag_size;
29  bool _greedify;
31  std::shared_ptr<rand_state> _random_state;
32 
 // Scratch buffers reused across predictions: per-action accumulated scores
 // and per-action top-vote counts.
34  std::vector<float> _scores;
35  std::vector<float> _top_actions;
36 
37  public:
 // Continuation of the constructor declaration; its opening line is missing
 // from this extract.
39  float epsilon, size_t bag_size, bool greedify, bool first_only, std::shared_ptr<rand_state> random_state);
41 
42  // Should be called through cb_explore_adf_base for pre/post-processing
43  void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<false>(base, examples); }
44  void learn(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<true>(base, examples); }
45 
46  private:
 // Shared implementation behind predict()/learn(); the is_learn template flag
 // selects training vs. inference. Its declaration line is missing here.
47  template <bool is_learn>
49 };
50 
// Constructor: stores all configuration through the member-init list; the body
// is intentionally empty. NOTE(review): the signature's opening line (original
// line 51, presumably `cb_explore_adf_bag::cb_explore_adf_bag(` — see the
// declaration cross-reference later in this dump) is missing from this extract.
52  float epsilon, size_t bag_size, bool greedify, bool first_only, std::shared_ptr<rand_state> random_state)
53  : _epsilon(epsilon), _bag_size(bag_size), _greedify(greedify), _first_only(first_only), _random_state(random_state)
54 {
55 }
56 
// Bagging exploration: run each of the _bag_size base policies over the
// example set, accumulate their scores and top-action votes, then emit a
// probability distribution over actions. NOTE(review): incomplete extract —
// the function's signature line (original line 58) and the distribution-
// generation calls (original lines 98, 102–108; the later cross-references
// suggest _action_probs.clear(), generate_bag, enforce_minimum_probability
// and sort_action_probs — TODO confirm) are missing from this view.
57 template <bool is_learn>
59 {
60  // Randomize over predictions from a base set of predictors
 // preds aliases the shared prediction slot of the first example; every base
 // policy writes its action/score list there in turn.
61  v_array<ACTION_SCORE::action_score>& preds = examples[0]->pred.a_s;
62  uint32_t num_actions = (uint32_t)examples.size();
 // Empty example set: emit an empty prediction and bail out.
63  if (num_actions == 0)
64  {
65  preds.clear();
66  return;
67  }
68 
 // Reset the per-action accumulators for this prediction.
69  _scores.clear();
70  for (uint32_t i = 0; i < num_actions; i++) _scores.push_back(0.f);
71  _top_actions.assign(num_actions, 0);
 // One pass per bag member (base policy i).
72  for (uint32_t i = 0; i < _bag_size; i++)
73  {
74  // avoid updates to the random num generator
75  // for greedify, always update first policy once
 // count = number of training updates this policy receives for this example:
 // 0 in predict-only mode; exactly 1 for policy 0 under --greedify; otherwise
 // a bootstrap weight drawn via BS::weight_gen.
76  uint32_t count = is_learn ? ((_greedify && i == 0) ? 1 : BS::weight_gen(_random_state)) : 0;
77 
 // First call does learn (if any updates are due) or predict; either way it
 // fills preds with this policy's scored actions.
78  if (is_learn && count > 0)
79  LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset, i);
80  else
81  LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset, i);
82 
83  assert(preds.size() == num_actions);
 // Accumulate this policy's scores per action id.
84  for (auto e : preds) _scores[e.action] += e.score;
85 
 // Vote for the top action; without --first_only, a tie at the top splits the
 // vote equally across the tied actions (fill_tied reports how many tied).
86  if (!_first_only)
87  {
88  size_t tied_actions = fill_tied(preds);
89  for (size_t i = 0; i < tied_actions; ++i) _top_actions[preds[i].action] += 1.f / tied_actions;
90  }
91  else
92  _top_actions[preds[0].action] += 1.f;
 // Remaining (count - 1) bootstrap updates; predictions are discarded.
93  if (is_learn)
94  for (uint32_t j = 1; j < count; j++)
95  LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset, i);
96  }
97 
 // Seed _action_probs with one zero-probability entry per action. NOTE(review):
 // the line that resets _action_probs (original line 98) is missing here.
99  for (uint32_t i = 0; i < _scores.size(); i++) _action_probs.push_back({i, 0.});
100 
101  // generate distribution over actions
 // NOTE(review): original lines 102–108 (the calls that turn _top_actions into
 // probabilities and sort them) are missing from this extract.
104 
106 
108 
 // Copy the final (action, probability) pairs back into the shared prediction.
109  for (size_t i = 0; i < num_actions; i++) preds[i] = _action_probs[i];
110 }
111 
113 
// Reduction setup: registers the cb_explore_adf/bag command-line options and,
// when both are supplied, builds the bagging exploration learner on top of the
// cb_adf stack. Returns nullptr when this reduction is not enabled.
// NOTE(review): incomplete extract — the signature line (original line 114,
// per the cross-reference: `LEARNER::base_learner* setup(VW::config::options_i&
// options, vw& all)`) and original lines 142, 147, 151–153 (which presumably
// set label_type and call init_learner to produce `l` — TODO confirm) are
// missing from this view.
115 {
116  using config::make_option;
 // Option targets with their defaults; .keep() marks them for model
 // serialization.
117  bool cb_explore_adf_option = false;
118  float epsilon = 0.;
119  size_t bag_size = 0;
120  bool greedify = false;
121  bool first_only = false;
122  config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
123  new_options
124  .add(make_option("cb_explore_adf", cb_explore_adf_option)
125  .keep()
126  .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
127  .add(make_option("epsilon", epsilon).keep().help("epsilon-greedy exploration"))
128  .add(make_option("bag", bag_size).keep().help("bagging-based exploration"))
129  .add(make_option("greedify", greedify).keep().help("always update first policy once in bagging"))
130  .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"));
131  options.add_and_parse(new_options);
132 
 // This reduction only activates when both --cb_explore_adf and --bag were
 // given; otherwise defer to other setup functions.
133  if (!cb_explore_adf_option || !options.was_supplied("bag"))
134  return nullptr;
135 
136  // Ensure serialization of cb_adf in all cases.
137  if (!options.was_supplied("cb_adf"))
138  {
139  options.insert("cb_adf", "");
140  }
141 
143 
 // One weight-vector slot per bag member.
144  size_t problem_multiplier = bag_size;
145  LEARNER::multi_learner* base = as_multiline(setup_base(options, all));
 // Parse examples with contextual-bandit labels.
146  all.p->lp = CB::cb_label;
148 
 // Wrap the bag implementation in the common pre/post-processing shim.
149  using explore_type = cb_explore_adf_base<cb_explore_adf_bag>;
150  auto data = scoped_calloc_or_throw<explore_type>(epsilon, bag_size, greedify, first_only, all.get_random_state());
151 
 // NOTE(review): the init_learner call that defines `l` (original lines
 // 152–153) is missing from this extract.
154 
155  l.set_finish_example(explore_type::finish_multiline_example);
156  return make_base(l);
157 }
158 
159 } // namespace bag
160 } // namespace cb_explore_adf
161 } // namespace VW
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
uint32_t weight_gen(std::shared_ptr< rand_state > &state)
Definition: bs.h:17
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
void(* delete_prediction)(void *)
Definition: global_data.h:485
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
Definition: cbify.cc:373
void predict(LEARNER::multi_learner &base, multi_ex &examples)
label_type::label_type_t label_type
Definition: global_data.h:550
cb_explore_adf_bag(float epsilon, size_t bag_size, bool greedify, bool first_only, std::shared_ptr< rand_state > random_state)
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
uint32_t action
Definition: search.h:19
virtual void add_and_parse(const option_group_definition &group)=0
void learn(LEARNER::multi_learner &base, multi_ex &examples)
size_t size() const
Definition: v_array.h:68
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43
parser * p
Definition: global_data.h:377
size_t fill_tied(v_array< ACTION_SCORE::action_score > &preds)
std::shared_ptr< rand_state > get_random_state()
Definition: global_data.h:553
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
void delete_action_scores(void *v)
Definition: action_score.cc:29
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
virtual bool was_supplied(const std::string &key)=0
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
std::shared_ptr< rand_state > _random_state
virtual void insert(const std::string &key, const std::string &value)=0
option_group_definition & add(T &&op)
Definition: options.h:90
std::vector< example * > multi_ex
Definition: example.h:122
label_parser cb_label
Definition: cb.cc:167
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
Definition: autolink.cc:11
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
void delete_v()
Definition: v_array.h:98
void sort_action_probs(v_array< ACTION_SCORE::action_score > &probs, const std::vector< float > &scores)
v_array< ACTION_SCORE::action_score > _action_probs
void learn(bfgs &b, base_learner &base, example &ec)
Definition: bfgs.cc:965
float f
Definition: cache.cc:40
multi_learner * as_multiline(learner< T, E > *l)
Definition: learner.h:468
label_parser lp
Definition: parser.h:102