Vowpal Wabbit
cb_explore_adf_greedy.cc
Go to the documentation of this file.
2 #include "reductions.h"
3 #include "cb_adf.h"
4 #include "rand48.h"
5 #include "bs.h"
6 #include "gen_cs_example.h"
7 #include "cb_explore.h"
8 #include "explore.h"
9 #include <vector>
10 #include <algorithm>
11 #include <cmath>
12 #include <functional>
13 
14 namespace VW
15 {
16 namespace cb_explore_adf
17 {
18 namespace greedy
19 {
// Epsilon-greedy exploration policy for cb_explore_adf. It stores the
// exploration rate and exposes predict/learn entry points that both forward to
// one templated implementation, selected by a compile-time is_learn flag.
// NOTE(review): the class-head line that should precede this brace, and the
// declarations at listing lines 24 and 36, are absent from this listing
// (the embedded numbering jumps 19->21, 23->25 and 35->37) - confirm against
// the original file before editing.
21 {
22  private:
 // Exploration rate: predict_or_learn_impl gives every action a score of
 // _epsilon / num_actions before adding the remaining (1 - _epsilon) mass.
23  float _epsilon;
25 
26  public:
 // Stores epsilon and first_only; see the out-of-class definition.
27  cb_explore_adf_greedy(float epsilon, bool first_only);
28  ~cb_explore_adf_greedy() = default;
29 
30  // Should be called through cb_explore_adf_base for pre/post-processing
 // predict instantiates the shared implementation with is_learn = false.
31  void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<false>(base, examples); }
 // learn instantiates the shared implementation with is_learn = true.
32  void learn(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl<true>(base, examples); }
33 
34  private:
 // NOTE(review): the declaration this template header introduces (listing
 // line 36) is missing here; predict/learn above call
 // predict_or_learn_impl<is_learn>(base, examples), so it is presumably that
 // member - confirm.
35  template <bool is_learn>
37 };
38 
// Constructs the greedy exploration policy.
//   epsilon    - copied into _epsilon (the exploration rate).
//   first_only - copied into _first_only; when set, predict_or_learn_impl adds
//                the whole (1 - epsilon) mass to the first prediction only
//                instead of splitting it across tied predictions.
// The body is intentionally empty: all state is set in the initializer list.
39 cb_explore_adf_greedy::cb_explore_adf_greedy(float epsilon, bool first_only)
40  : _epsilon(epsilon), _first_only(first_only)
41 {
42 }
43 
// Runs the base learner (learn or predict, chosen by is_learn) on the
// multi-line example and then overwrites the scores in examples[0]->pred.a_s:
// every action first receives _epsilon / num_actions, and the remaining
// (1 - _epsilon) is added either to the first entry only (_first_only) or in
// equal shares to the first tied_actions entries.
// NOTE(review): the function signature line (listing line 45) is absent from
// this listing; the body uses parameters named base and examples.
44 template <bool is_learn>
46 {
47  // Explore uniformly at random an epsilon fraction of the time.
 // NOTE(review): examples[0] is dereferenced unconditionally - assumes the
 // caller never passes an empty multi_ex; confirm.
48  LEARNER::multiline_learn_or_predict<is_learn>(base, examples, examples[0]->ft_offset);
49 
 // Reference into the first example's prediction: the writes below modify it in place.
50  ACTION_SCORE::action_scores& preds = examples[0]->pred.a_s;
51 
52  uint32_t num_actions = (uint32_t)preds.size();
53 
 // presumably the count of leading entries in preds that tie for the best
 // score - fill_tied is defined elsewhere; verify against its definition.
54  size_t tied_actions = fill_tied(preds);
55 
 // NOTE(review): if preds is empty this divides by zero (float), and the
 // first-only branch below indexes preds[0] - confirm an empty action set
 // cannot reach this point.
56  const float prob = _epsilon / num_actions;
57  for (size_t i = 0; i < num_actions; i++) preds[i].score = prob;
58  if (!_first_only)
59  {
 // Split the non-exploration mass evenly across the first tied_actions entries.
60  for (size_t i = 0; i < tied_actions; ++i) preds[i].score += (1.f - _epsilon) / tied_actions;
61  }
62  else
 // Give all of the non-exploration mass to the first entry.
63  preds[0].score += 1.f - _epsilon;
64 }
65 
// Reduction setup for epsilon-greedy cb_explore_adf. Registers and parses the
// cb_explore_adf / epsilon / first_only options, returns nullptr when
// cb_explore_adf was not requested or another exploration strategy was
// supplied, and otherwise builds the greedy exploration data on top of the
// base learner and returns it via make_base.
// NOTE(review): the function signature line (listing line 66) and listing
// lines 96, 105 and 110-111 are absent from this listing; the declaration of
// the learner `l` used near the end is among the missing lines - confirm
// against the original file before editing.
67 {
68  using config::make_option;
69  bool cb_explore_adf_option = false;
70  float epsilon = 0.;
71  bool first_only = false;
72 
73  config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
74  new_options
75  .add(make_option("cb_explore_adf", cb_explore_adf_option)
76  .keep()
77  .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
78  .add(make_option("epsilon", epsilon).keep().help("epsilon-greedy exploration"))
79  .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"));
80  options.add_and_parse(new_options);
81 
82  // NOTE: epsilon-greedy is the default explore type. This basically runs if none of the other explore strategies are
83  // used
84  bool use_greedy = !(options.was_supplied("first") || options.was_supplied("bag") || options.was_supplied("cover") ||
85  options.was_supplied("regcb") || options.was_supplied("regcbopt") || options.was_supplied("softmax"));
86 
 // Decline to set up this reduction unless cb_explore_adf is on and greedy is the selected strategy.
87  if (!cb_explore_adf_option || !use_greedy)
88  return nullptr;
89 
90  // Ensure serialization of cb_adf in all cases.
91  if (!options.was_supplied("cb_adf"))
92  {
93  options.insert("cb_adf", "");
94  }
95 
97 
 // NOTE(review): not referenced by any line visible in this listing;
 // presumably consumed by the missing learner-construction statement - confirm.
98  size_t problem_multiplier = 1;
99 
 // Default exploration rate when the user did not pass --epsilon.
100  if (!options.was_supplied("epsilon"))
101  epsilon = 0.05f;
102 
103  LEARNER::multi_learner* base = as_multiline(setup_base(options, all));
 // Use the contextual-bandit label parser for incoming examples.
104  all.p->lp = CB::cb_label;
106 
107  using explore_type = cb_explore_adf_base<cb_explore_adf_greedy>;
 // Arguments are forwarded to the exploration object's constructor.
108  auto data = scoped_calloc_or_throw<explore_type>(epsilon, first_only);
109 
112 
 // NOTE(review): `l` is declared on a line missing from this listing (110-111).
113  l.set_finish_example(explore_type::finish_multiline_example);
114  return make_base(l);
115 }
116 
117 } // namespace greedy
118 } // namespace cb_explore_adf
119 } // namespace VW
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
void(* delete_prediction)(void *)
Definition: global_data.h:485
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
Definition: cbify.cc:373
label_type::label_type_t label_type
Definition: global_data.h:550
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
virtual void add_and_parse(const option_group_definition &group)=0
size_t size() const
Definition: v_array.h:68
parser * p
Definition: global_data.h:377
size_t fill_tied(v_array< ACTION_SCORE::action_score > &preds)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
void delete_action_scores(void *v)
Definition: action_score.cc:29
virtual bool was_supplied(const std::string &key)=0
void predict(LEARNER::multi_learner &base, multi_ex &examples)
virtual void insert(const std::string &key, const std::string &value)=0
option_group_definition & add(T &&op)
Definition: options.h:90
std::vector< example * > multi_ex
Definition: example.h:122
label_parser cb_label
Definition: cb.cc:167
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
Definition: autolink.cc:11
void learn(LEARNER::multi_learner &base, multi_ex &examples)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
void learn(bfgs &b, base_learner &base, example &ec)
Definition: bfgs.cc:965
float f
Definition: cache.cc:40
multi_learner * as_multiline(learner< T, E > *l)
Definition: learner.h:468
label_parser lp
Definition: parser.h:102