Vowpal Wabbit
Classes | Functions
CB_EXPLORE Namespace Reference

Classes

struct  cb_explore
 

Functions

template<bool is_learn>
void predict_or_learn_first (cb_explore &data, single_learner &base, example &ec)
 
template<bool is_learn>
void predict_or_learn_greedy (cb_explore &data, single_learner &base, example &ec)
 
template<bool is_learn>
void predict_or_learn_bag (cb_explore &data, single_learner &base, example &ec)
 
void get_cover_probabilities (cb_explore &data, single_learner &, example &ec, v_array< action_score > &probs)
 
template<bool is_learn>
void predict_or_learn_cover (cb_explore &data, single_learner &base, example &ec)
 
void print_update_cb_explore (vw &all, bool is_test, example &ec, std::stringstream &pred_string)
 
void output_example (vw &all, cb_explore &data, example &ec, CB::label &ld)
 
void finish_example (vw &all, cb_explore &c, example &ec)
 

Function Documentation

◆ finish_example()

void CB_EXPLORE::finish_example ( vw &  all,
cb_explore &  c,
example &  ec 
)

Definition at line 266 of file cb_explore.cc.

References polylabel::cb, VW::finish_example(), example::l, and output_example().

Referenced by cb_explore_setup().

267 {
268  output_example(all, c, ec, ec.l.cb);
269  VW::finish_example(all, ec);
270 }
CB::label cb
Definition: example.h:31
void output_example(vw &all, cb_explore &data, example &ec, CB::label &ld)
Definition: cb_explore.cc:235
void finish_example(vw &, example &)
Definition: parser.cc:881
polylabel l
Definition: example.h:57

◆ get_cover_probabilities()

void CB_EXPLORE::get_cover_probabilities ( cb_explore &  data,
single_learner & ,
example &  ec,
v_array< action_score > &  probs 
)

Definition at line 125 of file cb_explore.cc.

References ACTION_SCORE::begin_scores(), CB_EXPLORE::cb_explore::cbcs, v_array< T >::clear(), CB_EXPLORE::cb_explore::counter, CB_EXPLORE::cb_explore::cover_size, CB_EXPLORE::cb_explore::cs, ACTION_SCORE::end_scores(), exploration::enforce_minimum_probability(), f, polyprediction::multiclass, GEN_CS::cb_to_cs::num_actions, example::pred, CB_EXPLORE::cb_explore::preds, and v_array< T >::push_back().

Referenced by predict_or_learn_cover().

126 {
127  float additive_probability = 1.f / (float)data.cover_size;
128  data.preds.clear();
129 
130  for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.});
131 
132  for (size_t i = 0; i < data.cover_size; i++)
133  {
134  // get predicted cost-sensitive predictions
135  if (i == 0)
136  data.cs->predict(ec, i);
137  else
138  data.cs->predict(ec, i + 1);
139  uint32_t pred = ec.pred.multiclass;
140  probs[pred - 1].score += additive_probability;
141  data.preds.push_back((uint32_t)pred);
142  }
143  uint32_t num_actions = data.cbcs.num_actions;
144 
145  float min_prob = std::min(1.f / num_actions, 1.f / (float)std::sqrt(data.counter * num_actions));
146 
147  enforce_minimum_probability(min_prob * num_actions, false, begin_scores(probs), end_scores(probs));
148 
149  data.counter++;
150 }
uint32_t multiclass
Definition: example.h:49
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
uint32_t num_actions
learner< cb_explore, example > * cs
Definition: cb_explore.cc:30
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
v_array< uint32_t > preds
Definition: cb_explore.cc:23
polyprediction pred
Definition: example.h:60
float f
Definition: cache.cc:40

◆ output_example()

void CB_EXPLORE::output_example ( vw &  all,
cb_explore &  data,
example &  ec,
CB::label &  ld 
)

Definition at line 235 of file cb_explore.cc.

References polyprediction::a_s, c, CB::cb_label, CB_EXPLORE::cb_explore::cbcs, f, vw::final_prediction_sink, CB_ALGS::get_cost_estimate(), CB_ADF::get_observed_cost(), GEN_CS::cb_to_cs::known_cost, loss(), example::num_features, example::pred, GEN_CS::cb_to_cs::pred_scores, vw::print_text, print_update_cb_explore(), vw::sd, v_array< T >::size(), example::tag, test_label(), example::test_only, and shared_data::update().

Referenced by finish_example().

236 {
237  float loss = 0.;
238 
239  cb_to_cs& c = data.cbcs;
240 
241  if ((c.known_cost = get_observed_cost(ld)) != nullptr)
242  for (uint32_t i = 0; i < ec.pred.a_s.size(); i++)
243  loss += get_cost_estimate(c.known_cost, c.pred_scores, i + 1) * ec.pred.a_s[i].score;
244 
245  all.sd->update(ec.test_only, get_observed_cost(ld) != nullptr, loss, 1.f, ec.num_features);
246 
247  std::stringstream ss;
248  float maxprob = 0.;
249  uint32_t maxid = 0;
250  for (uint32_t i = 0; i < ec.pred.a_s.size(); i++)
251  {
252  ss << std::fixed << ec.pred.a_s[i].score << " ";
253  if (ec.pred.a_s[i].score > maxprob)
254  {
255  maxprob = ec.pred.a_s[i].score;
256  maxid = i + 1;
257  }
258  }
259  for (int sink : all.final_prediction_sink) all.print_text(sink, ss.str(), ec.tag);
260 
261  std::stringstream sso;
262  sso << maxid << ":" << std::fixed << maxprob;
263  print_update_cb_explore(all, CB::cb_label.test_label(&ld), ec, sso);
264 }
v_array< char > tag
Definition: example.h:63
ACTION_SCORE::action_scores a_s
Definition: example.h:47
COST_SENSITIVE::label pred_scores
v_array< int > final_prediction_sink
Definition: global_data.h:518
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
Definition: cb_algs.h:58
void print_update_cb_explore(vw &all, bool is_test, example &ec, std::stringstream &pred_string)
Definition: cb_explore.cc:221
size_t size() const
Definition: v_array.h:68
CB::cb_class get_observed_cost(multi_ex &examples)
Definition: cb_adf.cc:99
CB::cb_class * known_cost
shared_data * sd
Definition: global_data.h:375
size_t num_features
Definition: example.h:67
void(* print_text)(int, std::string, v_array< char >)
Definition: global_data.h:522
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190
label_parser cb_label
Definition: cb.cc:167
bool test_label(void *v)
Definition: simple_label.cc:70
polyprediction pred
Definition: example.h:60
constexpr uint64_t c
Definition: rand48.cc:12
float f
Definition: cache.cc:40
bool test_only
Definition: example.h:76

◆ predict_or_learn_bag()

template<bool is_learn>
void CB_EXPLORE::predict_or_learn_bag ( cb_explore &  data,
single_learner &  base,
example &  ec 
)

Definition at line 101 of file cb_explore.cc.

References CB_EXPLORE::cb_explore::_random_state, polyprediction::a_s, CB_EXPLORE::cb_explore::bag_size, CB_EXPLORE::cb_explore::cbcs, v_array< T >::clear(), LEARNER::learner< T, E >::learn(), polyprediction::multiclass, GEN_CS::cb_to_cs::num_actions, example::pred, LEARNER::learner< T, E >::predict(), prediction_type::prob, v_array< T >::push_back(), and BS::weight_gen().

102 {
103  // Randomize over predictions from a base set of predictors
104  action_scores probs = ec.pred.a_s;
105  probs.clear();
106 
107  for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.});
108  float prob = 1.f / (float)data.bag_size;
109  for (size_t i = 0; i < data.bag_size; i++)
110  {
111  uint32_t count = BS::weight_gen(data._random_state);
112  if (is_learn && count > 0)
113  base.learn(ec, i);
114  else
115  base.predict(ec, i);
116  uint32_t chosen = ec.pred.multiclass - 1;
117  probs[chosen].score += prob;
118  if (is_learn)
119  for (uint32_t j = 1; j < count; j++) base.learn(ec, i);
120  }
121 
122  ec.pred.a_s = probs;
123 }
uint32_t multiclass
Definition: example.h:49
ACTION_SCORE::action_scores a_s
Definition: example.h:47
void predict(E &ec, size_t i=0)
Definition: learner.h:169
uint32_t weight_gen(std::shared_ptr< rand_state > &state)
Definition: bs.h:17
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
std::shared_ptr< rand_state > _random_state
Definition: cb_explore.cc:21
uint32_t num_actions
polyprediction pred
Definition: example.h:60
void learn(E &ec, size_t i=0)
Definition: learner.h:160

◆ predict_or_learn_cover()

template<bool is_learn>
void CB_EXPLORE::predict_or_learn_cover ( cb_explore &  data,
single_learner &  base,
example &  ec 
)

Definition at line 153 of file cb_explore.cc.

References polyprediction::a_s, polylabel::cb, CB_EXPLORE::cb_explore::cb_label, CB_EXPLORE::cb_explore::cbcs, v_array< T >::clear(), COST_SENSITIVE::label::costs, CB_EXPLORE::cb_explore::counter, CB_EXPLORE::cb_explore::cover_probs, CB_EXPLORE::cb_explore::cover_size, polylabel::cs, CB_EXPLORE::cb_explore::cs, CB_EXPLORE::cb_explore::cs_label, f, get_cover_probabilities(), CB_ADF::get_observed_cost(), GEN_CS::cb_to_cs::known_cost, example::l, LEARNER::learner< T, E >::learn(), GEN_CS::cb_to_cs::num_actions, example::pred, CB_EXPLORE::cb_explore::preds, CB_EXPLORE::cb_explore::psi, and CB_EXPLORE::cb_explore::second_cs_label.

154 {
155  // Randomize over predictions from a base set of predictors
156  // Use cost sensitive oracle to cover actions to form distribution.
157 
158  uint32_t num_actions = data.cbcs.num_actions;
159 
160  action_scores probs = ec.pred.a_s;
161  probs.clear();
162  data.cs_label.costs.clear();
163 
164  for (uint32_t j = 0; j < num_actions; j++) data.cs_label.costs.push_back({FLT_MAX, j + 1, 0., 0.});
165 
166  size_t cover_size = data.cover_size;
167  size_t counter = data.counter;
168  v_array<float>& probabilities = data.cover_probs;
169  v_array<uint32_t>& predictions = data.preds;
170 
171  float additive_probability = 1.f / (float)cover_size;
172 
173  float min_prob = std::min(1.f / num_actions, 1.f / (float)std::sqrt(counter * num_actions));
174 
175  data.cb_label = ec.l.cb;
176 
177  ec.l.cs = data.cs_label;
178  get_cover_probabilities(data, base, ec, probs);
179 
180  if (is_learn)
181  {
182  ec.l.cb = data.cb_label;
183  base.learn(ec);
184 
185  // Now update oracles
186 
187  // 1. Compute loss vector
188  data.cs_label.costs.clear();
189  float norm = min_prob * num_actions;
190  ec.l.cb = data.cb_label;
191  data.cbcs.known_cost = get_observed_cost(data.cb_label);
192  gen_cs_example<false>(data.cbcs, ec, data.cb_label, data.cs_label);
193  for (uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0;
194 
195  ec.l.cs = data.second_cs_label;
196  // 2. Update functions
197  for (size_t i = 0; i < cover_size; i++)
198  {
199  // Create costs of each action based on online cover
200  for (uint32_t j = 0; j < num_actions; j++)
201  {
202  float pseudo_cost =
203  data.cs_label.costs[j].x - data.psi * min_prob / (std::max(probabilities[j], min_prob) / norm) + 1;
204  data.second_cs_label.costs[j].class_index = j + 1;
205  data.second_cs_label.costs[j].x = pseudo_cost;
206  }
207  if (i != 0)
208  data.cs->learn(ec, i + 1);
209  if (probabilities[predictions[i] - 1] < min_prob)
210  norm += std::max(0.f, additive_probability - (min_prob - probabilities[predictions[i] - 1]));
211  else
212  norm += additive_probability;
213  probabilities[predictions[i] - 1] += additive_probability;
214  }
215  }
216 
217  ec.l.cb = data.cb_label;
218  ec.pred.a_s = probs;
219 }
ACTION_SCORE::action_scores a_s
Definition: example.h:47
COST_SENSITIVE::label second_cs_label
Definition: cb_explore.cc:28
COST_SENSITIVE::label cs_label
Definition: cb_explore.cc:27
CB::label cb
Definition: example.h:31
CB::cb_class get_observed_cost(multi_ex &examples)
Definition: cb_adf.cc:99
CB::cb_class * known_cost
COST_SENSITIVE::label cs
Definition: example.h:30
void clear()
Definition: v_array.h:88
uint32_t num_actions
learner< cb_explore, example > * cs
Definition: cb_explore.cc:30
v_array< float > cover_probs
Definition: cb_explore.cc:24
polylabel l
Definition: example.h:57
void get_cover_probabilities(cb_explore &data, single_learner &, example &ec, v_array< action_score > &probs)
Definition: cb_explore.cc:125
v_array< uint32_t > preds
Definition: cb_explore.cc:23
polyprediction pred
Definition: example.h:60
void learn(E &ec, size_t i=0)
Definition: learner.h:160
v_array< wclass > costs
float f
Definition: cache.cc:40

◆ predict_or_learn_first()

template<bool is_learn>
void CB_EXPLORE::predict_or_learn_first ( cb_explore &  data,
single_learner &  base,
example &  ec 
)

Definition at line 51 of file cb_explore.cc.

References polyprediction::a_s, polylabel::cb, CB_EXPLORE::cb_explore::cbcs, v_array< T >::clear(), CB::label::costs, example::l, LEARNER::learner< T, E >::learn(), polyprediction::multiclass, GEN_CS::cb_to_cs::num_actions, example::pred, LEARNER::learner< T, E >::predict(), prediction_type::prob, v_array< T >::push_back(), and CB_EXPLORE::cb_explore::tau.

52 {
53  // Explore tau times, then act according to optimal.
54  action_scores probs = ec.pred.a_s;
55 
56  if (is_learn && ec.l.cb.costs[0].probability < 1)
57  base.learn(ec);
58  else
59  base.predict(ec);
60 
61  probs.clear();
62  if (data.tau > 0)
63  {
64  float prob = 1.f / (float)data.cbcs.num_actions;
65  for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, prob});
66  data.tau--;
67  }
68  else
69  {
70  uint32_t chosen = ec.pred.multiclass - 1;
71  for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.});
72  probs[chosen].score = 1.0;
73  }
74 
75  ec.pred.a_s = probs;
76 }
uint32_t multiclass
Definition: example.h:49
ACTION_SCORE::action_scores a_s
Definition: example.h:47
void predict(E &ec, size_t i=0)
Definition: learner.h:169
CB::label cb
Definition: example.h:31
v_array< cb_class > costs
Definition: cb.h:27
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
uint32_t num_actions
polylabel l
Definition: example.h:57
polyprediction pred
Definition: example.h:60
void learn(E &ec, size_t i=0)
Definition: learner.h:160

◆ predict_or_learn_greedy()

template<bool is_learn>
void CB_EXPLORE::predict_or_learn_greedy ( cb_explore &  data,
single_learner &  base,
example &  ec 
)

Definition at line 79 of file cb_explore.cc.

References polyprediction::a_s, ACTION_SCORE::begin_scores(), CB_EXPLORE::cb_explore::cbcs, v_array< T >::clear(), ACTION_SCORE::end_scores(), CB_EXPLORE::cb_explore::epsilon, exploration::generate_epsilon_greedy(), LEARNER::learner< T, E >::learn(), polyprediction::multiclass, GEN_CS::cb_to_cs::num_actions, example::pred, LEARNER::learner< T, E >::predict(), v_array< T >::push_back(), and v_array< T >::resize().

80 {
81  // Explore uniform random an epsilon fraction of the time.
82  // TODO: pointers are copied here. What happens if base.learn/base.predict re-allocs?
83  // ec.pred.a_s = probs; will restore the then-freed memory
84  action_scores probs = ec.pred.a_s;
85  probs.clear();
86 
87  if (is_learn)
88  base.learn(ec);
89  else
90  base.predict(ec);
91 
92  // pre-allocate pdf
93  probs.resize(data.cbcs.num_actions);
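94  for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0});
95  generate_epsilon_greedy(data.epsilon, ec.pred.multiclass - 1, begin_scores(probs), end_scores(probs));
96 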
97  ec.pred.a_s = probs;
98 }
void resize(size_t length)
Definition: v_array.h:69
uint32_t multiclass
Definition: example.h:49
ACTION_SCORE::action_scores a_s
Definition: example.h:47
void predict(E &ec, size_t i=0)
Definition: learner.h:169
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
uint32_t num_actions
polyprediction pred
Definition: example.h:60
void learn(E &ec, size_t i=0)
Definition: learner.h:160

◆ print_update_cb_explore()

void CB_EXPLORE::print_update_cb_explore ( vw &  all,
bool  is_test,
example &  ec,
std::stringstream &  pred_string 
)

Definition at line 221 of file cb_explore.cc.

References vw::bfgs, polylabel::cb, CB::label::costs, vw::current_pass, shared_data::dump_interval, vw::holdout_set_off, example::l, example::num_features, shared_data::print_update(), vw::progress_add, vw::progress_arg, vw::quiet, vw::sd, and shared_data::weighted_examples().

Referenced by output_example().

222 {
223  if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
224  {
225  std::stringstream label_string;
226  if (is_test)
227  label_string << " unknown";
228  else
229  label_string << ec.l.cb.costs[0].action;
230  all.sd->print_update(all.holdout_set_off, all.current_pass, label_string.str(), pred_string.str(), ec.num_features,
231  all.progress_add, all.progress_arg);
232  }
233 }
CB::label cb
Definition: example.h:31
v_array< cb_class > costs
Definition: cb.h:27
bool quiet
Definition: global_data.h:487
bool holdout_set_off
Definition: global_data.h:499
bool progress_add
Definition: global_data.h:545
shared_data * sd
Definition: global_data.h:375
float progress_arg
Definition: global_data.h:546
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:225
bool bfgs
Definition: global_data.h:412
size_t num_features
Definition: example.h:67
uint64_t current_pass
Definition: global_data.h:396
polylabel l
Definition: example.h:57
double weighted_examples()
Definition: global_data.h:188
float dump_interval
Definition: global_data.h:147