Vowpal Wabbit
gen_cs_example.h
Go to the documentation of this file.
1 /*
2  Copyright (c) by respective owners including Yahoo!, Microsoft, and
3  individual contributors. All rights reserved. Released under a BSD (revised)
4  license as described in the file LICENSE.
5 */
6 #pragma once
7 #include <float.h>
8 
9 #include "vw.h"
10 #include "reductions.h"
11 #include "cb_algs.h"
12 #include "vw_exception.h"
13 
14 namespace GEN_CS
15 {
16 struct cb_to_cs
17 {
18  size_t cb_type;
19  uint32_t num_actions;
26 
28 };
29 
31 {
32  size_t cb_type;
33 
34  // for MTR
35  uint64_t action_sum;
36  uint64_t event_sum;
37  uint32_t mtr_example;
38  multi_ex mtr_ec_seq; // shared + the one example.
39 
40  // for DR
44 };
45 
47 
48 float safe_probability(float prob);
49 
50 void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld, float clip_p = 0.f);
51 
52 template <bool is_learn>
54 { // this implements the direct estimation method, where costs are directly specified by the learned regressor.
55  CB::label ld = ec.l.cb;
56 
57  float min = FLT_MAX;
58  uint32_t argmin = 1;
59  // generate cost sensitive example
60  cs_ld.costs.clear();
61  c.pred_scores.costs.clear();
62 
63  if (ld.costs.size() == 0 ||
64  (ld.costs.size() == 1 &&
65  ld.costs[0].cost != FLT_MAX)) // this is a typical example where we can perform all actions
66  { // in this case generate cost-sensitive example with all actions
67  for (uint32_t i = 1; i <= c.num_actions; i++)
68  {
69  COST_SENSITIVE::wclass wc = {0., i, 0., 0.};
70  // get cost prediction for this action
71  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, i, 0);
72  if (wc.x < min)
73  {
74  min = wc.x;
75  argmin = i;
76  }
77 
78  c.pred_scores.costs.push_back(wc);
79 
80  if (c.known_cost != nullptr && c.known_cost->action == i)
81  {
82  c.nb_ex_regressors++;
83  c.avg_loss_regressors += (1.0f / c.nb_ex_regressors) *
84  ((c.known_cost->cost - wc.x) * (c.known_cost->cost - wc.x) - c.avg_loss_regressors);
85  c.last_pred_reg = wc.x;
87  }
88 
89  cs_ld.costs.push_back(wc);
90  }
91  }
92  else // this is an example where we can only perform a subset of the actions
93  { // in this case generate cost-sensitive example with only allowed actions
94  for (auto& cl : ld.costs)
95  {
96  COST_SENSITIVE::wclass wc = {0., cl.action, 0., 0.};
97 
98  // get cost prediction for this action
99  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, cl.action, 0);
100  if (wc.x < min || (wc.x == min && cl.action < argmin))
101  {
102  min = wc.x;
103  argmin = cl.action;
104  }
105  c.pred_scores.costs.push_back(wc);
106 
107  if (c.known_cost != nullptr && c.known_cost->action == cl.action)
108  {
109  c.nb_ex_regressors++;
110  c.avg_loss_regressors += (1.0f / c.nb_ex_regressors) *
111  ((c.known_cost->cost - wc.x) * (c.known_cost->cost - wc.x) - c.avg_loss_regressors);
112  c.last_pred_reg = wc.x;
114  }
115 
116  cs_ld.costs.push_back(wc);
117  }
118  }
119 
120  ec.pred.multiclass = argmin;
121 }
122 
123 template <bool is_learn>
124 void gen_cs_label(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld, uint32_t action, float clip_p = 0.f)
125 {
126  COST_SENSITIVE::wclass wc = {0., action, 0., 0.};
127 
128  // get cost prediction for this action
129  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, action, c.num_actions);
130 
131  c.pred_scores.costs.push_back(wc);
132  // add correction if we observed cost for this action and regressor is wrong
133  if (c.known_cost != nullptr && c.known_cost->action == action)
134  {
135  c.nb_ex_regressors++;
136  c.avg_loss_regressors += (1.0f / c.nb_ex_regressors) *
137  ((c.known_cost->cost - wc.x) * (c.known_cost->cost - wc.x) - c.avg_loss_regressors);
138  c.last_pred_reg = wc.x;
140  wc.x += (c.known_cost->cost - wc.x) / std::max(c.known_cost->probability, clip_p);
141  }
142 
143  cs_ld.costs.push_back(wc);
144 }
145 
146 template <bool is_learn>
147 void gen_cs_example_dr(cb_to_cs& c, example& ec, CB::label& ld, COST_SENSITIVE::label& cs_ld, float /*clip_p*/ = 0.f)
148 { // this implements the doubly robust method
149  cs_ld.costs.clear();
150  c.pred_scores.costs.clear();
151  if (ld.costs.size() == 0) // a test example
152  for (uint32_t i = 1; i <= c.num_actions; i++)
153  { // Explicit declaration for a weak compiler.
154  COST_SENSITIVE::wclass temp = {FLT_MAX, i, 0., 0.};
155  cs_ld.costs.push_back(temp);
156  }
157  else if (ld.costs.size() == 0 || (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX))
158  // this is a typical example where we can perform all actions
159  // in this case generate cost-sensitive example with all actions
160  for (uint32_t i = 1; i <= c.num_actions; i++) gen_cs_label<is_learn>(c, ec, cs_ld, i);
161  else // this is an example where we can only perform a subset of the actions
162  // in this case generate cost-sensitive example with only allowed actions
163  for (auto& cl : ld.costs) gen_cs_label<is_learn>(c, ec, cs_ld, cl.action);
164 }
165 
166 template <bool is_learn>
168 {
169  switch (c.cb_type)
170  {
171  case CB_TYPE_IPS:
172  gen_cs_example_ips(c, ld, cs_ld);
173  break;
174  case CB_TYPE_DM:
175  gen_cs_example_dm<is_learn>(c, ec, cs_ld);
176  break;
177  case CB_TYPE_DR:
178  gen_cs_example_dr<is_learn>(c, ec, ld, cs_ld);
179  break;
180  default:
181  THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type);
182  }
183 }
184 
185 void gen_cs_test_example(multi_ex& examples, COST_SENSITIVE::label& cs_labels);
186 
187 void gen_cs_example_ips(multi_ex& examples, COST_SENSITIVE::label& cs_labels, float clip_p = 0.f);
188 
189 void gen_cs_example_dm(multi_ex& examples, COST_SENSITIVE::label& cs_labels);
190 
192 
193 void gen_cs_example_sm(multi_ex& examples, uint32_t chosen_action, float sign_offset,
194  ACTION_SCORE::action_scores action_vals, COST_SENSITIVE::label& cs_labels);
195 
196 template <bool is_learn>
197 void gen_cs_example_dr(cb_to_cs_adf& c, multi_ex& examples, COST_SENSITIVE::label& cs_labels, float clip_p = 0.f)
198 { // size_t mysize = examples.size();
199  c.pred_scores.costs.clear();
200 
201  cs_labels.costs.clear();
202  for (size_t i = 0; i < examples.size(); i++)
203  {
204  if (CB_ALGS::example_is_newline_not_header(*examples[i]))
205  continue;
206 
207  COST_SENSITIVE::wclass wc = {0., (uint32_t)i, 0., 0.};
208 
209  if (c.known_cost.action == i)
210  {
211  int known_index = c.known_cost.action;
212  c.known_cost.action = 0;
213  // get cost prediction for this label
214  // num_actions should be 1 effectively.
215  // my get_cost_pred function will use 1 for 'index-1+base'
216  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, &(c.known_cost), *(examples[i]), 0, 2);
217  c.known_cost.action = known_index;
218  }
219  else
220  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, nullptr, *(examples[i]), 0, 2);
221 
222  c.pred_scores.costs.push_back(wc); // done
223 
224  // add correction if we observed cost for this action and regressor is wrong
225  if (c.known_cost.probability != -1 && c.known_cost.action == i)
226  wc.x += (c.known_cost.cost - wc.x) / std::max(c.known_cost.probability, clip_p);
227  cs_labels.costs.push_back(wc);
228  }
229 }
230 
231 template <bool is_learn>
233 {
234  switch (c.cb_type)
235  {
236  case CB_TYPE_IPS:
237  gen_cs_example_ips(ec_seq, cs_labels);
238  break;
239  case CB_TYPE_DR:
240  gen_cs_example_dr<is_learn>(c, ec_seq, cs_labels);
241  break;
242  case CB_TYPE_MTR:
243  gen_cs_example_mtr(c, ec_seq, cs_labels);
244  break;
245  default:
246  THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type);
247  }
248 }
249 
250 template <bool is_learn>
252  COST_SENSITIVE::label& cs_labels, v_array<COST_SENSITIVE::label>& prepped_cs_labels, uint64_t offset, size_t id = 0)
253 {
254  cb_labels.clear();
255  if (prepped_cs_labels.size() < cs_labels.costs.size() + 1)
256  {
257  prepped_cs_labels.resize(cs_labels.costs.size() + 1);
258  prepped_cs_labels.end() = prepped_cs_labels.end_array;
259  }
260 
261  // 1st: save cb_label (into mydata) and store cs_label for each example, which will be passed into base.learn.
262  // also save offsets
263  uint64_t saved_offset = examples[0]->ft_offset;
264  size_t index = 0;
265  for (auto ec : examples)
266  {
267  cb_labels.push_back(ec->l.cb);
268  prepped_cs_labels[index].costs.clear();
269  prepped_cs_labels[index].costs.push_back(cs_labels.costs[index]);
270  ec->l.cs = prepped_cs_labels[index++];
271  ec->ft_offset = offset;
272  }
273 
274  // 2nd: predict for each ex
275  // // call base.predict for all examples
276  if (is_learn)
277  base.learn(examples, (int32_t)id);
278  else
279  base.predict(examples, (int32_t)id);
280 
281  // 3rd: restore cb_label for each example
282  // (**ec).l.cb = array.element.
283  // and restore offsets
284  for (size_t i = 0; i < examples.size(); ++i)
285  {
286  examples[i]->l.cb = cb_labels[i];
287  examples[i]->ft_offset = saved_offset;
288  }
289 }
290 } // namespace GEN_CS
void gen_cs_example_sm(multi_ex &, uint32_t chosen_action, float sign_offset, ACTION_SCORE::action_scores action_vals, COST_SENSITIVE::label &cs_labels)
CB::cb_class known_cost
void resize(size_t length)
Definition: v_array.h:69
void call_cs_ldf(LEARNER::multi_learner &base, multi_ex &examples, v_array< CB::label > &cb_labels, COST_SENSITIVE::label &cs_labels, v_array< COST_SENSITIVE::label > &prepped_cs_labels, uint64_t offset, size_t id=0)
bool example_is_newline_not_header(example const &ec)
Definition: cb_algs.h:80
uint32_t multiclass
Definition: example.h:49
COST_SENSITIVE::label pred_scores
void gen_cs_label(cb_to_cs &c, example &ec, COST_SENSITIVE::label &cs_ld, uint32_t action, float clip_p=0.f)
void predict(E &ec, size_t i=0)
Definition: learner.h:169
#define CB_TYPE_IPS
Definition: cb_algs.h:15
CB::label cb
Definition: example.h:31
cb_class * get_observed_cost(CB::label &ld)
v_array< cb_class > costs
Definition: cb.h:27
uint32_t action
Definition: search.h:19
#define CB_TYPE_DM
Definition: cb_algs.h:14
void gen_cs_example_dm(multi_ex &examples, COST_SENSITIVE::label &cs_labels)
#define CB_TYPE_DR
Definition: cb_algs.h:13
size_t size() const
Definition: v_array.h:68
void gen_cs_example_dr(cb_to_cs &c, example &ec, CB::label &ld, COST_SENSITIVE::label &cs_ld, float=0.f)
CB::cb_class * known_cost
uint32_t action
Definition: cb.h:18
void push_back(const T &new_ele)
Definition: v_array.h:107
float probability
Definition: cb.h:19
void clear()
Definition: v_array.h:88
uint32_t num_actions
LEARNER::single_learner * scorer
void gen_cs_example_ips(multi_ex &examples, COST_SENSITIVE::label &cs_labels, float clip_p)
T *& end()
Definition: v_array.h:43
void gen_cs_example(cb_to_cs &c, example &ec, CB::label &ld, COST_SENSITIVE::label &cs_ld)
std::vector< example * > multi_ex
Definition: example.h:122
polylabel l
Definition: example.h:57
float safe_probability(float prob)
Definition: cb.h:25
float cost
Definition: cb.h:17
void gen_cs_example_mtr(cb_to_cs_adf &c, multi_ex &ec_seq, COST_SENSITIVE::label &cs_labels)
LEARNER::single_learner * scorer
polyprediction pred
Definition: example.h:60
void learn(E &ec, size_t i=0)
Definition: learner.h:160
v_array< wclass > costs
#define THROW(args)
Definition: vw_exception.h:181
constexpr uint64_t c
Definition: rand48.cc:12
void gen_cs_test_example(multi_ex &examples, COST_SENSITIVE::label &cs_labels)
float f
Definition: cache.cc:40
COST_SENSITIVE::label pred_scores
T * end_array
Definition: v_array.h:38
#define CB_TYPE_MTR
Definition: cb_algs.h:16