Vowpal Wabbit
Public Member Functions | Private Member Functions | Private Attributes | List of all members
CB_ADF::cb_adf Struct Reference

Public Member Functions

template<bool is_learn>
void do_actual_learning (LEARNER::multi_learner &base, multi_ex &ec_seq)
 
bool update_statistics (example &ec, multi_ex *ec_seq)
 
 cb_adf (shared_data *sd, size_t cb_type, VW::version_struct *model_file_ver, bool rank_all, float clip_p, bool no_predict)
 
void set_scorer (LEARNER::single_learner *scorer)
 
bool get_rank_all () const
 
const cb_to_cs_adf & get_gen_cs () const
 
const VW::version_struct * get_model_file_ver () const
 
 ~cb_adf ()
 

Private Member Functions

void learn_IPS (multi_learner &base, multi_ex &examples)
 
void learn_DR (multi_learner &base, multi_ex &examples)
 
void learn_DM (multi_learner &base, multi_ex &examples)
 
void learn_SM (multi_learner &base, multi_ex &examples)
 
template<bool predict>
void learn_MTR (multi_learner &base, multi_ex &examples)
 

Private Attributes

shared_data * _sd
 
VW::version_struct * _model_file_ver
 
cb_to_cs_adf _gen_cs
 
v_array< CB::label > _cb_labels
 
COST_SENSITIVE::label _cs_labels
 
v_array< COST_SENSITIVE::label > _prepped_cs_labels
 
action_scores _a_s
 
action_scores _a_s_mtr_cs
 
action_scores _prob_s
 
v_array< uint32_t > _backup_nf
 
v_array< float > _backup_weights
 
uint64_t _offset
 
const bool _no_predict
 
const bool _rank_all
 
const float _clip_p
 

Detailed Description

Definition at line 30 of file cb_adf.cc.

Constructor & Destructor Documentation

◆ cb_adf()

CB_ADF::cb_adf::cb_adf ( shared_data * sd,
size_t  cb_type,
VW::version_struct * model_file_ver,
bool  rank_all,
float  clip_p,
bool  no_predict 
)
inline

Definition at line 59 of file cb_adf.cc.

References GEN_CS::cb_to_cs_adf::cb_type.

61  : _sd(sd), _model_file_ver(model_file_ver), _no_predict(no_predict), _rank_all(rank_all), _clip_p(clip_p)
62  {
63  _gen_cs.cb_type = cb_type;
64  }
VW::version_struct * _model_file_ver
Definition: cb_adf.cc:36
const float _clip_p
Definition: cb_adf.cc:52
shared_data * _sd
Definition: cb_adf.cc:33
const bool _rank_all
Definition: cb_adf.cc:51
const bool _no_predict
Definition: cb_adf.cc:50
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38

◆ ~cb_adf()

CB_ADF::cb_adf::~cb_adf ( )
inline

Definition at line 74 of file cb_adf.cc.

References COST_SENSITIVE::label::costs, v_array< T >::delete_v(), and GEN_CS::cb_to_cs_adf::pred_scores.

75  {
76  _cb_labels.delete_v();
77  for (auto& prepped_cs_label : _prepped_cs_labels) prepped_cs_label.costs.delete_v();
78  _prepped_cs_labels.delete_v();
79  _cs_labels.costs.delete_v();
80  _backup_weights.delete_v();
81  _backup_nf.delete_v();
82  _prob_s.delete_v();
83 
84  _a_s.delete_v();
85  _a_s_mtr_cs.delete_v();
86  _gen_cs.pred_scores.costs.delete_v();
87  }
v_array< float > _backup_weights
Definition: cb_adf.cc:47
action_scores _a_s
Definition: cb_adf.cc:43
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
v_array< uint32_t > _backup_nf
Definition: cb_adf.cc:46
action_scores _prob_s
Definition: cb_adf.cc:45
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
action_scores _a_s_mtr_cs
Definition: cb_adf.cc:44
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38
void delete_v()
Definition: v_array.h:98
v_array< wclass > costs
COST_SENSITIVE::label pred_scores

Member Function Documentation

◆ do_actual_learning()

template<bool is_learn>
void CB_ADF::cb_adf::do_actual_learning ( LEARNER::multi_learner & base,
multi_ex & ec_seq 
)

Definition at line 295 of file cb_adf.cc.

References CB_TYPE_DM, CB_TYPE_DR, CB_TYPE_IPS, CB_TYPE_MTR, CB_TYPE_SM, GEN_CS::gen_cs_test_example(), CB_ADF::get_observed_cost(), CB_ADF::test_adf_sequence(), and THROW.

Referenced by CB_ADF::learn(), and CB_ADF::predict().

296 {
297  _offset = ec_seq[0]->ft_offset;
298  _gen_cs.known_cost = get_observed_cost(ec_seq); // need to set for test case
299  if (is_learn && test_adf_sequence(ec_seq) != nullptr)
300  {
301  /* v_array<float> temp_scores;
302  temp_scores = v_init<float>();
303  do_actual_learning<false>(data,base);
304  for (size_t i = 0; i < data.ec_seq[0]->pred.a_s.size(); i++)
305  temp_scores.push_back(data.ec_seq[0]->pred.a_s[i].score);*/
306  switch (_gen_cs.cb_type)
307  {
308  case CB_TYPE_IPS:
309  learn_IPS(base, ec_seq);
310  break;
311  case CB_TYPE_DR:
312  learn_DR(base, ec_seq);
313  break;
314  case CB_TYPE_DM:
315  learn_DM(base, ec_seq);
316  break;
317  case CB_TYPE_MTR:
318  if (_no_predict)
319  learn_MTR<false>(base, ec_seq);
320  else
321  learn_MTR<true>(base, ec_seq);
322  break;
323  case CB_TYPE_SM:
324  learn_SM(base, ec_seq);
325  break;
326  default:
327  THROW("Unknown cb_type specified for contextual bandit learning: " << _gen_cs.cb_type);
328  }
329 
330  /* for (size_t i = 0; i < temp_scores.size(); i++)
331  if (temp_scores[i] != data.ec_seq[0]->pred.a_s[i].score)
332  std::cout << "problem! " << temp_scores[i] << " != " << data.ec_seq[0]->pred.a_s[i].score << " for " <<
333  data.ec_seq[0]->pred.a_s[i].action << std::endl; temp_scores.delete_v();*/
334  }
335  else
336  {
337  gen_cs_test_example(ec_seq, _cs_labels); // create test labels.
338  call_cs_ldf<false>(base, ec_seq, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
339  }
340 }
CB::cb_class known_cost
void learn_IPS(multi_learner &base, multi_ex &examples)
Definition: cb_adf.cc:131
#define CB_TYPE_IPS
Definition: cb_algs.h:15
void learn_DM(multi_learner &base, multi_ex &examples)
Definition: cb_adf.cc:233
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
example * test_adf_sequence(multi_ex &ec_seq)
Definition: cb_adf.cc:268
#define CB_TYPE_DM
Definition: cb_algs.h:14
#define CB_TYPE_DR
Definition: cb_algs.h:13
CB::cb_class get_observed_cost(multi_ex &examples)
Definition: cb_adf.cc:99
void learn_SM(multi_learner &base, multi_ex &examples)
Definition: cb_adf.cc:137
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
const bool _no_predict
Definition: cb_adf.cc:50
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38
void learn_DR(multi_learner &base, multi_ex &examples)
Definition: cb_adf.cc:227
#define THROW(args)
Definition: vw_exception.h:181
void gen_cs_test_example(multi_ex &examples, COST_SENSITIVE::label &cs_labels)
#define CB_TYPE_SM
Definition: cb_algs.h:17
#define CB_TYPE_MTR
Definition: cb_algs.h:16

◆ get_gen_cs()

const cb_to_cs_adf& CB_ADF::cb_adf::get_gen_cs ( ) const
inline

Definition at line 70 of file cb_adf.cc.

Referenced by CB_ADF::save_load().

70 { return _gen_cs; }
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38

◆ get_model_file_ver()

const VW::version_struct* CB_ADF::cb_adf::get_model_file_ver ( ) const
inline

Definition at line 72 of file cb_adf.cc.

Referenced by CB_ADF::save_load().

72 { return _model_file_ver; }
VW::version_struct * _model_file_ver
Definition: cb_adf.cc:36

◆ get_rank_all()

bool CB_ADF::cb_adf::get_rank_all ( ) const
inline

Definition at line 68 of file cb_adf.cc.

Referenced by CB_ADF::output_example_seq().

68 { return _rank_all; }
const bool _rank_all
Definition: cb_adf.cc:51

◆ learn_DM()

void CB_ADF::cb_adf::learn_DM ( multi_learner & base,
multi_ex & examples 
)
private

Definition at line 233 of file cb_adf.cc.

References GEN_CS::gen_cs_example_dm().

234 {
235  gen_cs_example_dm(examples, _cs_labels);
236  call_cs_ldf<true>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
237 }
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
void gen_cs_example_dm(multi_ex &examples, COST_SENSITIVE::label &cs_labels)
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41

◆ learn_DR()

void CB_ADF::cb_adf::learn_DR ( multi_learner & base,
multi_ex & examples 
)
private

Definition at line 227 of file cb_adf.cc.

228 {
229  gen_cs_example_dr<true>(_gen_cs, examples, _cs_labels, _clip_p);
230  call_cs_ldf<true>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
231 }
const float _clip_p
Definition: cb_adf.cc:52
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38

◆ learn_IPS()

void CB_ADF::cb_adf::learn_IPS ( multi_learner & base,
multi_ex & examples 
)
private

Definition at line 131 of file cb_adf.cc.

References GEN_CS::gen_cs_example_ips().

132 {
133  gen_cs_example_ips(examples, _cs_labels, _clip_p);
134  call_cs_ldf<true>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
135 }
const float _clip_p
Definition: cb_adf.cc:52
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
void gen_cs_example_ips(multi_ex &examples, COST_SENSITIVE::label &cs_labels, float clip_p)
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41

◆ learn_MTR()

template<bool predict>
void CB_ADF::cb_adf::learn_MTR ( multi_learner & base,
multi_ex & examples 
)
private

Definition at line 240 of file cb_adf.cc.

References GEN_CS::gen_cs_example_ips(), GEN_CS::gen_cs_example_mtr(), and CB_ADF::predict().

241 {
242  // uint32_t action = 0;
243  if (predict) // first get the prediction to return
244  {
245  gen_cs_example_ips(examples, _cs_labels);
246  call_cs_ldf<false>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
247  std::swap(examples[0]->pred.a_s, _a_s);
248  }
249  // second train on _one_ action (which requires up to 3 examples).
250  // We must go through the cost sensitive classifier layer to get
251  // proper feature handling.
252  gen_cs_example_mtr(_gen_cs, examples, _cs_labels);
253  uint32_t nf = (uint32_t)examples[_gen_cs.mtr_example]->num_features;
254  float old_weight = examples[_gen_cs.mtr_example]->weight;
255  const float clipped_p = std::max(examples[_gen_cs.mtr_example]->l.cb.costs[0].probability, _clip_p);
256  examples[_gen_cs.mtr_example]->weight *= 1.f / clipped_p * ((float)_gen_cs.event_sum / (float)_gen_cs.action_sum);
257 
258  std::swap(_gen_cs.mtr_ec_seq[0]->pred.a_s, _a_s_mtr_cs);
259  // TODO!!! cb_labels are not getting properly restored (empty costs are dropped)
260  GEN_CS::call_cs_ldf<true>(base, _gen_cs.mtr_ec_seq, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
261  examples[_gen_cs.mtr_example]->num_features = nf;
262  examples[_gen_cs.mtr_example]->weight = old_weight;
263  std::swap(_gen_cs.mtr_ec_seq[0]->pred.a_s, _a_s_mtr_cs);
264  std::swap(examples[0]->pred.a_s, _a_s);
265 }
action_scores _a_s
Definition: cb_adf.cc:43
const float _clip_p
Definition: cb_adf.cc:52
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
void gen_cs_example_ips(multi_ex &examples, COST_SENSITIVE::label &cs_labels, float clip_p)
action_scores _a_s_mtr_cs
Definition: cb_adf.cc:44
void gen_cs_example_mtr(cb_to_cs_adf &c, multi_ex &ec_seq, COST_SENSITIVE::label &cs_labels)
void predict(cb_adf &c, multi_learner &base, multi_ex &ec_seq)
Definition: cb_adf.cc:477
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38

◆ learn_SM()

void CB_ADF::cb_adf::learn_SM ( multi_learner & base,
multi_ex & examples 
)
private

Definition at line 137 of file cb_adf.cc.

References ACTION_SCORE::action_score::action, ACTION_SCORE::begin_scores(), CB::label::costs, ACTION_SCORE::end_scores(), GEN_CS::gen_cs_example_sm(), GEN_CS::gen_cs_test_example(), exploration::generate_softmax(), GEN_CS::safe_probability(), and ACTION_SCORE::action_score::score.

138 {
139  gen_cs_test_example(examples, _cs_labels); // create test labels.
140  call_cs_ldf<false>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
141 
142  // Can probably do this more efficiently than 6 loops over the examples...
143  //[1: initialize temporary storage;
144  // 2: find chosen action;
145  // 3: create cs_labels (gen_cs_example_sm);
146  // 4: get probability of chosen action;
147  // 5: backup example wts;
148  // 6: restore example wts]
149  _a_s.clear();
150  _prob_s.clear();
151  // TODO: Check that predicted scores are always stored with the first example
152  for (uint32_t i = 0; i < examples[0]->pred.a_s.size(); i++)
153  {
154  _a_s.push_back({examples[0]->pred.a_s[i].action, examples[0]->pred.a_s[i].score});
155  _prob_s.push_back({examples[0]->pred.a_s[i].action, 0.0});
156  }
157 
158  float sign_offset = 1.0; // To account for negative rewards/costs
159  uint32_t chosen_action = 0;
160  float example_weight = 1.0;
161 
162  for (uint32_t i = 0; i < examples.size(); i++)
163  {
164  CB::label ld = examples[i]->l.cb;
165  if (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX)
166  {
167  chosen_action = i;
168  example_weight = ld.costs[0].cost / safe_probability(ld.costs[0].probability);
169 
170  // Importance weights of examples cannot be negative.
171  // So we use a trick: set |w| as weight, and use sign(w) as an offset in the regression target.
172  if (ld.costs[0].cost < 0.0)
173  {
174  sign_offset = -1.0;
175  example_weight = -example_weight;
176  }
177  break;
178  }
179  }
180 
181  gen_cs_example_sm(examples, chosen_action, sign_offset, _a_s, _cs_labels);
182 
183  // Lambda is -1 in the call to generate_softmax because in vw, lower score is better; for softmax higher score is
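184  // better.
185  generate_softmax(-1.0, begin_scores(_a_s), end_scores(_a_s), begin_scores(_prob_s), end_scores(_prob_s));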
186 
187  // TODO: Check Marco's example that causes VW to report prob > 1.
188 
189  for (auto const& action_score : _prob_s) // Scale example_wt by prob of chosen action
190  {
191  if (action_score.action == chosen_action)
192  {
193  example_weight *= action_score.score;
194  break;
195  }
196  }
197 
198  _backup_weights.clear();
199  _backup_nf.clear();
200  for (auto const& action_score : _prob_s)
201  {
202  uint32_t current_action = action_score.action;
203  _backup_weights.push_back(examples[current_action]->weight);
204  _backup_nf.push_back((uint32_t)examples[current_action]->num_features);
205 
206  if (current_action == chosen_action)
207  examples[current_action]->weight = example_weight * (1.0f - action_score.score);
208  else
209  examples[current_action]->weight = example_weight * action_score.score;
210 
211  if (examples[current_action]->weight <= 1e-15)
212  examples[current_action]->weight = 0;
213  }
214 
215  // Do actual training
216  call_cs_ldf<true>(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset);
217 
218  // Restore example weights and numFeatures
219  for (size_t i = 0; i < _prob_s.size(); i++)
220  {
221  uint32_t current_action = _prob_s[i].action;
222  examples[current_action]->weight = _backup_weights[i];
223  examples[current_action]->num_features = _backup_nf[i];
224  }
225 }
void gen_cs_example_sm(multi_ex &, uint32_t chosen_action, float sign_offset, ACTION_SCORE::action_scores action_vals, COST_SENSITIVE::label &cs_labels)
v_array< float > _backup_weights
Definition: cb_adf.cc:47
action_scores _a_s
Definition: cb_adf.cc:43
COST_SENSITIVE::label _cs_labels
Definition: cb_adf.cc:40
v_array< uint32_t > _backup_nf
Definition: cb_adf.cc:46
v_array< cb_class > costs
Definition: cb.h:27
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
score_iterator begin_scores(action_scores &a_s)
Definition: action_score.h:43
score_iterator end_scores(action_scores &a_s)
Definition: action_score.h:45
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
action_scores _prob_s
Definition: cb_adf.cc:45
v_array< CB::label > _cb_labels
Definition: cb_adf.cc:39
uint64_t _offset
Definition: cb_adf.cc:49
float weight
float safe_probability(float prob)
Definition: cb.h:25
v_array< COST_SENSITIVE::label > _prepped_cs_labels
Definition: cb_adf.cc:41
void gen_cs_test_example(multi_ex &examples, COST_SENSITIVE::label &cs_labels)

◆ set_scorer()

void CB_ADF::cb_adf::set_scorer ( LEARNER::single_learner * scorer)
inline

Definition at line 66 of file cb_adf.cc.

References GEN_CS::cb_to_cs_adf::scorer.

Referenced by cb_adf_setup().

66 { _gen_cs.scorer = scorer; }
LEARNER::single_learner * scorer
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38

◆ update_statistics()

bool CB_ADF::cb_adf::update_statistics ( example & ec,
multi_ex * ec_seq 
)

Definition at line 357 of file cb_adf.cc.

References polyprediction::a_s, CB_ALGS::get_cost_estimate(), loss(), example::num_features, example::pred, and example::weight.

Referenced by CB_ADF::output_example(), and CB_ADF::output_rank_example().

358 {
359  size_t num_features = 0;
360 
361  uint32_t action = ec.pred.a_s[0].action;
362  for (const auto& example : *ec_seq) num_features += example->num_features;
363 
364  float loss = 0.;
365 
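366  bool labeled_example = true;
367  if (_gen_cs.known_cost.probability > 0)
368  loss = get_cost_estimate(&(_gen_cs.known_cost), _gen_cs.pred_scores, action);
369  else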
370  labeled_example = false;
371 
372  bool holdout_example = labeled_example;
373  for (auto const& i : *ec_seq) holdout_example &= i->test_only;
374 
375  _sd->update(holdout_example, labeled_example, loss, ec.weight, num_features);
376  return labeled_example;
377 }
CB::cb_class known_cost
ACTION_SCORE::action_scores a_s
Definition: example.h:47
uint32_t action
Definition: search.h:19
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
Definition: cb_algs.h:58
shared_data * _sd
Definition: cb_adf.cc:33
float probability
Definition: cb.h:19
size_t num_features
Definition: example.h:67
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190
cb_to_cs_adf _gen_cs
Definition: cb_adf.cc:38
polyprediction pred
Definition: example.h:60
float weight
Definition: example.h:62
COST_SENSITIVE::label pred_scores

Member Data Documentation

◆ _a_s

action_scores CB_ADF::cb_adf::_a_s
private

Definition at line 43 of file cb_adf.cc.

◆ _a_s_mtr_cs

action_scores CB_ADF::cb_adf::_a_s_mtr_cs
private

Definition at line 44 of file cb_adf.cc.

◆ _backup_nf

v_array<uint32_t> CB_ADF::cb_adf::_backup_nf
private

Definition at line 46 of file cb_adf.cc.

◆ _backup_weights

v_array<float> CB_ADF::cb_adf::_backup_weights
private

Definition at line 47 of file cb_adf.cc.

◆ _cb_labels

v_array<CB::label> CB_ADF::cb_adf::_cb_labels
private

Definition at line 39 of file cb_adf.cc.

◆ _clip_p

const float CB_ADF::cb_adf::_clip_p
private

Definition at line 52 of file cb_adf.cc.

◆ _cs_labels

COST_SENSITIVE::label CB_ADF::cb_adf::_cs_labels
private

Definition at line 40 of file cb_adf.cc.

◆ _gen_cs

cb_to_cs_adf CB_ADF::cb_adf::_gen_cs
private

Definition at line 38 of file cb_adf.cc.

◆ _model_file_ver

VW::version_struct* CB_ADF::cb_adf::_model_file_ver
private

Definition at line 36 of file cb_adf.cc.

◆ _no_predict

const bool CB_ADF::cb_adf::_no_predict
private

Definition at line 50 of file cb_adf.cc.

◆ _offset

uint64_t CB_ADF::cb_adf::_offset
private

Definition at line 49 of file cb_adf.cc.

◆ _prepped_cs_labels

v_array<COST_SENSITIVE::label> CB_ADF::cb_adf::_prepped_cs_labels
private

Definition at line 41 of file cb_adf.cc.

◆ _prob_s

action_scores CB_ADF::cb_adf::_prob_s
private

Definition at line 45 of file cb_adf.cc.

◆ _rank_all

const bool CB_ADF::cb_adf::_rank_all
private

Definition at line 51 of file cb_adf.cc.

◆ _sd

shared_data* CB_ADF::cb_adf::_sd
private

Definition at line 33 of file cb_adf.cc.


The documentation for this struct was generated from the following file: