Vowpal Wabbit
vw_slim_predict.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <memory>
4 #include <vector>
5 #include <string>
6 #include <array>
7 
8 // avoid mmap dependency
9 #define DISABLE_SHARED_WEIGHTS
10 
11 #include "example_predict.h"
12 #include "explore.h"
13 #include "gd_predict.h"
14 #include "model_parser.h"
15 #include "opts.h"
16 
17 namespace vw_slim
18 {
19 namespace internal
20 {
21 template <typename It1, typename It2>
23 
24 template <typename It1, typename It2>
26 {
27  public:
28  using Val1 = typename std::iterator_traits<It1>::value_type;
29  using Val2 = typename std::iterator_traits<It2>::value_type;
30 
33 
34  location_value(const location_reference<It1, It2>& rhs) : _val1(*rhs._ptr1), _val2(*rhs._ptr2) {}
35 };
36 
37 template <typename It1, typename It2>
39 {
40  public:
41  It1 _ptr1;
42  It2 _ptr2;
45 
46  location_reference() = delete;
47 
48  location_reference(const Ref& other) : _ptr1(other._ptr1), _ptr2(other._ptr2) {}
49 
50  location_reference(It1 first, It2 second) : _ptr1(first), _ptr2(second) {}
51 
53  {
54  *_ptr1 = rhs._val1;
55  *_ptr2 = rhs._val2;
56  return *this;
57  }
58 
60  {
61  *_ptr1 = *rhs._ptr1;
62  *_ptr2 = *rhs._ptr2;
63  return *this;
64  }
65 
66  friend void swap(const location_reference& a, const location_reference& b)
67  {
68  std::iter_swap(a._ptr1, b._ptr1);
69  std::iter_swap(a._ptr2, b._ptr2);
70  }
71 };
72 
73 template <typename It1, typename It2>
75  : public std::iterator<std::random_access_iterator_tag, location_value<It1, It2>, // value type
76  size_t, // difference type
77  location_reference<It1, It2> // reference_type
78  >
79 {
80  It1 _ptr1;
81  It2 _ptr2;
82 
83  public:
87 
88  collection_pair_iterator(It1 first, It2 second) : _ptr1(first), _ptr2(second) {}
89 
90  // must support: a) default-construction, b) copy-construction, c) copy-assignment, d) destruction
91  collection_pair_iterator() = default;
93  collection_pair_iterator& operator=(const collection_pair_iterator& rhs) = default;
94 
95  // must support: a == b; a != b;
96  bool operator==(const Iter& rhs) const { return (_ptr1 == rhs._ptr1 && _ptr2 == rhs._ptr2); }
97  bool operator!=(const Iter& rhs) const { return !(operator==(rhs)); }
98 
99  // must support: b = *a; a->m ;
100  // must support: *a = t;
101  Ref operator*() { return Ref(_ptr1, _ptr2); } // Non-conforming - normally returns loc&
102 
103  // must support: ++a; a++; *a++;
105  {
106  ++_ptr1;
107  ++_ptr2;
108  return *this;
109  }
110 
112  {
113  Iter ret(*this);
114  ++_ptr1;
115  ++_ptr2;
116  return ret;
117  }
118 
119  // must support: --a; a--; *a--;
121  {
122  --_ptr1;
123  --_ptr2;
124  return *this;
125  }
126 
128  {
129  Iter ret(*this);
130  --_ptr1;
131  --_ptr2;
132  return ret;
133  }
134 
135  // must support: a + n; n + a; a - n; a - b
136  Iter operator+(const size_t n) const { return Iter(_ptr1 + n, _ptr2 + n); }
137  // friend Iter operator+(const size_t, const Iter&);
138  Iter operator-(const size_t n) const { return Iter(_ptr1 - n, _ptr2 - n); }
139  size_t operator-(const Iter& rhs) const { return _ptr1 - rhs._ptr1; }
140 
141  // must support: a > b; a < b; a <= b; a >= b;
142  bool operator<(const Iter& rhs) const { return _ptr1 < rhs._ptr1; }
143  bool operator>(const Iter& rhs) const { return _ptr1 > rhs._ptr1; }
144  bool operator<=(const Iter& rhs) const { return _ptr1 <= rhs._ptr1; }
145  bool operator>=(const Iter& rhs) const { return _ptr1 >= rhs._ptr1; }
146 
147  // must support: a += n; a -= n;
148  Iter& operator+=(size_t n)
149  {
150  _ptr1 += n;
151  _ptr2 += n;
152  return *this;
153  }
154 
155  Iter& operator-=(size_t n)
156  {
157  _ptr1 -= n;
158  _ptr2 -= n;
159  return *this;
160  }
161 };
162 } // namespace internal
163 } // namespace vw_slim
164 
165 namespace vw_slim
166 {
171 {
175 };
176 
177 uint64_t ceil_log_2(uint64_t v);
178 
179 // this guard assumes that namespaces are added in order
180 // the complete feature_space of the added namespace is cleared afterwards
182 {
184  unsigned char _ns;
186 
187  public:
188  namespace_copy_guard(example_predict& ex, unsigned char ns);
190 
191  void feature_push_back(feature_value v, feature_index idx);
192 };
193 
195 {
197  uint64_t _old_ft_offset;
198 
199  public:
200  feature_offset_guard(example_predict& ex, uint64_t ft_offset);
202 };
203 
205 {
207  uint64_t _shift;
208 
209  public:
210  stride_shift_guard(example_predict& ex, uint64_t shift);
212 };
213 
217 template <typename W>
219 {
220  std::unique_ptr<W> _weights;
221  std::string _id;
222  std::string _version;
224  std::vector<std::string> _interactions;
225  std::array<bool, NUM_NAMESPACES> _ignore_linear;
227 
230  float _epsilon;
231  float _lambda;
233  uint32_t _num_bits;
234 
235  uint32_t _stride_shift;
237 
238  public:
239  vw_predict() : _model_loaded(false) {}
240 
248  int load(const char* model, size_t length)
249  {
250  if (!model || length == 0)
252 
253  _model_loaded = false;
254 
255  // required for inline_predict
256  _ignore_linear.fill(false);
257 
258  model_parser mp(model, length);
259 
260  // parser_regressor.cc: save_load_header
261  RETURN_ON_FAIL(mp.read_string<false>("version", _version));
262 
263  // read model id
264  RETURN_ON_FAIL(mp.read_string<true>("model_id", _id));
265 
266  RETURN_ON_FAIL(mp.skip(sizeof(char))); // "model character"
267  RETURN_ON_FAIL(mp.skip(sizeof(float))); // "min_label"
268  RETURN_ON_FAIL(mp.skip(sizeof(float))); // "max_label"
269 
270  RETURN_ON_FAIL(mp.read("num_bits", _num_bits));
271 
272  RETURN_ON_FAIL(mp.skip(sizeof(uint32_t))); // "lda"
273 
274  uint32_t ngram_len;
275  RETURN_ON_FAIL(mp.read("ngram_len", ngram_len));
276  mp.skip(3 * ngram_len);
277 
278  uint32_t skips_len;
279  RETURN_ON_FAIL(mp.read("skips_len", skips_len));
280  mp.skip(3 * skips_len);
281 
282  RETURN_ON_FAIL(mp.read_string<true>("file_options", _command_line_arguments));
283 
284  // command line arg parsing
285  _no_constant = _command_line_arguments.find("--noconstant") != std::string::npos;
286 
287  // only 0-valued hash_seed supported
288  int hash_seed;
289  if (find_opt_int(_command_line_arguments, "--hash_seed", hash_seed) && hash_seed)
291 
292  _interactions.clear();
293  find_opt(_command_line_arguments, "-q", _interactions);
294  find_opt(_command_line_arguments, "--quadratic", _interactions);
295  find_opt(_command_line_arguments, "--cubic", _interactions);
296  find_opt(_command_line_arguments, "--interactions", _interactions);
297 
298  // VW performs the following transformation as a side-effect of looking for duplicates.
299  // This affects how interaction hashes are generated.
300  std::vector<std::string> vec_sorted;
301  for (const std::string& interaction : _interactions)
302  {
303  std::string sorted_i(interaction);
304  std::sort(std::begin(sorted_i), std::end(sorted_i));
305  vec_sorted.push_back(sorted_i);
306  }
307  _interactions = vec_sorted;
308 
309  // TODO: take --cb_type dr into account
310  uint64_t num_weights = 0;
311 
312  if (_command_line_arguments.find("--cb_explore_adf") != std::string::npos)
313  {
314  // parse exploration options
315  if (find_opt_int(_command_line_arguments, "--bag", _bag_size))
316  {
317  _exploration = vw_predict_exploration::bag;
318  num_weights = _bag_size;
319 
320  // check for additional minimum epsilon greedy
321  _minimum_epsilon = 0.f;
322  find_opt_float(_command_line_arguments, "--epsilon", _minimum_epsilon);
323  }
324  else if (_command_line_arguments.find("--softmax") != std::string::npos)
325  {
326  if (find_opt_float(_command_line_arguments, "--lambda", _lambda))
327  {
328  if (_lambda > 0) // Lambda should always be negative because we are using a cost basis.
329  _lambda = -_lambda;
330  _exploration = vw_predict_exploration::softmax;
331  }
332  }
333  else if (find_opt_float(_command_line_arguments, "--epsilon", _epsilon))
335  else
337  }
338 
339  // VW style check_sum validation
340  uint32_t check_sum_computed = mp.checksum();
341 
342  // perform check sum check
343  uint32_t check_sum_len;
344  RETURN_ON_FAIL((mp.read<uint32_t, false>("check_sum_len", check_sum_len)));
345  if (check_sum_len != sizeof(uint32_t))
347 
348  uint32_t check_sum;
349  RETURN_ON_FAIL((mp.read<uint32_t, false>("check_sum", check_sum)));
350 
351  if (check_sum_computed != check_sum)
353 
354  if (_command_line_arguments.find("--cb_adf") != std::string::npos)
355  {
356  RETURN_ON_FAIL(mp.skip(sizeof(uint64_t))); // cb_adf.cc: event_sum
357  RETURN_ON_FAIL(mp.skip(sizeof(uint64_t))); // cb_adf.cc: action_sum
358  }
359 
360  // gd.cc: save_load
361  bool gd_resume;
362  RETURN_ON_FAIL(mp.read("resume", gd_resume));
363  if (gd_resume)
365 
366  // read sparse weights into dense
367  uint64_t weight_length = (uint64_t)1 << _num_bits;
368  _stride_shift = (uint32_t)ceil_log_2(num_weights);
369 
370  RETURN_ON_FAIL(mp.read_weights<W>(_weights, _num_bits, _stride_shift));
371 
372  // TODO: check that permutations is not enabled (or parse it)
373 
374  _model_loaded = true;
375 
376  return S_VW_PREDICT_OK;
377  }
378 
385  bool is_cb_explore_adf() { return _command_line_arguments.find("--cb_explore_adf") != std::string::npos; }
386 
394  bool is_csoaa_ldf() { return _command_line_arguments.find("--csoaa_ldf") != std::string::npos; }
395 
405  int predict(example_predict& ex, float& score)
406  {
407  if (!_model_loaded)
409 
410  std::unique_ptr<namespace_copy_guard> ns_copy_guard;
411 
412  if (!_no_constant)
413  {
414  // add constant feature
415  ns_copy_guard = std::unique_ptr<namespace_copy_guard>(new namespace_copy_guard(ex, constant_namespace));
416  ns_copy_guard->feature_push_back(1.f, (constant << _stride_shift) + ex.ft_offset);
417  }
418 
419  score = GD::inline_predict<W>(*_weights, false, _ignore_linear, _interactions, /* permutations */ false, ex);
420 
421  return S_VW_PREDICT_OK;
422  }
423 
424  // multiclass classification
425  int predict(example_predict& shared, example_predict* actions, size_t num_actions, std::vector<float>& out_scores)
426  {
427  if (!_model_loaded)
429 
430  if (!is_csoaa_ldf())
432 
433  out_scores.resize(num_actions);
434 
435  example_predict* action = actions;
436  for (size_t i = 0; i < num_actions; i++, action++)
437  {
438  std::vector<std::unique_ptr<namespace_copy_guard>> ns_copy_guards;
439 
440  // shared feature copying
441  for (auto ns : shared.indices)
442  {
443  // insert namespace
444  auto ns_copy_guard = std::unique_ptr<namespace_copy_guard>(new namespace_copy_guard(*action, ns));
445 
446  // copy features
447  for (auto fs : shared.feature_space[ns]) ns_copy_guard->feature_push_back(fs.value(), fs.index());
448 
449  // keep guard around
450  ns_copy_guards.push_back(std::move(ns_copy_guard));
451  }
452 
453  RETURN_ON_FAIL(predict(*action, out_scores[i]));
454  }
455 
456  return S_VW_PREDICT_OK;
457  }
458 
459  int predict(const char* event_id, example_predict& shared, example_predict* actions, size_t num_actions,
460  std::vector<float>& pdf, std::vector<int>& ranking)
461  {
462  if (!_model_loaded)
464 
465  if (!is_cb_explore_adf())
467 
468  std::vector<float> scores;
469 
470  // add exploration
471  pdf.resize(num_actions);
472  ranking.resize(num_actions);
473 
474  switch (_exploration)
475  {
477  {
478  // get the prediction
479  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
480 
481  // generate exploration distribution
482  // model is trained against cost -> minimum is better
483  auto top_action_iterator = std::min_element(std::begin(scores), std::end(scores));
484  uint32_t top_action = (uint32_t)(top_action_iterator - std::begin(scores));
485 
487  exploration::generate_epsilon_greedy(_epsilon, top_action, std::begin(pdf), std::end(pdf)));
488  break;
489  }
491  {
492  // get the prediction
493  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
494 
495  // generate exploration distribution
497  _lambda, std::begin(scores), std::end(scores), std::begin(pdf), std::end(pdf)));
498  break;
499  }
501  {
502  std::vector<uint32_t> top_actions(num_actions);
503 
504  // apply stride shifts
505  std::vector<std::unique_ptr<stride_shift_guard>> stride_shift_guards;
506  stride_shift_guards.push_back(
507  std::unique_ptr<stride_shift_guard>(new stride_shift_guard(shared, _stride_shift)));
508  example_predict* actions_end = actions + num_actions;
509  for (example_predict* action = actions; action != actions_end; ++action)
510  stride_shift_guards.push_back(
511  std::unique_ptr<stride_shift_guard>(new stride_shift_guard(*action, _stride_shift)));
512 
513  for (size_t i = 0; i < _bag_size; i++)
514  {
515  std::vector<std::unique_ptr<feature_offset_guard>> feature_offset_guards;
516  for (example_predict* action = actions; action != actions_end; ++action)
517  feature_offset_guards.push_back(
518  std::unique_ptr<feature_offset_guard>(new feature_offset_guard(*action, i)));
519 
520  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
521 
522  auto top_action_iterator = std::min_element(std::begin(scores), std::end(scores));
523  uint32_t top_action = (uint32_t)(top_action_iterator - std::begin(scores));
524 
525  top_actions[top_action]++;
526  }
527 
528  // generate exploration distribution
530  exploration::generate_bag(std::begin(top_actions), std::end(top_actions), std::begin(pdf), std::end(pdf)));
531 
532  if (_minimum_epsilon > 0)
534  exploration::enforce_minimum_probability(_minimum_epsilon, true, std::begin(pdf), std::end(pdf)));
535 
536  break;
537  }
538  default:
540  }
541 
542  RETURN_EXPLORATION_ON_FAIL(sort_by_scores(
543  std::begin(pdf), std::end(pdf), std::begin(scores), std::end(scores), std::begin(ranking), std::end(ranking)));
544 
545  // Sample from the pdf
546  uint32_t chosen_action_idx;
548  exploration::sample_after_normalizing(event_id, std::begin(pdf), std::end(pdf), chosen_action_idx));
549 
550  // Swap top element with chosen one (unless chosen is the top)
551  if (chosen_action_idx != 0)
552  {
553  std::iter_swap(std::begin(ranking), std::begin(ranking) + chosen_action_idx);
554  std::iter_swap(std::begin(pdf), std::begin(pdf) + chosen_action_idx);
555  }
556 
557  return S_VW_PREDICT_OK;
558  }
559 
560  template <typename PdfIt, typename InputScoreIt, typename OutputIt>
561  static int sort_by_scores(PdfIt pdf_first, PdfIt pdf_last, InputScoreIt scores_first, InputScoreIt scores_last,
562  OutputIt ranking_begin, OutputIt ranking_last)
563  {
564  const size_t pdf_size = pdf_last - pdf_first;
565  const size_t ranking_size = ranking_last - ranking_begin;
566 
567  if (pdf_size != ranking_size)
569 
570  // Initialize ranking with actions 0,1,2,3 ...
571  std::iota(ranking_begin, ranking_last, 0);
572 
573  // Pdf starts out in the same order as ranking. Ranking and pdf should been sorted
574  // in the order specified by scores.
576  using Iter = typename CP::Iter;
577  using Loc = typename CP::Loc;
578  const Iter begin_coll(ranking_begin, pdf_first);
579  const Iter end_coll(ranking_last, pdf_last);
580  std::sort(begin_coll, end_coll, [&scores_first](const Loc& l, const Loc& r) {
581  return scores_first[size_t(l._val1)] < scores_first[size_t(r._val1)];
582  });
583 
584  return S_EXPLORATION_OK;
585  }
586 
587  uint32_t feature_index_num_bits() { return _num_bits; }
588 };
589 } // namespace vw_slim
#define E_VW_PREDICT_ERR_CB_EXPLORATION_MISSING
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
v_array< namespace_index > indices
#define E_VW_PREDICT_ERR_NOT_A_CB_MODEL
#define RETURN_ON_FAIL(stmt)
static int sort_by_scores(PdfIt pdf_first, PdfIt pdf_last, InputScoreIt scores_first, InputScoreIt scores_last, OutputIt ranking_begin, OutputIt ranking_last)
int read_weights(std::unique_ptr< W > &weights, uint64_t weight_length)
Definition: model_parser.h:106
#define E_VW_PREDICT_ERR_INVALID_MODEL_CHECK_SUM
#define S_VW_PREDICT_OK
location_value(const location_reference< It1, It2 > &rhs)
bool is_cb_explore_adf()
True if the model describes a contextual bandit (cb) model using action dependent features (afd) ...
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
bool find_opt_int(std::string const &command_line_args, std::string arg_name, int &value)
Definition: opts.cc:78
Vowpal Wabbit slim predictor. Supports: regression, multi-class classification and contextual bandits...
friend void swap(const location_reference &a, const location_reference &b)
float feature_value
Definition: feature_group.h:20
uint32_t action
Definition: search.h:19
bool operator!=(const Iter &rhs) const
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
vw_predict_exploration
Exploration algorithm specified by the model.
typename std::iterator_traits< It2 >::value_type Val2
#define RETURN_EXPLORATION_ON_FAIL(stmt)
int predict(const char *event_id, example_predict &shared, example_predict *actions, size_t num_actions, std::vector< float > &pdf, std::vector< int > &ranking)
std::array< bool, NUM_NAMESPACES > _ignore_linear
bool is_csoaa_ldf()
True if the model describes a cost sensitive one-against-all (csoaa). This is also true for cb_explor...
size_t operator-(const Iter &rhs) const
std::array< features, NUM_NAMESPACES > feature_space
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
int load(const char *model, size_t length)
Reads the Vowpal Wabbit model from the supplied buffer (produced using vw -f <modelname>) ...
int read(const char *field_name, size_t field_length, const char **ret)
Definition: model_parser.cc:19
void find_opt(std::string const &command_line_args, std::string arg_name, std::vector< std::string > &out_values)
Definition: opts.cc:9
location_reference(It1 first, It2 second)
constexpr uint64_t constant
Definition: constant.h:11
uint64_t feature_index
Definition: feature_group.h:21
#define E_VW_PREDICT_ERR_INVALID_MODEL
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
std::vector< std::string > _interactions
location_reference & operator=(const Val &rhs)
uint32_t feature_index_num_bits()
location_reference & operator=(const Ref &rhs)
bool operator==(typed_option< T > &lhs, typed_option< T > &rhs)
Definition: options.h:146
#define E_VW_PREDICT_ERR_NO_A_CSOAA_MODEL
#define E_VW_PREDICT_ERR_HASH_SEED_NOT_SUPPORTED
constexpr uint64_t a
Definition: rand48.cc:11
#define E_VW_PREDICT_ERR_NO_MODEL_LOADED
vw_predict_exploration _exploration
int skip(size_t bytes)
Definition: model_parser.cc:37
int predict(example_predict &shared, example_predict *actions, size_t num_actions, std::vector< float > &out_scores)
bool operator==(const Iter &rhs) const
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
std::unique_ptr< W > _weights
uint32_t num_weights(vw &all)
Definition: vw.h:187
constexpr unsigned char constant_namespace
Definition: constant.h:22
typename std::iterator_traits< It1 >::value_type Val1
int predict(example_predict &ex, float &score)
Predicts a score (as in regression) for the provided example.
#define S_EXPLORATION_OK
Definition: explore.h:3
uint64_t ceil_log_2(uint64_t v)
float f
Definition: cache.cc:40
int read_string(const char *field_name, std::string &s)
Definition: model_parser.h:39
#define E_VW_PREDICT_ERR_GD_RESUME_NOT_SUPPORTED
bool find_opt_float(std::string const &command_line_args, std::string arg_name, float &value)
Definition: opts.cc:73
#define E_EXPLORATION_PDF_RANKING_SIZE_MISMATCH
Definition: explore.h:5
std::string _command_line_arguments