Vowpal Wabbit
Public Member Functions | Static Public Member Functions | Private Attributes | List of all members
vw_slim::vw_predict< W > Class Template Reference

Vowpal Wabbit slim predictor. Supports: regression, multi-class classification and contextual bandits. More...

#include <vw_slim_predict.h>

Public Member Functions

 vw_predict ()
 
int load (const char *model, size_t length)
 Reads the Vowpal Wabbit model from the supplied buffer (produced using vw -f <modelname>) More...
 
bool is_cb_explore_adf ()
 True if the model describes a contextual bandit (cb) model using action dependent features (adf) More...
 
bool is_csoaa_ldf ()
 True if the model describes a cost sensitive one-against-all (csoaa). This is also true for cb_explore_adf models, as they are reduced to csoaa. More...
 
int predict (example_predict &ex, float &score)
 Predicts a score (as in regression) for the provided example. More...
 
int predict (example_predict &shared, example_predict *actions, size_t num_actions, std::vector< float > &out_scores)
 
int predict (const char *event_id, example_predict &shared, example_predict *actions, size_t num_actions, std::vector< float > &pdf, std::vector< int > &ranking)
 
uint32_t feature_index_num_bits ()
 

Static Public Member Functions

template<typename PdfIt , typename InputScoreIt , typename OutputIt >
static int sort_by_scores (PdfIt pdf_first, PdfIt pdf_last, InputScoreIt scores_first, InputScoreIt scores_last, OutputIt ranking_begin, OutputIt ranking_last)
 

Private Attributes

std::unique_ptr< W > _weights
 
std::string _id
 
std::string _version
 
std::string _command_line_arguments
 
std::vector< std::string > _interactions
 
std::array< bool, NUM_NAMESPACES > _ignore_linear
 
bool _no_constant
 
vw_predict_exploration _exploration
 
float _minimum_epsilon
 
float _epsilon
 
float _lambda
 
int _bag_size
 
uint32_t _num_bits
 
uint32_t _stride_shift
 
bool _model_loaded
 

Detailed Description

template<typename W>
class vw_slim::vw_predict< W >

Vowpal Wabbit slim predictor. Supports: regression, multi-class classification and contextual bandits.

Definition at line 218 of file vw_slim_predict.h.

Constructor & Destructor Documentation

◆ vw_predict()

template<typename W>
vw_slim::vw_predict< W >::vw_predict ( )
inline

Definition at line 239 of file vw_slim_predict.h.

239 : _model_loaded(false) {}

Member Function Documentation

◆ feature_index_num_bits()

template<typename W>
uint32_t vw_slim::vw_predict< W >::feature_index_num_bits ( )
inline

Definition at line 587 of file vw_slim_predict.h.

Referenced by TEST().

587 { return _num_bits; }

◆ is_cb_explore_adf()

template<typename W>
bool vw_slim::vw_predict< W >::is_cb_explore_adf ( )
inline

True if the model describes a contextual bandit (cb) model using action dependent features (adf)

Returns
true True if contextual bandit predict method can be used.
false False if contextual bandit predict method cannot be used.

Definition at line 385 of file vw_slim_predict.h.

Referenced by run_predict_in_memory(), and TEST_P().

385 { return _command_line_arguments.find("--cb_explore_adf") != std::string::npos; }
std::string _command_line_arguments

◆ is_csoaa_ldf()

template<typename W>
bool vw_slim::vw_predict< W >::is_csoaa_ldf ( )
inline

True if the model describes a cost sensitive one-against-all (csoaa). This is also true for cb_explore_adf models, as they are reduced to csoaa.

Returns
true True if csoaa predict method can be used.
false False if csoaa predict method cannot be used.

Definition at line 394 of file vw_slim_predict.h.

394 { return _command_line_arguments.find("--csoaa_ldf") != std::string::npos; }
std::string _command_line_arguments

◆ load()

template<typename W>
int vw_slim::vw_predict< W >::load ( const char *  model,
size_t  length 
)
inline

Reads the Vowpal Wabbit model from the supplied buffer (produced using vw -f <modelname>)

Parameters
model The binary model.
length The length of the binary model.
Returns
int Returns 0 (S_VW_PREDICT_OK) if successful, otherwise one of the error codes (see E_VW_PREDICT_ERR_*).

Definition at line 248 of file vw_slim_predict.h.

References vw_slim::bag, vw_slim::ceil_log_2(), vw_slim::model_parser::checksum(), E_VW_PREDICT_ERR_CB_EXPLORATION_MISSING, E_VW_PREDICT_ERR_GD_RESUME_NOT_SUPPORTED, E_VW_PREDICT_ERR_HASH_SEED_NOT_SUPPORTED, E_VW_PREDICT_ERR_INVALID_MODEL, E_VW_PREDICT_ERR_INVALID_MODEL_CHECK_SUM, vw_slim::epsilon_greedy, vw_slim::find_opt(), vw_slim::find_opt_float(), vw_slim::find_opt_int(), VW::num_weights(), vw_slim::model_parser::read(), vw_slim::model_parser::read_string(), vw_slim::model_parser::read_weights(), RETURN_ON_FAIL, S_VW_PREDICT_OK, vw_slim::model_parser::skip(), and vw_slim::softmax.

Referenced by cb_data_epsilon_0_skype_jb_test_runner(), run_predict_in_memory(), TEST(), TEST_P(), and TYPED_TEST_P().

249  {
250  if (!model || length == 0)
252 
253  _model_loaded = false;
254 
255  // required for inline_predict
256  _ignore_linear.fill(false);
257 
258  model_parser mp(model, length);
259 
260  // parser_regressor.cc: save_load_header
261  RETURN_ON_FAIL(mp.read_string<false>("version", _version));
262 
263  // read model id
264  RETURN_ON_FAIL(mp.read_string<true>("model_id", _id));
265 
266  RETURN_ON_FAIL(mp.skip(sizeof(char))); // "model character"
267  RETURN_ON_FAIL(mp.skip(sizeof(float))); // "min_label"
268  RETURN_ON_FAIL(mp.skip(sizeof(float))); // "max_label"
269 
270  RETURN_ON_FAIL(mp.read("num_bits", _num_bits));
271 
272  RETURN_ON_FAIL(mp.skip(sizeof(uint32_t))); // "lda"
273 
274  uint32_t ngram_len;
275  RETURN_ON_FAIL(mp.read("ngram_len", ngram_len));
276  mp.skip(3 * ngram_len);
277 
278  uint32_t skips_len;
279  RETURN_ON_FAIL(mp.read("skips_len", skips_len));
280  mp.skip(3 * skips_len);
281 
282  RETURN_ON_FAIL(mp.read_string<true>("file_options", _command_line_arguments));
283 
284  // command line arg parsing
285  _no_constant = _command_line_arguments.find("--noconstant") != std::string::npos;
286 
287  // only 0-valued hash_seed supported
288  int hash_seed;
289  if (find_opt_int(_command_line_arguments, "--hash_seed", hash_seed) && hash_seed)
291 
292  _interactions.clear();
296  find_opt(_command_line_arguments, "--interactions", _interactions);
297 
298  // VW performs the following transformation as a side-effect of looking for duplicates.
299  // This affects how interaction hashes are generated.
300  std::vector<std::string> vec_sorted;
301  for (const std::string& interaction : _interactions)
302  {
303  std::string sorted_i(interaction);
304  std::sort(std::begin(sorted_i), std::end(sorted_i));
305  vec_sorted.push_back(sorted_i);
306  }
307  _interactions = vec_sorted;
308 
309  // TODO: take --cb_type dr into account
310  uint64_t num_weights = 0;
311 
312  if (_command_line_arguments.find("--cb_explore_adf") != std::string::npos)
313  {
314  // parse exploration options
316  {
318  num_weights = _bag_size;
319 
320  // check for additional minimum epsilon greedy
321  _minimum_epsilon = 0.f;
323  }
324  else if (_command_line_arguments.find("--softmax") != std::string::npos)
325  {
327  {
328  if (_lambda > 0) // Lambda should always be negative because we are using a cost basis.
329  _lambda = -_lambda;
331  }
332  }
333  else if (find_opt_float(_command_line_arguments, "--epsilon", _epsilon))
335  else
337  }
338 
339  // VW style check_sum validation
340  uint32_t check_sum_computed = mp.checksum();
341 
342  // perform check sum check
343  uint32_t check_sum_len;
344  RETURN_ON_FAIL((mp.read<uint32_t, false>("check_sum_len", check_sum_len)));
345  if (check_sum_len != sizeof(uint32_t))
347 
348  uint32_t check_sum;
349  RETURN_ON_FAIL((mp.read<uint32_t, false>("check_sum", check_sum)));
350 
351  if (check_sum_computed != check_sum)
353 
354  if (_command_line_arguments.find("--cb_adf") != std::string::npos)
355  {
356  RETURN_ON_FAIL(mp.skip(sizeof(uint64_t))); // cb_adf.cc: event_sum
357  RETURN_ON_FAIL(mp.skip(sizeof(uint64_t))); // cb_adf.cc: action_sum
358  }
359 
360  // gd.cc: save_load
361  bool gd_resume;
362  RETURN_ON_FAIL(mp.read("resume", gd_resume));
363  if (gd_resume)
365 
366  // read sparse weights into dense
367  uint64_t weight_length = (uint64_t)1 << _num_bits;
368  _stride_shift = (uint32_t)ceil_log_2(num_weights);
369 
370  RETURN_ON_FAIL(mp.read_weights<W>(_weights, _num_bits, _stride_shift));
371 
372  // TODO: check that permutations is not enabled (or parse it)
373 
374  _model_loaded = true;
375 
376  return S_VW_PREDICT_OK;
377  }
#define E_VW_PREDICT_ERR_CB_EXPLORATION_MISSING
#define RETURN_ON_FAIL(stmt)
#define E_VW_PREDICT_ERR_INVALID_MODEL_CHECK_SUM
#define S_VW_PREDICT_OK
bool find_opt_int(std::string const &command_line_args, std::string arg_name, int &value)
Definition: opts.cc:78
std::array< bool, NUM_NAMESPACES > _ignore_linear
void find_opt(std::string const &command_line_args, std::string arg_name, std::vector< std::string > &out_values)
Definition: opts.cc:9
#define E_VW_PREDICT_ERR_INVALID_MODEL
std::vector< std::string > _interactions
#define E_VW_PREDICT_ERR_HASH_SEED_NOT_SUPPORTED
vw_predict_exploration _exploration
std::unique_ptr< W > _weights
uint32_t num_weights(vw &all)
Definition: vw.h:187
uint64_t ceil_log_2(uint64_t v)
#define E_VW_PREDICT_ERR_GD_RESUME_NOT_SUPPORTED
bool find_opt_float(std::string const &command_line_args, std::string arg_name, float &value)
Definition: opts.cc:73
std::string _command_line_arguments

◆ predict() [1/3]

template<typename W>
int vw_slim::vw_predict< W >::predict ( example_predict &  ex,
float &  score 
)
inline

Predicts a score (as in regression) for the provided example.

Regular regression with support for constant feature (bias term) and interactions

Parameters
ex The example to get the prediction for.
score The output score produced by the model.
Returns
int Returns 0 (S_VW_PREDICT_OK) if successful, otherwise one of the error codes (see E_VW_PREDICT_ERR_*).

Definition at line 405 of file vw_slim_predict.h.

References constant, constant_namespace, E_VW_PREDICT_ERR_NO_MODEL_LOADED, f, example_predict::ft_offset, and S_VW_PREDICT_OK.

Referenced by cb_data_epsilon_0_skype_jb_test_runner(), run_predict_in_memory(), TEST(), TEST_P(), and TYPED_TEST_P().

406  {
407  if (!_model_loaded)
409 
410  std::unique_ptr<namespace_copy_guard> ns_copy_guard;
411 
412  if (!_no_constant)
413  {
414  // add constant feature
415  ns_copy_guard = std::unique_ptr<namespace_copy_guard>(new namespace_copy_guard(ex, constant_namespace));
416  ns_copy_guard->feature_push_back(1.f, (constant << _stride_shift) + ex.ft_offset);
417  }
418 
419  score = GD::inline_predict<W>(*_weights, false, _ignore_linear, _interactions, /* permutations */ false, ex);
420 
421  return S_VW_PREDICT_OK;
422  }
#define S_VW_PREDICT_OK
std::array< bool, NUM_NAMESPACES > _ignore_linear
constexpr uint64_t constant
Definition: constant.h:11
std::vector< std::string > _interactions
#define E_VW_PREDICT_ERR_NO_MODEL_LOADED
std::unique_ptr< W > _weights
constexpr unsigned char constant_namespace
Definition: constant.h:22
float f
Definition: cache.cc:40

◆ predict() [2/3]

template<typename W>
int vw_slim::vw_predict< W >::predict ( example_predict &  shared,
example_predict *  actions,
size_t  num_actions,
std::vector< float > &  out_scores 
)
inline

Definition at line 425 of file vw_slim_predict.h.

References E_VW_PREDICT_ERR_NO_A_CSOAA_MODEL, E_VW_PREDICT_ERR_NO_MODEL_LOADED, example_predict::feature_space, example_predict::indices, predict(), RETURN_ON_FAIL, and S_VW_PREDICT_OK.

426  {
427  if (!_model_loaded)
429 
430  if (!is_csoaa_ldf())
432 
433  out_scores.resize(num_actions);
434 
435  example_predict* action = actions;
436  for (size_t i = 0; i < num_actions; i++, action++)
437  {
438  std::vector<std::unique_ptr<namespace_copy_guard>> ns_copy_guards;
439 
440  // shared feature copying
441  for (auto ns : shared.indices)
442  {
443  // insert namespace
444  auto ns_copy_guard = std::unique_ptr<namespace_copy_guard>(new namespace_copy_guard(*action, ns));
445 
446  // copy features
447  for (auto fs : shared.feature_space[ns]) ns_copy_guard->feature_push_back(fs.value(), fs.index());
448 
449  // keep guard around
450  ns_copy_guards.push_back(std::move(ns_copy_guard));
451  }
452 
453  RETURN_ON_FAIL(predict(*action, out_scores[i]));
454  }
455 
456  return S_VW_PREDICT_OK;
457  }
v_array< namespace_index > indices
#define RETURN_ON_FAIL(stmt)
#define S_VW_PREDICT_OK
uint32_t action
Definition: search.h:19
bool is_csoaa_ldf()
True if the model describes a cost sensitive one-against-all (csoaa). This is also true for cb_explor...
std::array< features, NUM_NAMESPACES > feature_space
#define E_VW_PREDICT_ERR_NO_A_CSOAA_MODEL
#define E_VW_PREDICT_ERR_NO_MODEL_LOADED
int predict(example_predict &ex, float &score)
Predicts a score (as in regression) for the provided example.

◆ predict() [3/3]

template<typename W>
int vw_slim::vw_predict< W >::predict ( const char *  event_id,
example_predict &  shared,
example_predict *  actions,
size_t  num_actions,
std::vector< float > &  pdf,
std::vector< int > &  ranking 
)
inline

Definition at line 459 of file vw_slim_predict.h.

References vw_slim::bag, E_VW_PREDICT_ERR_NO_MODEL_LOADED, E_VW_PREDICT_ERR_NOT_A_CB_MODEL, exploration::enforce_minimum_probability(), vw_slim::epsilon_greedy, exploration::generate_bag(), exploration::generate_epsilon_greedy(), exploration::generate_softmax(), predict(), RETURN_EXPLORATION_ON_FAIL, RETURN_ON_FAIL, S_VW_PREDICT_OK, exploration::sample_after_normalizing(), and vw_slim::softmax.

461  {
462  if (!_model_loaded)
464 
465  if (!is_cb_explore_adf())
467 
468  std::vector<float> scores;
469 
470  // add exploration
471  pdf.resize(num_actions);
472  ranking.resize(num_actions);
473 
474  switch (_exploration)
475  {
477  {
478  // get the prediction
479  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
480 
481  // generate exploration distribution
482  // model is trained against cost -> minimum is better
483  auto top_action_iterator = std::min_element(std::begin(scores), std::end(scores));
484  uint32_t top_action = (uint32_t)(top_action_iterator - std::begin(scores));
485 
487  exploration::generate_epsilon_greedy(_epsilon, top_action, std::begin(pdf), std::end(pdf)));
488  break;
489  }
491  {
492  // get the prediction
493  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
494 
495  // generate exploration distribution
497  _lambda, std::begin(scores), std::end(scores), std::begin(pdf), std::end(pdf)));
498  break;
499  }
501  {
502  std::vector<uint32_t> top_actions(num_actions);
503 
504  // apply stride shifts
505  std::vector<std::unique_ptr<stride_shift_guard>> stride_shift_guards;
506  stride_shift_guards.push_back(
507  std::unique_ptr<stride_shift_guard>(new stride_shift_guard(shared, _stride_shift)));
508  example_predict* actions_end = actions + num_actions;
509  for (example_predict* action = actions; action != actions_end; ++action)
510  stride_shift_guards.push_back(
511  std::unique_ptr<stride_shift_guard>(new stride_shift_guard(*action, _stride_shift)));
512 
513  for (size_t i = 0; i < _bag_size; i++)
514  {
515  std::vector<std::unique_ptr<feature_offset_guard>> feature_offset_guards;
516  for (example_predict* action = actions; action != actions_end; ++action)
517  feature_offset_guards.push_back(
518  std::unique_ptr<feature_offset_guard>(new feature_offset_guard(*action, i)));
519 
520  RETURN_ON_FAIL(predict(shared, actions, num_actions, scores));
521 
522  auto top_action_iterator = std::min_element(std::begin(scores), std::end(scores));
523  uint32_t top_action = (uint32_t)(top_action_iterator - std::begin(scores));
524 
525  top_actions[top_action]++;
526  }
527 
528  // generate exploration distribution
530  exploration::generate_bag(std::begin(top_actions), std::end(top_actions), std::begin(pdf), std::end(pdf)));
531 
532  if (_minimum_epsilon > 0)
534  exploration::enforce_minimum_probability(_minimum_epsilon, true, std::begin(pdf), std::end(pdf)));
535 
536  break;
537  }
538  default:
540  }
541 
543  std::begin(pdf), std::end(pdf), std::begin(scores), std::end(scores), std::begin(ranking), std::end(ranking)));
544 
545  // Sample from the pdf
546  uint32_t chosen_action_idx;
548  exploration::sample_after_normalizing(event_id, std::begin(pdf), std::end(pdf), chosen_action_idx));
549 
550  // Swap top element with chosen one (unless chosen is the top)
551  if (chosen_action_idx != 0)
552  {
553  std::iter_swap(std::begin(ranking), std::begin(ranking) + chosen_action_idx);
554  std::iter_swap(std::begin(pdf), std::begin(pdf) + chosen_action_idx);
555  }
556 
557  return S_VW_PREDICT_OK;
558  }
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
#define E_VW_PREDICT_ERR_NOT_A_CB_MODEL
#define RETURN_ON_FAIL(stmt)
static int sort_by_scores(PdfIt pdf_first, PdfIt pdf_last, InputScoreIt scores_first, InputScoreIt scores_last, OutputIt ranking_begin, OutputIt ranking_last)
#define S_VW_PREDICT_OK
bool is_cb_explore_adf()
True if the model describes a contextual bandit (cb) model using action dependent features (adf) ...
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
uint32_t action
Definition: search.h:19
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
#define RETURN_EXPLORATION_ON_FAIL(stmt)
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
#define E_VW_PREDICT_ERR_NO_MODEL_LOADED
vw_predict_exploration _exploration
int predict(example_predict &ex, float &score)
Predicts a score (as in regression) for the provided example.

◆ sort_by_scores()

template<typename W>
template<typename PdfIt , typename InputScoreIt , typename OutputIt >
static int vw_slim::vw_predict< W >::sort_by_scores ( PdfIt  pdf_first,
PdfIt  pdf_last,
InputScoreIt  scores_first,
InputScoreIt  scores_last,
OutputIt  ranking_begin,
OutputIt  ranking_last 
)
inlinestatic

Definition at line 561 of file vw_slim_predict.h.

References E_EXPLORATION_PDF_RANKING_SIZE_MISMATCH, and S_EXPLORATION_OK.

563  {
564  const size_t pdf_size = pdf_last - pdf_first;
565  const size_t ranking_size = ranking_last - ranking_begin;
566 
567  if (pdf_size != ranking_size)
569 
570  // Initialize ranking with actions 0,1,2,3 ...
571  std::iota(ranking_begin, ranking_last, 0);
572 
573  // Pdf starts out in the same order as ranking. Ranking and pdf should been sorted
574  // in the order specified by scores.
575  using CP = internal::collection_pair_iterator<OutputIt, PdfIt>;
576  using Iter = typename CP::Iter;
577  using Loc = typename CP::Loc;
578  const Iter begin_coll(ranking_begin, pdf_first);
579  const Iter end_coll(ranking_last, pdf_last);
580  std::sort(begin_coll, end_coll, [&scores_first](const Loc& l, const Loc& r) {
581  return scores_first[size_t(l._val1)] < scores_first[size_t(r._val1)];
582  });
583 
584  return S_EXPLORATION_OK;
585  }
#define S_EXPLORATION_OK
Definition: explore.h:3
#define E_EXPLORATION_PDF_RANKING_SIZE_MISMATCH
Definition: explore.h:5

Member Data Documentation

◆ _bag_size

template<typename W>
int vw_slim::vw_predict< W >::_bag_size
private

Definition at line 232 of file vw_slim_predict.h.

◆ _command_line_arguments

template<typename W>
std::string vw_slim::vw_predict< W >::_command_line_arguments
private

Definition at line 223 of file vw_slim_predict.h.

◆ _epsilon

template<typename W>
float vw_slim::vw_predict< W >::_epsilon
private

Definition at line 230 of file vw_slim_predict.h.

◆ _exploration

template<typename W>
vw_predict_exploration vw_slim::vw_predict< W >::_exploration
private

Definition at line 228 of file vw_slim_predict.h.

◆ _id

template<typename W>
std::string vw_slim::vw_predict< W >::_id
private

Definition at line 221 of file vw_slim_predict.h.

◆ _ignore_linear

template<typename W>
std::array<bool, NUM_NAMESPACES> vw_slim::vw_predict< W >::_ignore_linear
private

Definition at line 225 of file vw_slim_predict.h.

◆ _interactions

template<typename W>
std::vector<std::string> vw_slim::vw_predict< W >::_interactions
private

Definition at line 224 of file vw_slim_predict.h.

◆ _lambda

template<typename W>
float vw_slim::vw_predict< W >::_lambda
private

Definition at line 231 of file vw_slim_predict.h.

◆ _minimum_epsilon

template<typename W>
float vw_slim::vw_predict< W >::_minimum_epsilon
private

Definition at line 229 of file vw_slim_predict.h.

◆ _model_loaded

template<typename W>
bool vw_slim::vw_predict< W >::_model_loaded
private

Definition at line 236 of file vw_slim_predict.h.

◆ _no_constant

template<typename W>
bool vw_slim::vw_predict< W >::_no_constant
private

Definition at line 226 of file vw_slim_predict.h.

◆ _num_bits

template<typename W>
uint32_t vw_slim::vw_predict< W >::_num_bits
private

Definition at line 233 of file vw_slim_predict.h.

◆ _stride_shift

template<typename W>
uint32_t vw_slim::vw_predict< W >::_stride_shift
private

Definition at line 235 of file vw_slim_predict.h.

◆ _version

template<typename W>
std::string vw_slim::vw_predict< W >::_version
private

Definition at line 222 of file vw_slim_predict.h.

◆ _weights

template<typename W>
std::unique_ptr<W> vw_slim::vw_predict< W >::_weights
private

Definition at line 220 of file vw_slim_predict.h.


The documentation for this class was generated from the following file: