Vowpal Wabbit
global_data.h
Go to the documentation of this file.
1 
2 /*
3 Copyright (c) by respective owners including Yahoo!, Microsoft, and
4 individual contributors. All rights reserved. Released under a BSD
5 license as described in the file LICENSE.
6  */
7 #pragma once
8 #include <iostream>
9 #include <iomanip>
10 #include <vector>
11 #include <map>
12 #include <cfloat>
13 #include <stdint.h>
14 #include <cstdio>
15 #include <inttypes.h>
16 #include <climits>
17 #include <stack>
18 #include <array>
19 
20 // Thread cannot be used in managed C++, tell the compiler that this is unmanaged even if included in a managed project.
21 #ifdef _M_CEE
22 #pragma managed(push, off)
23 #undef _M_CEE
24 #include <thread>
25 #define _M_CEE 001
26 #pragma managed(pop)
27 #else
28 #include <thread>
29 #endif
30 
31 #include "v_array.h"
32 #include "array_parameters.h"
33 #include "parse_primitives.h"
34 #include "loss_functions.h"
35 #include "comp_io.h"
36 #include "example.h"
37 #include "config.h"
38 #include "learner.h"
39 #include "v_hashmap.h"
40 #include <time.h>
41 #include "hash.h"
42 #include "crossplat_compat.h"
43 #include "error_reporting.h"
44 #include "constant.h"
45 #include "rand48.h"
46 
47 #include "options.h"
48 #include "version.h"
49 #include <memory>
50 
51 typedef float weight;
52 
54 
56 {
57  char* name;
58  uint64_t file_hash;
60 };
61 
62 inline void deleter(substring ss, uint64_t /* label */) { free_it(ss.begin); }
63 
65 {
66  private:
67  std::vector<substring> id2name;
69  uint32_t K;
70 
71  public:
72  namedlabels(std::string label_list)
73  {
74  char* temp = calloc_or_throw<char>(1 + label_list.length());
75  memcpy(temp, label_list.c_str(), strlen(label_list.c_str()));
76  substring ss = {temp, nullptr};
77  ss.end = ss.begin + label_list.length();
78  tokenize(',', ss, id2name);
79 
80  K = (uint32_t)id2name.size();
81  name2id.delete_v(); // delete automatically allocated vector.
82  name2id.init(4 * K + 1, 0, substring_equal);
83  for (size_t k = 0; k < K; k++)
84  {
85  substring& l = id2name[k];
86  uint64_t hash = uniform_hash((unsigned char*)l.begin, l.end - l.begin, 378401);
87  uint64_t id = name2id.get(l, hash);
88  if (id != 0) // TODO: memory leak: char* temp
89  THROW("error: label dictionary initialized with multiple occurances of: " << l);
90  size_t len = l.end - l.begin;
91  substring l_copy = {calloc_or_throw<char>(len), nullptr};
92  memcpy(l_copy.begin, l.begin, len * sizeof(char));
93  l_copy.end = l_copy.begin + len;
94  name2id.put(l_copy, hash, k + 1);
95  }
96  }
97 
99  {
100  if (id2name.size() > 0)
101  free(id2name[0].begin);
102  name2id.iter(deleter);
103  name2id.delete_v();
104  }
105 
106  uint32_t getK() { return K; }
107 
108  uint64_t get(substring& s)
109  {
110  uint64_t hash = uniform_hash((unsigned char*)s.begin, s.end - s.begin, 378401);
111  uint64_t v = name2id.get(s, hash);
112  if (v == 0)
113  {
114  std::cerr << "warning: missing named label '";
115  for (char* c = s.begin; c != s.end; c++) std::cerr << *c;
116  std::cerr << '\'' << std::endl;
117  }
118  return v;
119  }
120 
121  substring get(uint32_t v)
122  {
123  if ((v == 0) || (v > K))
124  {
125  substring ss = {nullptr, nullptr};
126  return ss;
127  }
128  else
129  return id2name[v - 1];
130  }
131 };
132 
134 {
135  size_t queries;
136 
137  uint64_t example_number;
138  uint64_t total_features;
139 
140  double t;
145  double sum_loss;
147  float dump_interval; // when should I update for the user.
148  double gravity;
149  double contraction;
150  float min_label; // minimum label encountered
151  float max_label; // maximum label encountered
152 
154 
155  // for holdout
160  // for best model selection
162  double weighted_holdout_examples_since_last_pass; // reserved for best predictor selection
165  // for --probabilities
169 
173 
174  // Column width, precision constants:
175  static constexpr int col_avg_loss = 8;
176  static constexpr int prec_avg_loss = 6;
177  static constexpr int col_since_last = 8;
178  static constexpr int prec_since_last = 6;
179  static constexpr int col_example_counter = 12;
180  static constexpr int col_example_weight = col_example_counter + 2;
181  static constexpr int prec_example_weight = 1;
182  static constexpr int col_current_label = 8;
183  static constexpr int prec_current_label = 4;
184  static constexpr int col_current_predict = 8;
185  static constexpr int prec_current_predict = 4;
186  static constexpr int col_current_features = 8;
187 
188  double weighted_examples() { return weighted_labeled_examples + weighted_unlabeled_examples; }
189 
190  void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
191  {
192  t += weight;
193  if (test_example && labeled_example)
194  {
195  weighted_holdout_examples += weight; // test weight seen
196  weighted_holdout_examples_since_last_dump += weight;
197  weighted_holdout_examples_since_last_pass += weight;
198  holdout_sum_loss += loss;
199  holdout_sum_loss_since_last_dump += loss;
200  holdout_sum_loss_since_last_pass += loss; // since last pass
201  }
202  else
203  {
204  if (labeled_example)
205  weighted_labeled_examples += weight;
206  else
207  weighted_unlabeled_examples += weight;
208  sum_loss += loss;
209  sum_loss_since_last_dump += loss;
210  total_features += num_features;
211  example_number++;
212  }
213  }
214 
215  inline void update_dump_interval(bool progress_add, float progress_arg)
216  {
217  sum_loss_since_last_dump = 0.0;
218  old_weighted_labeled_examples = weighted_labeled_examples;
219  if (progress_add)
220  dump_interval = (float)weighted_examples() + progress_arg;
221  else
222  dump_interval = (float)weighted_examples() * progress_arg;
223  }
224 
225  void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features,
226  bool progress_add, float progress_arg)
227  {
228  std::ostringstream label_buf, pred_buf;
229 
230  label_buf << std::setw(col_current_label) << std::setfill(' ');
231  if (label < FLT_MAX)
232  label_buf << std::setprecision(prec_current_label) << std::fixed << std::right << label;
233  else
234  label_buf << std::left << " unknown";
235 
236  pred_buf << std::setw(col_current_predict) << std::setprecision(prec_current_predict) << std::fixed << std::right
237  << std::setfill(' ') << prediction;
238 
239  print_update(
240  holdout_set_off, current_pass, label_buf.str(), pred_buf.str(), num_features, progress_add, progress_arg);
241  }
242 
243  void print_update(bool holdout_set_off, size_t current_pass, uint32_t label, uint32_t prediction, size_t num_features,
244  bool progress_add, float progress_arg)
245  {
246  std::ostringstream label_buf, pred_buf;
247 
248  label_buf << std::setw(col_current_label) << std::setfill(' ');
249  if (label < INT_MAX)
250  label_buf << std::right << label;
251  else
252  label_buf << std::left << " unknown";
253 
254  pred_buf << std::setw(col_current_predict) << std::right << std::setfill(' ') << prediction;
255 
256  print_update(
257  holdout_set_off, current_pass, label_buf.str(), pred_buf.str(), num_features, progress_add, progress_arg);
258  }
259 
260  void print_update(bool holdout_set_off, size_t current_pass, const std::string& label, uint32_t prediction,
261  size_t num_features, bool progress_add, float progress_arg)
262  {
263  std::ostringstream pred_buf;
264 
265  pred_buf << std::setw(col_current_predict) << std::right << std::setfill(' ') << prediction;
266 
267  print_update(holdout_set_off, current_pass, label, pred_buf.str(), num_features, progress_add, progress_arg);
268  }
269 
270  void print_update(bool holdout_set_off, size_t current_pass, const std::string& label, const std::string& prediction,
271  size_t num_features, bool progress_add, float progress_arg)
272  {
273  std::streamsize saved_w = std::cerr.width();
274  std::streamsize saved_prec = std::cerr.precision();
275  std::ostream::fmtflags saved_f = std::cerr.flags();
276  bool holding_out = false;
277 
278  if (!holdout_set_off && current_pass >= 1)
279  {
280  if (holdout_sum_loss == 0. && weighted_holdout_examples == 0.)
281  std::cerr << std::setw(col_avg_loss) << std::left << " unknown";
282  else
283  std::cerr << std::setw(col_avg_loss) << std::setprecision(prec_avg_loss) << std::fixed << std::right
284  << (holdout_sum_loss / weighted_holdout_examples);
285 
286  std::cerr << " ";
287 
288  if (holdout_sum_loss_since_last_dump == 0. && weighted_holdout_examples_since_last_dump == 0.)
289  std::cerr << std::setw(col_since_last) << std::left << " unknown";
290  else
291  std::cerr << std::setw(col_since_last) << std::setprecision(prec_since_last) << std::fixed << std::right
292  << (holdout_sum_loss_since_last_dump / weighted_holdout_examples_since_last_dump);
293 
294  weighted_holdout_examples_since_last_dump = 0;
295  holdout_sum_loss_since_last_dump = 0.0;
296 
297  holding_out = true;
298  }
299  else
300  {
301  std::cerr << std::setw(col_avg_loss) << std::setprecision(prec_avg_loss) << std::right << std::fixed;
302  if (weighted_labeled_examples > 0.)
303  std::cerr << (sum_loss / weighted_labeled_examples);
304  else
305  std::cerr << "n.a.";
306  std::cerr << " " << std::setw(col_since_last) << std::setprecision(prec_avg_loss) << std::right << std::fixed;
307  if (weighted_labeled_examples == old_weighted_labeled_examples)
308  std::cerr << "n.a.";
309  else
310  std::cerr << (sum_loss_since_last_dump / (weighted_labeled_examples - old_weighted_labeled_examples));
311  }
312  std::cerr << " " << std::setw(col_example_counter) << std::right << example_number << " "
313  << std::setw(col_example_weight) << std::setprecision(prec_example_weight) << std::right
314  << weighted_examples() << " " << std::setw(col_current_label) << std::right << label << " "
315  << std::setw(col_current_predict) << std::right << prediction << " " << std::setw(col_current_features)
316  << std::right << num_features;
317 
318  if (holding_out)
319  std::cerr << " h";
320 
321  std::cerr << std::endl;
322  std::cerr.flush();
323 
324  std::cerr.width(saved_w);
325  std::cerr.precision(saved_prec);
326  std::cerr.setf(saved_f);
327 
328  update_dump_interval(progress_add, progress_arg);
329  }
330 };
331 
333 {
336 };
337 
338 class AllReduce;
339 
340 // avoid name clash
341 namespace label_type
342 {
344 {
346  cb, // contextual-bandit
347  cb_eval, // contextual-bandit evaluation
348  cs, // cost-sensitive
350  mc,
351  ccb // conditional contextual-bandit
352 };
353 }
354 
356 {
357  private:
358  uint64_t random_state;
359 
360  public:
361  constexpr rand_state() : random_state(0) {}
362  rand_state(uint64_t initial) : random_state(initial) {}
363  constexpr uint64_t get_current_state() const noexcept { return random_state; }
364  float get_and_update_random() { return merand48(random_state); }
365  float get_random() const { return merand48_noadvance(random_state); }
366  void set_random_state(uint64_t initial) noexcept { random_state = initial; }
367 };
368 
369 struct vw
370 {
371  private:
372  std::shared_ptr<rand_state> _random_state_sp = std::make_shared<rand_state>(); // per instance random_state
373 
374  public:
376 
378  std::thread parse_thread;
379 
382 
383  LEARNER::base_learner* l; // the top level learner
384  LEARNER::single_learner* scorer; // a scoring function
385  LEARNER::base_learner* cost_sensitive; // a cost sensitive learning algorithm. can be single or multi line learner
386 
387  void learn(example&);
388  void learn(multi_ex&);
389  void predict(example&);
390  void predict(multi_ex&);
391  void finish_example(example&);
392  void finish_example(multi_ex&);
393 
394  void (*set_minmax)(shared_data* sd, float label);
395 
396  uint64_t current_pass;
397 
398  uint32_t num_bits; // log_2 of the number of features.
400 
401  uint32_t hash_seed;
402 
403  std::string data_filename; // was vm["data"]
404 
405  bool daemon;
406  size_t num_children;
407 
411 
412  bool bfgs;
414 
417  std::string id;
418 
421  bool vw_is_main = false; // true if vw is executable; false in library mode
422 
423  // error reporting
425 
426  // Flag used when VW internally manages lifetime of options object.
427  bool should_delete_options = false;
429 
430  void* /*Search::search*/ searchstr;
431 
432  uint32_t wpp;
433 
435 
436  std::vector<std::string> initial_regressors;
437 
438  std::string feature_mask;
439 
443 
444  float l1_lambda; // the level of l_1 regularization to impose.
445  float l2_lambda; // the level of l_2 regularization to impose.
446  bool no_bias; // no bias in regularization
447  float power_t; // the power on learning rate decay.
448  int reg_mode;
449 
450  size_t pass_length;
451  size_t numpasses;
453  uint64_t parse_mask; // 1 << num_bits -1
454  bool permutations; // if true - permutations of features generated instead of simple combinations. false by default
455 
456  // Referenced by examples as their set of interactions. Can be overridden by reductions.
457  std::vector<std::string> interactions;
458  // TODO #1863 deprecate in favor of only interactions field.
459  std::vector<std::string> pairs; // pairs of features to cross.
460  // TODO #1863 deprecate in favor of only interactions field.
461  std::vector<std::string> triples; // triples of features to cross.
463  std::array<bool, NUM_NAMESPACES> ignore; // a set of namespaces to ignore
465  std::array<bool, NUM_NAMESPACES> ignore_linear; // a set of namespaces to ignore for linear
466 
467  bool redefine_some; // --redefine param was used
468  std::array<unsigned char, NUM_NAMESPACES> redefine; // keeps new chars for namespaces
469  std::vector<std::string> ngram_strings;
470  std::vector<std::string> skip_strings;
471  std::array<uint32_t, NUM_NAMESPACES> ngram; // ngrams to generate.
472  std::array<uint32_t, NUM_NAMESPACES> skips; // skips in ngrams.
473  std::vector<std::string> limit_strings; // descriptor of feature limits
474  std::array<uint32_t, NUM_NAMESPACES> limit; // count to limit features by
475  std::array<uint64_t, NUM_NAMESPACES>
476  affix_features; // affixes to generate (up to 16 per namespace - 4 bits per affix)
477  std::array<bool, NUM_NAMESPACES> spelling_features; // generate spelling features for which namespace
478  std::vector<std::string> dictionary_path; // where to look for dictionaries
479 
480  // This array is required to be value initialized so that the std::vectors are constructed.
481  std::array<std::vector<feature_dict*>, NUM_NAMESPACES>
482  namespace_dictionaries{}; // each namespace has a list of dictionaries attached to it
483  std::vector<dictionary_info> loaded_dictionaries; // which dictionaries have we loaded from a file to memory?
484 
485  void (*delete_prediction)(void*);
486  bool audit; // should I print lots of debugging information?
487  bool quiet; // Should I suppress progress-printing of updates?
488  bool training; // Should I train if label data is available?
489  bool active;
490  bool invariant_updates; // Should we use importance aware/safe updates
491  uint64_t random_seed;
493  bool random_positive_weights; // for initialize_regressor w/ new_mf
501  uint32_t holdout_period;
502  uint32_t holdout_after;
503  size_t check_holdout_every_n_passes; // default: 1, but search might want to set it higher if you spend multiple
504  // passes learning a single policy
505 
506  size_t normalized_idx; // offset idx where the norm is stored (1 or 2 depending on whether adaptive is true)
507 
508  uint32_t lda;
509 
510  std::string text_regressor_name;
512 
513  size_t length() { return ((size_t)1) << num_bits; };
514 
515  std::stack<LEARNER::base_learner* (*)(VW::config::options_i&, vw&)> reduction_stack;
516 
517  // Prediction output
518  v_array<int> final_prediction_sink; // set to send global predictions to.
519  int raw_prediction; // file descriptors for text output.
520 
521  void (*print)(int, float, float, v_array<char>);
522  void (*print_text)(int, std::string, v_array<char>);
524 
526 
527  bool stdin_off;
528 
529  // runtime accounting variables.
530  float initial_t;
531  float eta; // learning rate control.
533  time_t init_time;
534 
535  std::string final_regressor_name;
536 
538 
539  size_t max_examples; // for TLC
540 
541  bool hash_inv;
543 
544  // Set by --progress <arg>
545  bool progress_add; // additive (rather than multiplicative) progress dumps
546  float progress_arg; // next update progress dump multiplier
547 
548  std::map<std::string, size_t> name_index_map;
549 
551 
552  vw();
553  std::shared_ptr<rand_state> get_random_state() { return _random_state_sp; }
554 
555  vw(const vw&) = delete;
556  vw& operator=(const vw&) = delete;
557 
558  // vw object cannot be moved as many objects hold a pointer to it.
559  // That pointer would be invalidated if it were to be moved.
560  vw(const vw&&) = delete;
561  vw& operator=(const vw&&) = delete;
562 };
563 
564 void print_result(int f, float res, float weight, v_array<char> tag);
565 void binary_print_result(int f, float res, float weight, v_array<char> tag);
566 void noop_mm(shared_data*, float label);
567 void get_prediction(int sock, float& res, float& weight);
568 void compile_gram(
569  std::vector<std::string> grams, std::array<uint32_t, NUM_NAMESPACES>& dest, char* descriptor, bool quiet);
570 void compile_limits(std::vector<std::string> limits, std::array<uint32_t, NUM_NAMESPACES>& dest, bool quiet);
571 int print_tag(std::stringstream& ss, v_array<char> tag);
bool report_multiclass_log_loss
Definition: global_data.h:166
double sum_loss
Definition: global_data.h:145
std::vector< std::string > skip_strings
Definition: global_data.h:470
uint32_t holdout_after
Definition: global_data.h:502
void print_update(bool holdout_set_off, size_t current_pass, const std::string &label, uint32_t prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:260
int raw_prediction
Definition: global_data.h:519
std::array< uint32_t, NUM_NAMESPACES > skips
Definition: global_data.h:472
void * searchstr
Definition: global_data.h:430
uint32_t K
Definition: global_data.h:69
bool ignore_some_linear
Definition: global_data.h:464
std::array< bool, NUM_NAMESPACES > spelling_features
Definition: global_data.h:477
parameters weights
Definition: global_data.h:537
loss_function * loss
Definition: global_data.h:523
float merand48_noadvance(uint64_t v)
Definition: rand48.cc:24
constexpr bool test_example(example &ec) noexcept
Definition: bfgs.cc:147
LEARNER::base_learner * cost_sensitive
Definition: global_data.h:385
bool tnormal_weights
Definition: global_data.h:495
std::vector< std::string > pairs
Definition: global_data.h:459
std::array< uint32_t, NUM_NAMESPACES > ngram
Definition: global_data.h:471
void finish_example(vw &all, audit_regressor_data &dd, example &ec)
float initial_t
Definition: global_data.h:530
double holdout_sum_loss_since_last_pass
Definition: global_data.h:163
VW::config::options_i * options
Definition: global_data.h:428
size_t normalized_idx
Definition: global_data.h:506
double weighted_unlabeled_examples
Definition: global_data.h:143
void get_prediction(int sock, float &res, float &weight)
Definition: global_data.cc:52
std::vector< std::string > ngram_strings
Definition: global_data.h:469
bool is_more_than_two_labels_observed
Definition: global_data.h:170
bool hash_inv
Definition: global_data.h:541
char * end
Definition: hashstring.h:10
bool random_positive_weights
Definition: global_data.h:493
char * begin
Definition: hashstring.h:9
float initial_weight
Definition: global_data.h:409
bool add_constant
Definition: global_data.h:496
label_type::label_type_t label_type
Definition: global_data.h:550
bool redefine_some
Definition: global_data.h:467
uint64_t random_seed
Definition: global_data.h:491
std::string inv_hash_regressor_name
Definition: global_data.h:511
time_t init_time
Definition: global_data.h:533
float power_t
Definition: global_data.h:447
v_array< int > final_prediction_sink
Definition: global_data.h:518
namedlabels * ldict
Definition: global_data.h:153
double holdout_multiclass_log_loss
Definition: global_data.h:168
v_hashmap< substring, uint64_t > name2id
Definition: global_data.h:68
std::vector< dictionary_info > loaded_dictionaries
Definition: global_data.h:483
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
double holdout_best_loss
Definition: global_data.h:161
bool quiet
Definition: global_data.h:487
double contraction
Definition: global_data.h:149
std::vector< std::string > limit_strings
Definition: global_data.h:473
std::array< uint64_t, NUM_NAMESPACES > affix_features
Definition: global_data.h:476
float merand48(uint64_t &initial)
Definition: rand48.cc:16
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60
bool holdout_set_off
Definition: global_data.h:499
size_t check_holdout_every_n_passes
Definition: global_data.h:503
uint32_t num_bits
Definition: global_data.h:398
std::array< bool, NUM_NAMESPACES > ignore
Definition: global_data.h:463
bool progress_add
Definition: global_data.h:545
bool training
Definition: global_data.h:488
bool hessian_on
Definition: global_data.h:413
void free_it(void *ptr)
Definition: memory.h:94
double sum_loss_since_last_dump
Definition: global_data.h:146
uint32_t lda
Definition: global_data.h:508
parser * p
Definition: global_data.h:377
std::shared_ptr< rand_state > get_random_state()
Definition: global_data.h:553
std::array< uint32_t, NUM_NAMESPACES > limit
Definition: global_data.h:474
double multiclass_log_loss
Definition: global_data.h:167
bool ignore_some
Definition: global_data.h:462
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores)
Definition: cb.cc:180
AllReduce * all_reduce
Definition: global_data.h:381
int print_tag(std::stringstream &ss, v_array< char > tag)
Definition: global_data.cc:81
float first_observed_label
Definition: global_data.h:171
int stdout_fileno
Definition: global_data.h:434
std::string id
Definition: global_data.h:417
namedlabels(std::string label_list)
Definition: global_data.h:72
bool no_bias
Definition: global_data.h:446
constexpr rand_state()
Definition: global_data.h:361
float initial_constant
Definition: global_data.h:410
shared_data * sd
Definition: global_data.h:375
constexpr uint64_t get_current_state() const noexcept
Definition: global_data.h:363
float l2_lambda
Definition: global_data.h:445
VW::version_struct model_file_ver
Definition: global_data.h:419
float progress_arg
Definition: global_data.h:546
std::array< bool, NUM_NAMESPACES > ignore_linear
Definition: global_data.h:465
void deleter(substring ss, uint64_t)
Definition: global_data.h:62
bool active
Definition: global_data.h:489
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
vw_ostream trace_message
Definition: global_data.h:424
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:225
double old_weighted_labeled_examples
Definition: global_data.h:142
constexpr size_t NUM_NAMESPACES
Definition: constant.h:38
bool bfgs
Definition: global_data.h:412
double weighted_holdout_examples
Definition: global_data.h:156
std::string feature_mask
Definition: global_data.h:438
double weighted_holdout_examples_since_last_pass
Definition: global_data.h:162
double holdout_sum_loss
Definition: global_data.h:159
std::string per_feature_regularizer_output
Definition: global_data.h:441
AllReduceType all_reduce_type
Definition: global_data.h:380
feature_dict * dict
Definition: global_data.h:59
char * program_name
Definition: global_data.h:525
double weighted_labels
Definition: global_data.h:144
bool random_weights
Definition: global_data.h:492
uint64_t file_hash
Definition: global_data.h:58
rand_state(uint64_t initial)
Definition: global_data.h:362
std::thread parse_thread
Definition: global_data.h:378
float weight
Definition: global_data.h:51
uint64_t current_pass
Definition: global_data.h:396
float get_and_update_random()
Definition: global_data.h:364
bool print_invert
Definition: global_data.h:542
std::array< unsigned char, NUM_NAMESPACES > redefine
Definition: global_data.h:468
bool default_bits
Definition: global_data.h:399
AllReduceType
Definition: global_data.h:332
uint32_t wpp
Definition: global_data.h:432
std::string per_feature_regularizer_text
Definition: global_data.h:442
std::vector< std::string > triples
Definition: global_data.h:461
LEARNER::single_learner * scorer
Definition: global_data.h:384
void print_update(bool holdout_set_off, size_t current_pass, uint32_t label, uint32_t prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:243
size_t numpasses
Definition: global_data.h:451
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190
bool nonormalize
Definition: global_data.h:497
float eta
Definition: global_data.h:531
float weight
uint64_t example_number
Definition: global_data.h:137
std::vector< example * > multi_ex
Definition: example.h:122
std::vector< std::string > initial_regressors
Definition: global_data.h:436
bool save_per_pass
Definition: global_data.h:408
uint64_t parse_mask
Definition: global_data.h:453
std::vector< std::string > dictionary_path
Definition: global_data.h:478
size_t passes_complete
Definition: global_data.h:452
void print_update(bool holdout_set_off, size_t current_pass, const std::string &label, const std::string &prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:270
uint64_t random_state
Definition: global_data.h:358
float min_label
Definition: global_data.h:150
double gravity
Definition: global_data.h:148
void init(size_t min_size, const V &def, bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:54
size_t pass_length
Definition: global_data.h:450
v_hashmap< substring, features * > feature_dict
Definition: global_data.h:53
void noop_mm(shared_data *, float label)
Definition: global_data.cc:135
std::vector< std::string > interactions
Definition: global_data.h:457
bool preserve_performance_counters
Definition: global_data.h:416
LEARNER::base_learner * l
Definition: global_data.h:383
bool save_resume
Definition: global_data.h:415
bool substring_equal(const substring &a, const substring &b)
bool do_reset_source
Definition: global_data.h:498
uint32_t getK()
Definition: global_data.h:106
float max_label
Definition: global_data.h:151
void compile_gram(std::vector< std::string > grams, std::array< uint32_t, NUM_NAMESPACES > &dest, char *descriptor, bool quiet)
Definition: global_data.cc:191
uint32_t hash_seed
Definition: global_data.h:401
void delete_v()
Definition: v_hashmap.h:103
double weighted_labeled_examples
Definition: global_data.h:141
Definition: print.cc:9
Definition: parser.h:38
bool permutations
Definition: global_data.h:454
std::string per_feature_regularizer_input
Definition: global_data.h:440
double weighted_holdout_examples_since_last_dump
Definition: global_data.h:157
bool audit
Definition: global_data.h:486
float second_observed_label
Definition: global_data.h:172
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
bool stdin_off
Definition: global_data.h:527
void binary_print_result(int f, float res, float weight, v_array< char > tag)
Definition: global_data.cc:72
double holdout_sum_loss_since_last_dump
Definition: global_data.h:158
size_t num_children
Definition: global_data.h:406
bool early_terminate
Definition: global_data.h:500
float get_random() const
Definition: global_data.h:365
void set_random_state(uint64_t initial) noexcept
Definition: global_data.h:366
std::string final_regressor_name
Definition: global_data.h:535
std::vector< substring > id2name
Definition: global_data.h:67
void print_result(int f, float res, float weight, v_array< char > tag)
Definition: global_data.cc:91
size_t max_examples
Definition: global_data.h:539
bool invariant_updates
Definition: global_data.h:490
void learn(bfgs &b, base_learner &base, example &ec)
Definition: bfgs.cc:965
std::map< std::string, size_t > name_index_map
Definition: global_data.h:548
double weighted_examples()
Definition: global_data.h:188
float l1_lambda
Definition: global_data.h:444
float dump_interval
Definition: global_data.h:147
void put(const K &key, uint64_t hash, const V &val)
Definition: v_hashmap.h:275
#define THROW(args)
Definition: vw_exception.h:181
V & get(const K &key, uint64_t hash)
Definition: v_hashmap.h:203
constexpr uint64_t c
Definition: rand48.cc:12
size_t queries
Definition: global_data.h:135
bool normal_weights
Definition: global_data.h:494
void iter(void(*func)(K, V))
Definition: v_hashmap.h:149
size_t holdout_best_pass
Definition: global_data.h:164
float f
Definition: cache.cc:40
uint64_t total_features
Definition: global_data.h:138
std::string data_filename
Definition: global_data.h:403
bool daemon
Definition: global_data.h:405
std::stack< LEARNER::base_learner *(*)(VW::config::options_i &, vw &)> reduction_stack
Definition: global_data.h:513
uint32_t holdout_period
Definition: global_data.h:501
std::string text_regressor_name
Definition: global_data.h:510
int reg_mode
Definition: global_data.h:448
void update_dump_interval(bool progress_add, float progress_arg)
Definition: global_data.h:215
double normalized_sum_norm_x
Definition: global_data.h:420
void compile_limits(std::vector< std::string > limits, std::array< uint32_t, NUM_NAMESPACES > &dest, bool quiet)
Definition: global_data.cc:216
float eta_decay_rate
Definition: global_data.h:532