Vowpal Wabbit
ezexample.h
Go to the documentation of this file.
1 #pragma once
2 #include <cstdio>
3 #include "../vowpalwabbit/parser.h"
4 #include "../vowpalwabbit/vw.h"
5 
// Type used throughout this header for feature ids: the values returned by
// hash() and consumed by addf().
using fid = uint32_t;
7 
9 {
11 
12  public:
13  vw_namespace(const char c) : namespace_letter(c) {}
14 };
15 
16 class ezexample
17 {
18  private:
20  vw* vw_par_ref; // an extra parser if we're multithreaded
22 
23  char str[2];
26  std::vector<fid> past_seeds;
30  char current_ns;
31  bool ns_exists[256];
33 
35 
36  ezexample(const ezexample& ex) = delete;
37  ezexample& operator=(const ezexample& ex) = delete;
38 
40  {
41  example* new_ec = VW::new_unused_example(*vw_par_ref);
42  vw_par_ref->p->lp.default_label(&new_ec->l);
43  new_ec->tag.clear();
44  new_ec->indices.clear();
45  for (auto& i : new_ec->feature_space) i.clear();
46 
47  new_ec->ft_offset = 0;
48  new_ec->num_features = 0;
49  new_ec->partial_prediction = 0.;
50  new_ec->updated_prediction = 0.;
51  new_ec->passthrough = nullptr;
52  new_ec->loss = 0.;
53  new_ec->total_sum_feat_sq = 0.;
54  new_ec->confidence = 0.;
55  return new_ec;
56  }
57 
58  void setup_new_ezexample(vw* this_vw, bool multiline, vw* this_vw_parser)
59  {
60  vw_ref = this_vw;
61  vw_par_ref = (this_vw_parser == nullptr) ? this_vw : this_vw_parser;
62  is_multiline = multiline;
63 
64  str[0] = 0;
65  str[1] = 0;
66  current_seed = 0;
67  current_ns = 0;
68 
69  quadratic_features_num = 0;
70  quadratic_features_sqr = 0.;
71 
72  for (bool& ns_exist : ns_exists) ns_exist = false;
73 
74  example_changed_since_prediction = true;
75  }
76 
78  {
79  static example* empty_example = is_multiline ? VW::read_example(*vw_par_ref, (char*)"") : nullptr;
80  if (example_changed_since_prediction)
81  {
82  mini_setup_example();
83  vw_ref->learn(*ec);
84  if (is_multiline)
85  vw_ref->learn(*empty_example);
86  example_changed_since_prediction = false;
87  }
88  }
89 
90  public:
91  // REAL FUNCTIONALITY
92  // create a new ezexample by asking the vw parser for an example
93  ezexample(vw* this_vw, bool multiline = false, vw* this_vw_parser = nullptr)
94  {
95  setup_new_ezexample(this_vw, multiline, this_vw_parser);
96  example_copies = v_init<example*>();
97  ec = get_new_example();
98  we_create_ec = true;
99 
100  if (vw_ref->add_constant)
101  VW::add_constant_feature(*vw_ref, ec);
102  }
103 
104  // create a new ezexample by wrapping around an already existing example
105  // we do NOT copy your data, therefore, WARNING:
106  // do NOT touch the underlying example unless you really know what you're done)
107  ezexample(vw* this_vw, example* this_ec, bool multiline = false, vw* this_vw_parser = nullptr)
108  {
109  setup_new_ezexample(this_vw, multiline, this_vw_parser);
110 
111  ec = this_ec;
112  we_create_ec = false;
113 
114  for (auto ns : ec->indices) ns_exists[ns] = true;
115  if (current_ns != 0)
116  {
117  str[0] = current_ns;
118  current_seed = VW::hash_space(*vw_ref, str);
119  }
120  }
121 
122  ~ezexample() // calls finish_example *only* if we created our own example!
123  {
124  if (ec->in_use && VW::is_ring_example(*vw_par_ref, ec))
125  VW::finish_example(*vw_par_ref, *ec);
126  for (auto ecc : example_copies)
127  if (ecc->in_use && VW::is_ring_example(*vw_par_ref, ec))
128  VW::finish_example(*vw_par_ref, *ecc);
129  example_copies.clear();
130  free(example_copies.begin());
131  }
132 
133  bool ensure_ns_exists(char c) // returns TRUE iff we should ignore it :)
134  {
135  if (vw_ref->ignore_some && vw_ref->ignore[(int)c])
136  return true;
137  if (ns_exists[(int)c])
138  return false;
139  ec->indices.push_back((size_t)c);
140  ns_exists[(int)c] = true;
141  return false;
142  }
143 
144  void addns(char c)
145  {
146  if (ensure_ns_exists(c))
147  return;
148 
149  ec->feature_space[(int)c].clear();
150  past_seeds.push_back(current_seed);
151  current_ns = c;
152  str[0] = c;
153  current_seed = VW::hash_space(*vw_ref, str);
154  }
155 
156  void remns()
157  {
158  if (ec->indices.empty())
159  {
160  current_seed = 0;
161  current_ns = 0;
162  }
163  else
164  {
165  if (ns_exists[(int)current_ns])
166  {
167  ec->total_sum_feat_sq -= ec->feature_space[(int)current_ns].sum_feat_sq;
168  ec->feature_space[(int)current_ns].clear();
169  ec->num_features -= ec->feature_space[(int)current_ns].size();
170 
171  ns_exists[(int)current_ns] = false;
172  }
173 
174  current_seed = past_seeds.back();
175  past_seeds.pop_back();
176  ec->indices.pop();
177  example_changed_since_prediction = true;
178  }
179  }
180 
181  inline fid addf(char to_ns, fid fint, float v)
182  {
183  if (to_ns == 0)
184  return 0;
185  if (ensure_ns_exists(to_ns))
186  return 0;
187 
188  ec->feature_space[(int)to_ns].push_back(v, fint << vw_ref->weights.stride_shift());
189  ec->total_sum_feat_sq += v * v;
190  ec->num_features++;
191  example_changed_since_prediction = true;
192  return fint;
193  }
194 
195  inline fid addf(fid fint, float v) { return addf(current_ns, fint, v); }
196 
197  // copy an entire namespace from this other example, you can even give it a new namespace name if you want!
198  void add_other_example_ns(example& other, char other_ns, char to_ns)
199  {
200  if (ensure_ns_exists(to_ns))
201  return;
202  features& fs = other.feature_space[(int)other_ns];
203  for (size_t i = 0; i < fs.size(); i++) ec->feature_space[(int)to_ns].push_back(fs.values[i], fs.indicies[i]);
204  ec->total_sum_feat_sq += fs.sum_feat_sq;
205  ec->num_features += fs.size();
206  example_changed_since_prediction = true;
207  }
208  void add_other_example_ns(example& other, char ns) // default to_ns to other_ns
209  {
210  add_other_example_ns(other, ns, ns);
211  }
212 
213  void add_other_example_ns(ezexample& other, char other_ns, char to_ns)
214  {
215  add_other_example_ns(*other.ec, other_ns, to_ns);
216  }
217  void add_other_example_ns(ezexample& other, char ns) { add_other_example_ns(*other.ec, ns); }
218 
219  inline ezexample& set_label(std::string label)
220  {
221  VW::parse_example_label(*vw_par_ref, *ec, label);
222  example_changed_since_prediction = true;
223  return *this;
224  }
225 
227  {
228  ec->partial_prediction = 0.;
229  ec->weight = vw_par_ref->p->lp.get_weight(&ec->l);
230 
231  ec->num_features -= quadratic_features_num;
232  ec->total_sum_feat_sq -= quadratic_features_sqr;
233 
234  quadratic_features_num = 0;
235  quadratic_features_sqr = 0.;
236 
237  for (auto const& pair : vw_ref->pairs)
238  {
239  quadratic_features_num += ec->feature_space[(int)pair[0]].size() * ec->feature_space[(int)pair[1]].size();
240  quadratic_features_sqr +=
241  ec->feature_space[(int)pair[0]].sum_feat_sq * ec->feature_space[(int)pair[1]].sum_feat_sq;
242  }
243  ec->num_features += quadratic_features_num;
244  ec->total_sum_feat_sq += quadratic_features_sqr;
245  }
246 
247  size_t get_num_features() { return ec->num_features; }
248 
249  example* get()
250  {
251  if (example_changed_since_prediction)
252  mini_setup_example();
253  return ec;
254  }
255 
256  float predict()
257  {
258  setup_for_predict();
259  return ec->pred.scalar;
260  }
261 
263  {
264  setup_for_predict();
265  return ec->partial_prediction;
266  }
267 
268  void train() // if multiline, add to stack; otherwise, actually train
269  {
270  if (example_changed_since_prediction)
271  {
272  mini_setup_example();
273  example_changed_since_prediction = false;
274  }
275 
276  if (!is_multiline)
277  {
278  vw_ref->learn(*ec);
279  }
280  else // is multiline
281  { // we need to make a copy
282  example* copy = get_new_example();
283  assert(ec->in_use);
284  VW::copy_example_data(vw_ref->audit, copy, ec, vw_par_ref->p->lp.label_size, vw_par_ref->p->lp.copy_label);
285  assert(copy->in_use);
286  vw_ref->learn(*copy);
287  example_copies.push_back(copy);
288  }
289  }
290 
292  {
293  for (size_t i = 0; i < 256; i++)
294  {
295  if (current_ns == 0)
296  break;
297  remns();
298  }
299  }
300 
301  void finish()
302  {
303  static example* empty_example = is_multiline ? VW::read_example(*vw_par_ref, (char*)"") : nullptr;
304  if (is_multiline)
305  {
306  vw_ref->learn(*empty_example);
307  for (auto ecc : example_copies)
308  if (ecc->in_use)
309  VW::finish_example(*vw_par_ref, *ecc);
310  example_copies.clear();
311  }
312  }
313 
314  // HELPER FUNCTIONALITY
315 
316  inline fid hash(std::string fstr) { return VW::hash_feature(*vw_ref, fstr, current_seed); }
317  inline fid hash(char* fstr) { return VW::hash_feature_cstr(*vw_ref, fstr, current_seed); }
318  inline fid hash(char c, std::string fstr)
319  {
320  str[0] = c;
321  return VW::hash_feature(*vw_ref, fstr, VW::hash_space(*vw_ref, str));
322  }
323  inline fid hash(char c, char* fstr)
324  {
325  str[0] = c;
326  return VW::hash_feature_cstr(*vw_ref, fstr, VW::hash_space(*vw_ref, str));
327  }
328 
329  inline fid addf(fid fint) { return addf(fint, 1.0); }
330  inline fid addf(std::string fstr, float val) { return addf(hash(fstr), val); }
331  inline fid addf(std::string fstr) { return addf(hash(fstr), 1.0); }
332 
333  inline fid addf(char ns, fid fint) { return addf(ns, fint, 1.0); }
334  inline fid addf(char ns, std::string fstr, float val) { return addf(ns, hash(ns, fstr), val); }
335  inline fid addf(char ns, std::string fstr) { return addf(ns, hash(ns, fstr), 1.0); }
336 
338  {
339  addns(n.namespace_letter);
340  return *this;
341  }
342 
343  inline ezexample& operator()(fid fint)
344  {
345  addf(fint, 1.0);
346  return *this;
347  }
348  inline ezexample& operator()(std::string fstr)
349  {
350  addf(fstr, 1.0);
351  return *this;
352  }
353  inline ezexample& operator()(const char* fstr)
354  {
355  addf(fstr, 1.0);
356  return *this;
357  }
358  inline ezexample& operator()(fid fint, float val)
359  {
360  addf(fint, val);
361  return *this;
362  }
363  inline ezexample& operator()(std::string fstr, float val)
364  {
365  addf(fstr, val);
366  return *this;
367  }
368  inline ezexample& operator()(const char* fstr, float val)
369  {
370  addf(fstr, val);
371  return *this;
372  }
373 
374  inline ezexample& operator()(char ns, fid fint)
375  {
376  addf(ns, fint, 1.0);
377  return *this;
378  }
379  inline ezexample& operator()(char ns, std::string fstr)
380  {
381  addf(ns, fstr, 1.0);
382  return *this;
383  }
384  inline ezexample& operator()(char ns, const char* fstr)
385  {
386  addf(ns, fstr, 1.0);
387  return *this;
388  }
389  inline ezexample& operator()(char ns, fid fint, float val)
390  {
391  addf(ns, fint, val);
392  return *this;
393  }
394  inline ezexample& operator()(char ns, std::string fstr, float val)
395  {
396  addf(ns, fstr, val);
397  return *this;
398  }
399  inline ezexample& operator()(char ns, const char* fstr, float val)
400  {
401  addf(ns, fstr, val);
402  return *this;
403  }
404 
405  inline ezexample& operator()(example& other, char other_ns, char to_ns)
406  {
407  add_other_example_ns(other, other_ns, to_ns);
408  return *this;
409  }
410  inline ezexample& operator()(example& other, char ns)
411  {
412  add_other_example_ns(other, ns);
413  return *this;
414  }
415  inline ezexample& operator()(ezexample& other, char other_ns, char to_ns)
416  {
417  add_other_example_ns(other, other_ns, to_ns);
418  return *this;
419  }
420  inline ezexample& operator()(ezexample& other, char ns)
421  {
422  add_other_example_ns(other, ns);
423  return *this;
424  }
425 
427  {
428  remns();
429  return *this;
430  }
431 
432  inline float operator()() { return predict(); }
433 };
fid current_seed
Definition: ezexample.h:27
ezexample & operator()(char ns, fid fint)
Definition: ezexample.h:374
v_array< char > tag
Definition: example.h:63
void learn(example &)
Definition: global_data.cc:137
v_array< namespace_index > indices
fid addf(std::string fstr)
Definition: ezexample.h:331
bool is_ring_example(vw &all, example *ae)
Definition: parser.cc:1009
void parse_example_label(vw &all, example &ec, std::string label)
Definition: parser.cc:846
parameters weights
Definition: global_data.h:537
void(* copy_label)(void *, void *)
Definition: label_parser.h:18
ezexample & operator()(char ns, const char *fstr, float val)
Definition: ezexample.h:399
fid addf(fid fint)
Definition: ezexample.h:329
T pop()
Definition: v_array.h:58
std::vector< std::string > pairs
Definition: global_data.h:459
vw * vw_par_ref
Definition: ezexample.h:20
float scalar
Definition: example.h:45
fid hash(char c, std::string fstr)
Definition: ezexample.h:318
void copy_example_data(bool audit, example *dst, example *src)
Definition: example.cc:72
ezexample & operator()(ezexample &other, char ns)
Definition: ezexample.h:420
ezexample & operator()(char ns, std::string fstr, float val)
Definition: ezexample.h:394
v_array< feature_index > indicies
vw_namespace(const char c)
Definition: ezexample.h:13
ezexample(vw *this_vw, example *this_ec, bool multiline=false, vw *this_vw_parser=nullptr)
Definition: ezexample.h:107
void(* default_label)(void *)
Definition: label_parser.h:12
bool add_constant
Definition: global_data.h:496
fid addf(char ns, fid fint)
Definition: ezexample.h:333
void clear_features()
Definition: ezexample.h:291
v_array< example * > example_copies
Definition: ezexample.h:34
ezexample & operator--()
Definition: ezexample.h:426
the core definition of a set of features.
void add_other_example_ns(ezexample &other, char other_ns, char to_ns)
Definition: ezexample.h:213
float confidence
Definition: example.h:72
float partial_prediction
Definition: example.h:68
v_array< feature_value > values
float updated_prediction
Definition: example.h:69
ezexample & operator()(char ns, std::string fstr)
Definition: ezexample.h:379
float predict()
Definition: ezexample.h:256
ezexample & operator()(char ns, fid fint, float val)
Definition: ezexample.h:389
ezexample & operator()(std::string fstr, float val)
Definition: ezexample.h:363
bool example_changed_since_prediction
Definition: ezexample.h:32
vw * vw_ref
Definition: ezexample.h:19
bool is_multiline
Definition: ezexample.h:21
ezexample & operator()(const char *fstr, float val)
Definition: ezexample.h:368
std::array< bool, NUM_NAMESPACES > ignore
Definition: global_data.h:463
std::vector< fid > past_seeds
Definition: ezexample.h:26
ezexample & operator()(fid fint, float val)
Definition: ezexample.h:358
void train()
Definition: ezexample.h:268
example * ec
Definition: ezexample.h:24
fid hash(char c, char *fstr)
Definition: ezexample.h:323
parser * p
Definition: global_data.h:377
bool ensure_ns_exists(char c)
Definition: ezexample.h:133
float predict_partial()
Definition: ezexample.h:262
std::array< features, NUM_NAMESPACES > feature_space
ezexample & operator()(char ns, const char *fstr)
Definition: ezexample.h:384
bool ignore_some
Definition: global_data.h:462
size_t size() const
bool we_create_ec
Definition: ezexample.h:25
fid addf(fid fint, float v)
Definition: ezexample.h:195
float(* get_weight)(void *)
Definition: label_parser.h:17
fid hash(char *fstr)
Definition: ezexample.h:317
void push_back(const T &new_ele)
Definition: v_array.h:107
fid addf(char to_ns, fid fint, float v)
Definition: ezexample.h:181
uint64_t hash_feature_cstr(vw &all, char *fstr, uint64_t u)
Definition: vw.h:169
char namespace_letter
Definition: ezexample.h:10
ezexample & operator()(example &other, char ns)
Definition: ezexample.h:410
void remns()
Definition: ezexample.h:156
void clear()
Definition: v_array.h:88
void mini_setup_example()
Definition: ezexample.h:226
ezexample & operator()(fid fint)
Definition: ezexample.h:343
size_t num_features
Definition: example.h:67
void empty_example(vw &, example &ec)
Definition: parser.cc:857
void add_constant_feature(vw &vw, example *ec)
Definition: parser.cc:774
ezexample & operator()(const vw_namespace &n)
Definition: ezexample.h:337
void finish()
Definition: ezexample.h:301
ezexample & operator()(std::string fstr)
Definition: ezexample.h:348
ezexample & set_label(std::string label)
Definition: ezexample.h:219
char current_ns
Definition: ezexample.h:30
void finish_example(vw &, example &)
Definition: parser.cc:881
size_t get_num_features()
Definition: ezexample.h:247
ezexample & operator()(ezexample &other, char other_ns, char to_ns)
Definition: ezexample.h:415
~ezexample()
Definition: ezexample.h:122
float loss
Definition: example.h:70
void setup_for_predict()
Definition: ezexample.h:77
fid addf(char ns, std::string fstr, float val)
Definition: ezexample.h:334
void addns(char c)
Definition: ezexample.h:144
float quadratic_features_sqr
Definition: ezexample.h:29
polylabel l
Definition: example.h:57
ezexample & operator()(const char *fstr)
Definition: ezexample.h:353
bool in_use
Definition: example.h:79
float total_sum_feat_sq
Definition: example.h:71
features * passthrough
Definition: example.h:74
float sum_feat_sq
example * read_example(vw &all, char *example_line)
Definition: parser.cc:761
size_t label_size
Definition: label_parser.h:23
bool empty() const
Definition: v_array.h:59
example * new_unused_example(vw &all)
Definition: parser.cc:753
ezexample(vw *this_vw, bool multiline=false, vw *this_vw_parser=nullptr)
Definition: ezexample.h:93
uint64_t hash_feature(vw &all, const std::string &s, uint64_t u)
Definition: vw.h:153
uint64_t hash_space(vw &all, const std::string &s)
Definition: vw.h:138
uint32_t stride_shift()
bool audit
Definition: global_data.h:486
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
polyprediction pred
Definition: example.h:60
void add_other_example_ns(example &other, char ns)
Definition: ezexample.h:208
size_t quadratic_features_num
Definition: ezexample.h:28
ezexample & operator()(example &other, char other_ns, char to_ns)
Definition: ezexample.h:405
float operator()()
Definition: ezexample.h:432
fid addf(std::string fstr, float val)
Definition: ezexample.h:330
float weight
Definition: example.h:62
constexpr uint64_t c
Definition: rand48.cc:12
uint32_t fid
Definition: ezexample.h:6
fid addf(char ns, std::string fstr)
Definition: ezexample.h:335
void add_other_example_ns(ezexample &other, char ns)
Definition: ezexample.h:217
example * get_new_example()
Definition: ezexample.h:39
void setup_new_ezexample(vw *this_vw, bool multiline, vw *this_vw_parser)
Definition: ezexample.h:58
label_parser lp
Definition: parser.h:102
fid hash(std::string fstr)
Definition: ezexample.h:316
void add_other_example_ns(example &other, char other_ns, char to_ns)
Definition: ezexample.h:198