Vowpal Wabbit
example.cc
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5  */
6 #include <cstdint>
7 #include <algorithm>
8 
9 #include "gd.h"
10 
12 {
13  uint64_t last_index = (uint64_t)-1;
14  float sum_sq = 0.f;
15  features::iterator pos = fs.begin();
16  for (features::iterator& f : fs)
17  {
18  if (last_index == f.index())
19  pos.value() += f.value();
20  else
21  {
22  sum_sq += pos.value() * pos.value();
23  ++pos;
24  pos.value() = f.value();
25  pos.index() = f.index();
26  last_index = f.index();
27  }
28  }
29 
30  sum_sq += pos.value() * pos.value();
31  fs.sum_feat_sq = sum_sq;
32  ++pos;
33  fs.truncate_to(pos);
34 
35  return sum_sq;
36 }
37 
38 namespace VW
39 {
40 void copy_example_label(example* dst, example* src, size_t, void (*copy_label)(void*, void*))
41 {
42  if (copy_label)
43  copy_label(&dst->l, &src->l); // TODO: we really need to delete_label on dst :(
44  else
45  dst->l = src->l;
46 }
47 
48 void copy_example_metadata(bool /* audit */, example* dst, example* src)
49 {
50  copy_array(dst->tag, src->tag);
51  dst->example_counter = src->example_counter;
52 
53  dst->ft_offset = src->ft_offset;
54 
56  if (src->passthrough == nullptr)
57  dst->passthrough = nullptr;
58  else
59  {
60  dst->passthrough = new features;
62  }
63  dst->loss = src->loss;
64  dst->weight = src->weight;
65  dst->confidence = src->confidence;
66  dst->test_only = src->test_only;
67  dst->end_pass = src->end_pass;
68  dst->sorted = src->sorted;
69  dst->in_use = src->in_use;
70 }
71 
72 void copy_example_data(bool audit, example* dst, example* src)
73 {
74  // std::cerr << "copy_example_data dst = " << dst << std::endl;
75  copy_example_metadata(audit, dst, src);
76 
77  // copy feature data
78  copy_array(dst->indices, src->indices);
79  for (namespace_index c : src->indices) dst->feature_space[c].deep_copy_from(src->feature_space[c]);
80  // copy_array(dst->atomics[i], src->atomics[i]);
81  dst->num_features = src->num_features;
83  dst->interactions = src->interactions;
84 }
85 
86 void copy_example_data(bool audit, example* dst, example* src, size_t label_size, void (*copy_label)(void*, void*))
87 {
88  copy_example_data(audit, dst, src);
89  copy_example_label(dst, src, label_size, copy_label);
90 }
91 
93 {
94  if (std::find(src->indices.begin(), src->indices.end(), c) == src->indices.end())
95  return; // index not present in src
96  if (std::find(dst->indices.begin(), dst->indices.end(), c) == dst->indices.end())
97  dst->indices.push_back(c);
98 
99  auto& fdst = dst->feature_space[c];
100  auto& fsrc = src->feature_space[c];
101 
102  src->num_features -= fsrc.size();
103  src->total_sum_feat_sq -= fsrc.sum_feat_sq;
104  std::swap(fdst, fsrc);
105  dst->num_features += fdst.size();
106  dst->total_sum_feat_sq += fdst.sum_feat_sq;
107 }
108 
109 } // namespace VW
110 
112 {
113  v_array<feature> feature_map; // map to store sparse feature vectors
114  uint32_t stride_shift;
115  uint64_t mask;
116 };
117 
118 void vec_store(features_and_source& p, float fx, uint64_t fi)
119 {
120  p.feature_map.push_back(feature(fx, (uint64_t)(fi >> p.stride_shift) & p.mask));
121 }
122 
123 namespace VW
124 {
125 feature* get_features(vw& all, example* ec, size_t& feature_map_len)
126 {
128  fs.stride_shift = all.weights.stride_shift();
129  fs.mask = (uint64_t)all.weights.mask() >> all.weights.stride_shift();
130  fs.feature_map = v_init<feature>();
131  GD::foreach_feature<features_and_source, uint64_t, vec_store>(all, *ec, fs);
132 
133  feature_map_len = fs.feature_map.size();
134  return fs.feature_map.begin();
135 }
136 
138 } // namespace VW
139 
141 {
143  uint32_t stride_shift;
144  uint64_t mask;
145 };
146 
147 void vec_ffs_store(full_features_and_source& p, float fx, uint64_t fi)
148 {
149  p.fs.push_back(fx, (uint64_t)(fi >> p.stride_shift) & p.mask);
150 }
151 
153 {
154  flat_example& fec = calloc_or_throw<flat_example>();
155  fec.l = ec->l;
156  fec.l.simple.weight = ec->weight;
157 
158  fec.tag_len = ec->tag.size();
159  if (fec.tag_len > 0)
160  {
161  fec.tag = calloc_or_throw<char>(fec.tag_len + 1);
162  memcpy(fec.tag, ec->tag.begin(), fec.tag_len);
163  }
164 
166  fec.ft_offset = ec->ft_offset;
167  fec.num_features = ec->num_features;
168 
170  ffs.stride_shift = all.weights.stride_shift();
171  if (all.weights.not_null()) // TODO:temporary fix. all.weights is not initialized at this point in some cases.
172  ffs.mask = (uint64_t)all.weights.mask() >> all.weights.stride_shift();
173  else
174  ffs.mask = (uint64_t)LONG_MAX >> all.weights.stride_shift();
175  GD::foreach_feature<full_features_and_source, uint64_t, vec_ffs_store>(all, *ec, ffs);
176 
177  fec.fs = ffs.fs;
178 
179  return &fec;
180 }
181 
183 {
184  flat_example* fec = flatten_example(all, ec);
185  fec->fs.sort(all.parse_mask);
187  return fec;
188 }
189 
191 {
192  // note: The label memory should be freed by by freeing the original example.
193  if (fec)
194  {
195  fec->fs.delete_v();
196  if (fec->tag_len > 0)
197  free(fec->tag);
198  free(fec);
199  }
200 }
201 
202 namespace VW
203 {
204 example* alloc_examples(size_t, size_t count = 1)
205 {
206  example* ec = calloc_or_throw<example>(count);
207  if (ec == nullptr)
208  return nullptr;
209  for (size_t i = 0; i < count; i++)
210  {
211  ec[i].in_use = true;
212  ec[i].ft_offset = 0;
213  // std::cerr << " alloc_example.indices.begin()=" << ec->indices.begin() << " end=" << ec->indices.end() << " //
214  // ld = " << ec->ld << "\t|| me = " << ec << std::endl;
215  }
216  return ec;
217 }
218 
219 void dealloc_example(void (*delete_label)(void*), example& ec, void (*delete_prediction)(void*))
220 {
221  if (delete_label)
222  delete_label(&ec.l);
223 
224  if (delete_prediction)
225  delete_prediction(&ec.pred);
226 
227  ec.tag.delete_v();
228 
229  if (ec.passthrough)
230  {
231  ec.passthrough->delete_v();
232  delete ec.passthrough;
233  }
234 
235  for (auto& j : ec.feature_space) j.delete_v();
236 
237  ec.indices.delete_v();
238 }
239 
240 void finish_example(vw&, example&);
241 void clean_example(vw&, example&, bool rewind);
242 
243 void finish_example(vw& all, multi_ex& ec_seq)
244 {
245  if (!ec_seq.empty())
246  for (example* ecc : ec_seq)
247  if (ecc->in_use)
248  VW::finish_example(all, *ecc);
249 }
250 
252 {
253  for (auto ec : examples)
254  {
255  clean_example(all, *ec, true);
256  }
257  examples.clear();
258 }
259 } // namespace VW
void return_features(feature *f)
Definition: example.cc:137
void return_multiple_example(vw &all, v_array< example *> &examples)
Definition: example.cc:251
v_array< char > tag
Definition: example.h:63
void copy_label(void *dst, void *src)
Definition: cb.cc:104
v_array< namespace_index > indices
size_t example_counter
Definition: example.h:64
void clean_example(vw &, example &, bool rewind)
Definition: parser.cc:867
parameters weights
Definition: global_data.h:537
iterator begin()
void deep_copy_from(const features &src)
void push_back(feature_value v, feature_index i)
void copy_example_metadata(bool, example *dst, example *src)
Definition: example.cc:48
void copy_example_data(bool audit, example *dst, example *src)
Definition: example.cc:72
void copy_array(v_array< T > &dst, const v_array< T > &src)
Definition: v_array.h:185
std::vector< std::string > * interactions
bool sorted
Definition: example.h:78
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
Definition: example.cc:219
float total_sum_feat_sq
Definition: example.h:96
void delete_v()
the core definition of a set of features.
void delete_label(void *v)
Definition: cb.cc:98
float confidence
Definition: example.h:72
float partial_prediction
Definition: example.h:68
float weight
Definition: simple_label.h:15
label_data simple
Definition: example.h:28
void free_flatten_example(flat_example *fec)
Definition: example.cc:190
v_array< feature > feature_map
Definition: example.cc:113
polylabel l
Definition: example.h:86
features fs
Definition: example.h:97
float collision_cleanup(features &fs)
Definition: example.cc:11
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
example * alloc_examples(size_t, size_t count=1)
Definition: example.cc:204
void free_it(void *ptr)
Definition: memory.h:94
std::array< features, NUM_NAMESPACES > feature_space
feature * get_features(vw &all, example *ec, size_t &feature_map_len)
Definition: example.cc:125
uint32_t stride_shift
Definition: example.cc:114
void push_back(const T &new_ele)
Definition: v_array.h:107
char * tag
Definition: example.h:89
size_t num_features
Definition: example.h:67
bool sort(uint64_t parse_mask)
unsigned char namespace_index
flat_example * flatten_example(vw &all, example *ec)
Definition: example.cc:152
void vec_ffs_store(full_features_and_source &p, float fx, uint64_t fi)
Definition: example.cc:147
void finish_example(vw &, example &)
Definition: parser.cc:881
T *& end()
Definition: v_array.h:43
float loss
Definition: example.h:70
std::vector< example * > multi_ex
Definition: example.h:122
iterator over values and indices
uint64_t ft_offset
Definition: example.h:92
size_t tag_len
Definition: example.h:88
flat_example * flatten_sort_example(vw &all, example *ec)
Definition: example.cc:182
polylabel l
Definition: example.h:57
uint64_t parse_mask
Definition: global_data.h:453
node_pred * find(recall_tree &b, uint32_t cn, example &ec)
Definition: recall_tree.cc:126
size_t num_features
Definition: example.h:95
bool in_use
Definition: example.h:79
float total_sum_feat_sq
Definition: example.h:71
features * passthrough
Definition: example.h:74
Definition: autolink.cc:11
size_t example_counter
Definition: example.h:91
void copy_example_label(example *dst, example *src, size_t, void(*copy_label)(void *, void *))
Definition: example.cc:40
void move_feature_namespace(example *dst, example *src, namespace_index c)
Definition: example.cc:92
uint32_t stride_shift()
polyprediction pred
Definition: example.h:60
void delete_v()
Definition: v_array.h:98
float weight
Definition: example.h:62
void vec_store(features_and_source &p, float fx, uint64_t fi)
Definition: example.cc:118
bool end_pass
Definition: example.h:77
uint64_t mask()
constexpr uint64_t c
Definition: rand48.cc:12
float f
Definition: cache.cc:40
feature_value & value()
Definition: feature_group.h:71
bool test_only
Definition: example.h:76