Vowpal Wabbit
feature_group.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5 */
6 
7 #pragma once
8 
9 #include <memory>
10 #include <string>
11 #include <cstddef>
12 #include "v_array.h"
13 
14 #ifndef _WIN32
15 #include <sys/types.h>
16 #else
17 #define ssize_t int64_t
18 #endif
19 
20 typedef float feature_value;
21 typedef uint64_t feature_index;
22 typedef std::pair<std::string, std::string> audit_strings;
23 typedef std::shared_ptr<audit_strings> audit_strings_ptr;
24 
25 struct feature // sparse feature definition for the library interface
26 {
27  float x;
28  uint64_t weight_index;
29  feature(float _x, uint64_t _index) : x(_x), weight_index(_index) {}
30  feature() : x(0.f), weight_index(0) {}
31 };
32 
33 struct feature_slice // a helper struct for functions using the set {v,i,space_name}
34 {
38 };
39 
40 template <class T>
41 inline int order_features(const void* first, const void* second)
42 {
43  if (((T*)first)->weight_index != ((T*)second)->weight_index)
44  return (int)(((T*)first)->weight_index - ((T*)second)->weight_index);
45  else if (((T*)first)->x > ((T*)second)->x)
46  return 1;
47  else
48  return -1;
49 }
50 
51 struct features;
52 
55 {
56  protected:
58 
59  public:
60  features_value_iterator(feature_value* begin) : _begin(begin) {}
61 
62  features_value_iterator(const features_value_iterator& other) : _begin(other._begin) {}
63 
65  {
66  _begin++;
67  return *this;
68  }
69 
71  inline feature_value& value() { return *_begin; }
72 
75  template <typename T>
77  {
78  return features_value_iterator(_begin + index);
79  }
80 
81  template <typename T>
83  {
84  _begin += index;
85  return *this;
86  }
87 
88  template <typename T>
90  {
91  _begin -= index;
92  return *this;
93  }
94 
96  {
97  _begin = other._begin;
98  return *this;
99  }
100 
101  features_value_iterator& operator*() { return *this; }
102 
103  bool operator==(const features_value_iterator& rhs) { return _begin == rhs._begin; }
104  bool operator!=(const features_value_iterator& rhs) { return _begin != rhs._begin; }
105 
106  friend void swap(features_value_iterator& lhs, features_value_iterator& rhs) { std::swap(lhs._begin, rhs._begin); }
107 
108  friend struct features;
109 };
110 
113 {
114  protected:
116 
117  public:
119  : features_value_iterator(begin), _begin_index(begin_index)
120  {
121  }
122 
124  : features_value_iterator(other), _begin_index(other._begin_index)
125  {
126  }
127 
129  {
131  _begin_index++;
132  return *this;
133  }
134 
135  inline feature_index& index() { return *_begin_index; }
136 
137  template <typename T>
139  {
141  _begin_index += index;
142  return *this;
143  }
144 
145  template <typename T>
147  {
148  return features_value_index_iterator(_begin + index, _begin_index + index);
149  }
150 
151  template <typename T>
153  {
155  _begin_index -= index;
156  return *this;
157  }
158 
160  {
162  _begin_index = other._begin_index;
163  return *this;
164  }
165 
167 
169  {
170  swap(static_cast<features_value_iterator&>(lhs), static_cast<features_value_iterator&>(rhs));
171  std::swap(lhs._begin_index, rhs._begin_index);
172  }
173 };
174 
177 {
178  protected:
180 
181  public:
183  : features_value_index_iterator(begin, begin_index), _begin_audit(begin_audit)
184  {
185  }
186 
188  : features_value_index_iterator(other), _begin_audit(other._begin_audit)
189  {
190  }
191 
192  // prefix increment
194  {
196  _begin_audit++;
197  return *this;
198  }
199 
200  inline audit_strings_ptr& audit() { return *_begin_audit; }
201 
202  template <typename T>
204  {
206  _begin_audit += index;
207  return *this;
208  }
209 
210  template <typename T>
212  {
213  return features_value_index_audit_iterator(_begin + index, _begin_index + index, _begin_audit + index);
214  }
215 
216  template <typename T>
218  {
220  _begin_audit += index;
221  return *this;
222  }
223 
225  {
227  _begin_audit = other._begin_audit;
228  return *this;
229  }
230 
232 
234  {
235  swap(static_cast<features_value_index_iterator&>(lhs), static_cast<features_value_index_iterator&>(rhs));
236  std::swap(lhs._begin_audit, rhs._begin_audit);
237  }
238 };
239 
241 struct features
242 {
243  v_array<feature_value> values; // Always needed.
244  v_array<feature_index> indicies; // Optional for sparse data.
245  v_array<audit_strings_ptr> space_names; // Optional for audit mode.
246 
247  float sum_feat_sq;
248 
252 
255  {
256  private:
258 
259  public:
260  features_value_index_audit_range(features* outer) : _outer(outer) {}
261 
262  iterator_all begin()
263  {
264  return iterator_all(_outer->values.begin(), _outer->indicies.begin(), _outer->space_names.begin());
265  }
266  iterator_all end() { return iterator_all(_outer->values.end(), _outer->indicies.end(), _outer->space_names.end()); }
267  };
268 
270  {
271  values = v_init<feature_value>();
272  indicies = v_init<feature_index>();
273  space_names = v_init<audit_strings_ptr>();
274  sum_feat_sq = 0.f;
275  }
276 
277  // if one wants to add proper destructor for features, make sure to update ezexample_predict::~ezexample_predict();
278  // ~features() { ... }
279 
280  inline size_t size() const { return values.size(); }
281 
282  inline bool nonempty() const { return !values.empty(); }
283 
284  void free_space_names(size_t i)
285  {
286  for (; i < space_names.size(); i++) space_names[i].~audit_strings_ptr();
287  }
288 
290 
291  // default iterator for values & features
292  iterator begin() { return iterator(values.begin(), indicies.begin()); }
293 
294  iterator end() { return iterator(values.end(), indicies.end()); }
295 
296  void clear()
297  {
298  sum_feat_sq = 0.f;
299  values.clear();
300  indicies.clear();
301  space_names.clear();
302  }
303 
305  {
306  ssize_t i = pos._begin - values.begin();
307  values.end() = pos._begin;
308  if (indicies.end() != indicies.begin())
309  indicies.end() = indicies.begin() + i;
310  if (space_names.begin() != space_names.end())
311  {
312  free_space_names((size_t)i);
313  space_names.end() = space_names.begin() + i;
314  }
315  }
316 
317  void truncate_to(size_t i)
318  {
319  values.end() = values.begin() + i;
320  if (indicies.end() != indicies.begin())
321  indicies.end() = indicies.begin() + i;
322  if (space_names.begin() != space_names.end())
323  {
324  free_space_names(i);
325  space_names.end() = space_names.begin() + i;
326  }
327  }
328 
329  void delete_v()
330  {
331  values.delete_v();
332  indicies.delete_v();
333  space_names.delete_v();
334  }
335 
337  {
338  values.push_back(v);
339  indicies.push_back(i);
340  sum_feat_sq += v * v;
341  }
342 
343  bool sort(uint64_t parse_mask)
344  {
345  if (indicies.empty())
346  return false;
347 
348  if (!space_names.empty())
349  {
350  v_array<feature_slice> slice = v_init<feature_slice>();
351  for (size_t i = 0; i < indicies.size(); i++)
352  {
353  feature_slice temp = {values[i], indicies[i] & parse_mask, *space_names[i].get()};
354  slice.push_back(temp);
355  }
356  qsort(slice.begin(), slice.size(), sizeof(feature_slice), order_features<feature_slice>);
357  for (size_t i = 0; i < slice.size(); i++)
358  {
359  values[i] = slice[i].x;
360  indicies[i] = slice[i].weight_index;
361  *space_names[i].get() = slice[i].space_name;
362  }
363  slice.delete_v();
364  }
365  else
366  {
367  v_array<feature> slice = v_init<feature>();
368  for (size_t i = 0; i < indicies.size(); i++)
369  {
370  feature temp = {values[i], indicies[i] & parse_mask};
371  slice.push_back(temp);
372  }
373  qsort(slice.begin(), slice.size(), sizeof(feature), order_features<feature>);
374  for (size_t i = 0; i < slice.size(); i++)
375  {
376  values[i] = slice[i].x;
377  indicies[i] = slice[i].weight_index;
378  }
379  slice.delete_v();
380  }
381  return true;
382  }
383 
384  void deep_copy_from(const features& src)
385  {
386  copy_array(values, src.values);
387  copy_array(indicies, src.indicies);
388  copy_array_no_memcpy(space_names, src.space_names);
389  sum_feat_sq = src.sum_feat_sq;
390  }
391 };
features_value_index_iterator & operator+=(T index)
features_value_iterator & operator*()
features_value_index_iterator & operator++()
features_value_index_audit_iterator & operator=(const features_value_index_audit_iterator &other)
iterator begin()
bool operator!=(const features_value_iterator &rhs)
void deep_copy_from(const features &src)
void push_back(feature_value v, feature_index i)
float x
Definition: feature_group.h:27
features_value_index_iterator(feature_value *begin, feature_index *begin_index)
features_value_index_iterator & operator-=(T index)
std::shared_ptr< audit_strings > audit_strings_ptr
Definition: feature_group.h:23
void copy_array(v_array< T > &dst, const v_array< T > &src)
Definition: v_array.h:185
v_array< feature_index > indicies
friend void swap(features_value_index_audit_iterator &lhs, features_value_index_audit_iterator &rhs)
void free_space_names(size_t i)
features_value_iterator & operator=(const features_value_iterator &other)
Definition: feature_group.h:95
void truncate_to(size_t i)
features_value_index_audit_range values_indices_audit()
features_value_index_audit_iterator(const features_value_index_audit_iterator &other)
void delete_v()
the core definition of a set of features.
features_value_index_audit_iterator & operator-=(T index)
features_value_index_audit_iterator & operator+=(T index)
float feature_value
Definition: feature_group.h:20
features_value_index_audit_iterator & operator++()
bool operator==(const features_value_iterator &rhs)
v_array< feature_value > values
features_value_index_iterator(const features_value_index_iterator &other)
features_value_iterator(const features_value_iterator &other)
Definition: feature_group.h:62
iterator end()
features_value_iterator & operator-=(T index)
Definition: feature_group.h:89
T *& begin()
Definition: v_array.h:42
uint64_t weight_index
Definition: feature_group.h:28
size_t size() const
Definition: v_array.h:68
feature_value * _begin
Definition: feature_group.h:57
void truncate_to(const features_value_iterator &pos)
size_t size() const
features_value_index_iterator iterator
features_value_index_audit_iterator(feature_value *begin, feature_index *begin_index, audit_strings_ptr *begin_audit)
void copy_array_no_memcpy(v_array< T > &dst, const v_array< T > &src)
Definition: v_array.h:193
audit_strings space_name
Definition: feature_group.h:37
void push_back(const T &new_ele)
Definition: v_array.h:107
iterator over feature values only
Definition: feature_group.h:54
feature_index weight_index
Definition: feature_group.h:36
defines a "range" usable by C++ 11 for loops
void clear()
Definition: v_array.h:88
bool nonempty() const
iterator over values, indicies and audit space names
bool sort(uint64_t parse_mask)
friend void swap(features_value_iterator &lhs, features_value_iterator &rhs)
uint64_t feature_index
Definition: feature_group.h:21
void clear()
feature_value x
Definition: feature_group.h:35
features_value_index_iterator & operator=(const features_value_index_iterator &other)
T *& end()
Definition: v_array.h:43
iterator over values and indicies
v_array< audit_strings_ptr > space_names
features_value_iterator operator+(T index)
Definition: feature_group.h:76
features_value_index_audit_iterator iterator_all
float sum_feat_sq
bool empty() const
Definition: v_array.h:59
features_value_index_audit_iterator & operator*()
features_value_iterator(feature_value *begin)
Definition: feature_group.h:60
features_value_iterator & operator+=(T index)
Definition: feature_group.h:82
features_value_iterator & operator++()
Definition: feature_group.h:64
features_value_index_iterator operator+(T index)
int order_features(const void *first, const void *second)
Definition: feature_group.h:41
void delete_v()
Definition: v_array.h:98
feature(float _x, uint64_t _index)
Definition: feature_group.h:29
features_value_index_audit_iterator operator+(T index)
friend void swap(features_value_index_iterator &lhs, features_value_index_iterator &rhs)
float f
Definition: cache.cc:40
features_value_index_iterator & operator*()
feature_value & value()
Definition: feature_group.h:71
features_value_iterator iterator_value
std::pair< std::string, std::string > audit_strings
Definition: feature_group.h:22