Vowpal Wabbit
Classes | Functions | Variables
INTERACTIONS Namespace Reference

Classes

struct  feature_gen_data
 

Functions

void expand_namespaces_with_recursion (std::string const &ns, std::vector< std::string > &res, std::string &val, size_t pos)
 
std::vector< std::string > expand_interactions (const std::vector< std::string > &vec, const size_t required_length, const std::string &err_msg)
 
bool must_be_left_sorted (const std::string &oi)
 
void sort_and_filter_duplicate_interactions (std::vector< std::string > &vec, bool filter_duplicates, size_t &removed_cnt, size_t &sorted_cnt)
 
size_t factor (const size_t n, const size_t start_from=1)
 
void eval_count_of_generated_ft (vw &all, example &ec, size_t &new_features_cnt, float &new_features_value)
 
constexpr bool is_printable_namespace (const unsigned char ns)
 
template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func>
void generate_interactions (vw &all, example_predict &ec, R &dat)
 
template<class R , class S , void(*)(R &, float, S) T>
void generate_interactions (vw &all, example_predict &ec, R &dat)
 
int64_t choose (int64_t n, int64_t k)
 
template<class R , void(*)(R &, const float, float &) T, class W >
void call_T (R &dat, W &weights, const float ft_value, const uint64_t ft_idx)
 
template<class R , void(*)(R &, const float, const float &) T, class W >
void call_T (R &dat, const W &weights, const float ft_value, const uint64_t ft_idx)
 
float INTERACTION_VALUE (float value1, float value2)
 
template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func, class W >
void inner_kernel (R &dat, features::iterator_all &begin, features::iterator_all &end, const uint64_t offset, W &weights, feature_value ft_value, feature_index halfhash)
 
template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func, class W >
void generate_interactions (std::vector< std::string > &interactions, bool permutations, example_predict &ec, R &dat, W &weights)
 

Variables

constexpr int64_t fast_factorial []
 
constexpr size_t size_fast_factorial = sizeof(fast_factorial) / sizeof(*fast_factorial)
 
constexpr unsigned char printable_start = ' '
 
constexpr unsigned char printable_end = '~'
 
constexpr unsigned char printable_ns_size = printable_end - printable_start
 
constexpr uint64_t valid_ns_size
 
constexpr bool feature_self_interactions = true
 

Function Documentation

◆ call_T() [1/2]

template<class R , void(*)(R &, const float, float &) T, class W >
void INTERACTIONS::call_T ( R &  dat,
W &  weights,
const float  ft_value,
const uint64_t  ft_idx 
)
inline

Definition at line 31 of file interactions_predict.h.

Referenced by call_T().

32 {
33  T(dat, ft_value, weights[ft_idx]);
34 }

◆ call_T() [2/2]

template<class R , void(*)(R &, const float, const float &) T, class W >
void INTERACTIONS::call_T ( R &  dat,
const W &  weights,
const float  ft_value,
const uint64_t  ft_idx 
)
inline

Definition at line 37 of file interactions_predict.h.

References call_T().

38 {
39  T(dat, ft_value, weights[ft_idx]);
40 }

◆ choose()

int64_t INTERACTIONS::choose ( int64_t  n,
int64_t  k 
)
inline

Definition at line 69 of file interactions.h.

Referenced by eval_count_of_generated_ft().

70 {
71  if (k > n)
72  return 0;
73  if (k < 0)
74  return 0;
75  if (k == n)
76  return 1;
77  if (k == 0 && n != 0)
78  return 1;
79  int64_t r = 1;
80  for (int64_t d = 1; d <= k; ++d)
81  {
82  r *= n--;
83  r /= d;
84  }
85  return r;
86 }

◆ eval_count_of_generated_ft()

void INTERACTIONS::eval_count_of_generated_ft ( vw all,
example ec,
size_t &  new_features_cnt,
float &  new_features_value 
)

Definition at line 228 of file interactions.cc.

References choose(), v_array< T >::delete_v(), example_predict::feature_space, example_predict::interactions, vw::permutations, PROCESS_SELF_INTERACTIONS, v_array< T >::push_back(), v_array< T >::size(), features::size(), and features::values.

Referenced by DepParserTask::extract_features(), is_printable_namespace(), and VW::setup_example().

229 {
230  new_features_cnt = 0;
231  new_features_value = 0.;
232 
233  v_array<float> results = v_init<float>();
234 
235  if (all.permutations)
236  {
237  // just multiply precomputed values for all namespaces
238  for (std::string& inter : *ec.interactions)
239  {
240  size_t num_features_in_inter = 1;
241  float sum_feat_sq_in_inter = 1.;
242 
243  for (namespace_index ns : inter)
244  {
245  num_features_in_inter *= ec.feature_space[ns].size();
246  sum_feat_sq_in_inter *= ec.feature_space[ns].sum_feat_sq;
247  if (num_features_in_inter == 0)
248  break;
249  }
250 
251  if (num_features_in_inter == 0)
252  continue;
253 
254  new_features_cnt += num_features_in_inter;
255  new_features_value += sum_feat_sq_in_inter;
256  }
257  }
258  else // case of simple combinations
259  {
260 #ifdef DEBUG_EVAL_COUNT_OF_GEN_FT
261  size_t correct_features_cnt = 0;
262  float correct_features_value = 0.;
263  eval_gen_data dat(correct_features_cnt, correct_features_value);
264  generate_interactions<eval_gen_data, uint64_t, ft_cnt>(all, ec, dat);
265 #endif
266 
267  for (std::string& inter : *ec.interactions)
268  {
269  size_t num_features_in_inter = 1;
270  float sum_feat_sq_in_inter = 1.;
271 
272  for (auto ns = inter.begin(); ns != inter.end(); ++ns)
273  {
274  if ((ns == inter.end() - 1) || (*ns != *(ns + 1))) // neighbour namespaces are different
275  {
276  // just multiply precomputed values
277  const int nsc = *ns;
278  num_features_in_inter *= ec.feature_space[nsc].size();
279  sum_feat_sq_in_inter *= ec.feature_space[nsc].sum_feat_sq;
280  if (num_features_in_inter == 0)
281  break; // one of namespaces has no features - go to next interaction
282  }
283  else // we are at beginning of a block made of same namespace (interaction is preliminary sorted)
284  {
285  // let's find out real length of this block
286  size_t order_of_inter = 2; // alredy compared ns == ns+1
287 
288  for (auto ns_end = ns + 2; ns_end < inter.end(); ++ns_end)
289  if (*ns == *ns_end)
290  ++order_of_inter;
291 
292  // namespace is same for whole block
293  features& fs = ec.feature_space[(const int)*ns];
294 
295  // count number of features with value != 1.;
296  size_t cnt_ft_value_non_1 = 0;
297 
298  // in this block we shall calculate number of generated features and sum of their values
299  // keeping in mind rules applicable for simple combinations instead of permutations
300 
301  // let's calculate sum of their squared value for whole block
302 
303  // ensure results as big as order_of_inter and empty.
304  for (size_t i = 0; i < results.size(); ++i) results[i] = 0.;
305  while (results.size() < order_of_inter) results.push_back(0.);
306 
307  // recurrent value calculations
308  for (size_t i = 0; i < fs.size(); ++i)
309  {
310  const float x = fs.values[i] * fs.values[i];
311 
312  if (!PROCESS_SELF_INTERACTIONS(fs.values[i]))
313  {
314  for (size_t i = order_of_inter - 1; i > 0; --i) results[i] += results[i - 1] * x;
315 
316  results[0] += x;
317  }
318  else
319  {
320  results[0] += x;
321 
322  for (size_t i = 1; i < order_of_inter; ++i) results[i] += results[i - 1] * x;
323 
324  ++cnt_ft_value_non_1;
325  }
326  }
327 
328  sum_feat_sq_in_inter *= results[order_of_inter - 1]; // will be explained in http://bit.ly/1Hk9JX1
329 
330  // let's calculate the number of a new features
331 
332  // if number of features is less than order of interaction then go to the next interaction
333  // as you can't make simple combination of interaction 'aaa' if a contains < 3 features.
334  // unless one of them has value != 1. and we are counting them.
335  const size_t ft_size = fs.size();
336  if (cnt_ft_value_non_1 == 0 && ft_size < order_of_inter)
337  {
338  num_features_in_inter = 0;
339  break;
340  }
341 
342  size_t n;
343  if (cnt_ft_value_non_1 == 0) // number of generated simple combinations is C(n,k)
344  {
345  n = (size_t)choose((int64_t)ft_size, (int64_t)order_of_inter);
346  }
347  else
348  {
349  n = 0;
350  for (size_t l = 0; l <= order_of_inter; ++l)
351  {
352  // C(l+m-1, l) * C(n-m, k-l)
353  size_t num = (l == 0) ? 1 : (size_t)choose(l + cnt_ft_value_non_1 - 1, l);
354 
355  if (ft_size - cnt_ft_value_non_1 >= order_of_inter - l)
356  num *= (size_t)choose(ft_size - cnt_ft_value_non_1, order_of_inter - l);
357  else
358  num = 0;
359 
360  n += num;
361  }
362 
363  } // details on http://bit.ly/1Hk9JX1
364 
365  num_features_in_inter *= n;
366 
367  ns += order_of_inter - 1; // jump over whole block
368  }
369  }
370 
371  if (num_features_in_inter == 0)
372  continue; // signal that values should be ignored (as default value is 1)
373 
374  new_features_cnt += num_features_in_inter;
375  new_features_value += sum_feat_sq_in_inter;
376  }
377 
378 #ifdef DEBUG_EVAL_COUNT_OF_GEN_FT
379  if (correct_features_cnt != new_features_cnt)
380  all.trace_message << "Incorrect new features count " << new_features_cnt << " must be " << correct_features_cnt
381  << std::endl;
382  if (fabs(correct_features_value - new_features_value) > 1e-5)
383  all.trace_message << "Incorrect new features value " << new_features_value << " must be "
384  << correct_features_value << std::endl;
385 #endif
386  }
387 
388  results.delete_v();
389 }
std::vector< std::string > * interactions
the core definition of a set of features.
v_array< feature_value > values
size_t size() const
Definition: v_array.h:68
std::array< features, NUM_NAMESPACES > feature_space
size_t size() const
void push_back(const T &new_ele)
Definition: v_array.h:107
unsigned char namespace_index
#define PROCESS_SELF_INTERACTIONS(ft_value)
int64_t choose(int64_t n, int64_t k)
Definition: boosting.cc:41
bool permutations
Definition: global_data.h:454
void delete_v()
Definition: v_array.h:98

◆ expand_interactions()

std::vector< std::string > INTERACTIONS::expand_interactions ( const std::vector< std::string > &  vec,
const size_t  required_length,
const std::string &  err_msg 
)

Definition at line 56 of file interactions.cc.

References expand_namespaces_with_recursion(), and THROW.

Referenced by is_printable_namespace(), and parse_feature_tweaks().

58 {
59  std::vector<std::string> res;
60 
61  for (std::string const& i : vec)
62  {
63  const size_t len = i.length();
64  if (required_length > 0 && len != required_length)
65  // got strict requirement of interaction length and it was failed.
66  {
67  THROW(err_msg);
68  }
69  else if (len < 2)
70  // regardles of required_length value this check is always performed
71  THROW("error, feature interactions must involve at least two namespaces" << err_msg);
72 
73  std::string temp;
74  expand_namespaces_with_recursion(i, res, temp, 0);
75  }
76  return res;
77 }
void expand_namespaces_with_recursion(std::string const &ns, std::vector< std::string > &res, std::string &val, size_t pos)
Definition: interactions.cc:14
#define THROW(args)
Definition: vw_exception.h:181

◆ expand_namespaces_with_recursion()

void INTERACTIONS::expand_namespaces_with_recursion ( std::string const &  ns,
std::vector< std::string > &  res,
std::string &  val,
size_t  pos 
)

Definition at line 14 of file interactions.cc.

References printable_end, printable_start, and valid_ns().

Referenced by expand_interactions().

16 {
17  assert(pos <= ns.size());
18 
19  if (pos == ns.size())
20  {
21  // we're at the end of interaction
22 
23  // and store it in res
24  res.push_back(val);
25  // don't free s memory as it's data will be used later
26  }
27  else
28  {
29  // we're at the middle of interaction
30  if (ns[pos] != ':')
31  {
32  // not a wildcard
33  val.push_back(ns[pos]);
34  expand_namespaces_with_recursion(ns, res, val, pos + 1);
35  val.pop_back(); // i don't need value itself
36  }
37  else
38  {
39  for (unsigned char j = printable_start; j <= printable_end; ++j)
40  {
41  if (valid_ns(j))
42  {
43  val.push_back(j);
44  expand_namespaces_with_recursion(ns, res, val, pos + 1);
45  val.pop_back(); // i don't need value itself
46  }
47  }
48  }
49  }
50 }
constexpr unsigned char printable_end
Definition: interactions.h:17
constexpr unsigned char printable_start
Definition: interactions.h:16
bool valid_ns(char c)
Definition: example.h:111
void expand_namespaces_with_recursion(std::string const &ns, std::vector< std::string > &res, std::string &val, size_t pos)
Definition: interactions.cc:14

◆ factor()

size_t INTERACTIONS::factor ( const size_t  n,
const size_t  start_from = 1 
)
inline

Definition at line 214 of file interactions.cc.

215 {
216  if (n <= 0)
217  return 1;
218  if (start_from == 1 && n < size_fast_factorial)
219  return (size_t)fast_factorial[n];
220 
221  size_t res = 1;
222  for (size_t i = start_from + 1; i <= n; ++i) res *= i;
223  return res;
224 }
constexpr size_t size_fast_factorial
constexpr int64_t fast_factorial[]

◆ generate_interactions() [1/3]

template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func>
void INTERACTIONS::generate_interactions ( vw all,
example_predict ec,
R &  dat 
)
inline

Definition at line 45 of file interactions.h.

References parameters::dense_weights, example_predict::interactions, vw::permutations, parameters::sparse, parameters::sparse_weights, and vw::weights.

Referenced by audit_regressor().

46 {
47  if (all.weights.sparse)
48  generate_interactions<R, S, T, audit, audit_func, sparse_parameters>(
49  *ec.interactions, all.permutations, ec, dat, all.weights.sparse_weights);
50  else
51  generate_interactions<R, S, T, audit, audit_func, dense_parameters>(
52  *ec.interactions, all.permutations, ec, dat, all.weights.dense_weights);
53 }
parameters weights
Definition: global_data.h:537
std::vector< std::string > * interactions
dense_parameters dense_weights
sparse_parameters sparse_weights
bool permutations
Definition: global_data.h:454

◆ generate_interactions() [2/3]

template<class R , class S , void(*)(R &, float, S) T>
void INTERACTIONS::generate_interactions ( vw all,
example_predict ec,
R &  dat 
)
inline

Definition at line 57 of file interactions.h.

References parameters::dense_weights, vw::interactions, vw::permutations, parameters::sparse, parameters::sparse_weights, and vw::weights.

58 {
59  if (all.weights.sparse)
60  generate_interactions<R, S, T, sparse_parameters>(
61  all.interactions, all.permutations, ec, dat, all.weights.sparse_weights);
62  else
63  generate_interactions<R, S, T, dense_parameters>(
64  all.interactions, all.permutations, ec, dat, all.weights.dense_weights);
65 }
parameters weights
Definition: global_data.h:537
dense_parameters dense_weights
sparse_parameters sparse_weights
std::vector< std::string > interactions
Definition: global_data.h:457
bool permutations
Definition: global_data.h:454

◆ generate_interactions() [3/3]

template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func, class W >
void INTERACTIONS::generate_interactions ( std::vector< std::string > &  interactions,
bool  permutations,
example_predict ec,
R &  dat,
W &  weights 
)
inline

Definition at line 98 of file interactions_predict.h.

References v_array< T >::begin(), features::features_value_index_audit_range::begin(), v_array< T >::delete_v(), v_array< T >::end(), features::features_value_index_audit_range::end(), example_predict::feature_space, FNV_prime, INTERACTIONS::feature_gen_data::ft_arr, example_predict::ft_offset, INTERACTIONS::feature_gen_data::hash, features::indicies, INTERACTION_VALUE(), INTERACTIONS::feature_gen_data::loop_end, INTERACTIONS::feature_gen_data::loop_idx, features::nonempty(), PROCESS_SELF_INTERACTIONS, v_array< T >::push_back(), INTERACTIONS::feature_gen_data::self_interaction, v_array< T >::size(), features::space_names, features::values, features::values_indices_audit(), and INTERACTIONS::feature_gen_data::x.

101 {
102  features* features_data = ec.feature_space.data();
103 
104  // often used values
105  const uint64_t offset = ec.ft_offset;
106  // const uint64_t stride_shift = all.stride_shift; // it seems we don't need stride shift in FTRL-like hash
107 
108  // statedata for generic non-recursive iteration
109  v_array<feature_gen_data> state_data = v_init<feature_gen_data>();
110 
111  feature_gen_data empty_ns_data; // micro-optimization. don't want to call its constructor each time in loop.
112  empty_ns_data.loop_idx = 0;
113  empty_ns_data.x = 1.;
114  empty_ns_data.loop_end = 0;
115  empty_ns_data.self_interaction = false;
116 
117  // loop throw the set of possible interactions
118  for (auto& ns : interactions)
119  { // current list of namespaces to interact.
120 
121 #ifndef GEN_INTER_LOOP
122 
123  // unless GEN_INTER_LOOP is defined we use nested 'for' loops for interactions length 2 (pairs) and 3 (triples)
124  // and generic non-recursive algorythm for all other cases.
125  // nested 'for' loops approach is faster, but can't be used for interation of any length.
126 
127  const size_t len = ns.size();
128 
129  if (len == 2) // special case of pairs
130  {
131  features& first = features_data[(uint8_t)ns[0]];
132  if (first.nonempty())
133  {
134  features& second = features_data[(uint8_t)ns[1]];
135  if (second.nonempty())
136  {
137  const bool same_namespace = (!permutations && (ns[0] == ns[1]));
138 
139  for (size_t i = 0; i < first.indicies.size(); ++i)
140  {
141  feature_index halfhash = FNV_prime * (uint64_t)first.indicies[i];
142  if (audit)
143  audit_func(dat, first.space_names[i].get());
144  // next index differs for permutations and simple combinations
145  feature_value ft_value = first.values[i];
147  features::iterator_all begin = range.begin();
148  if (same_namespace)
149  begin += (PROCESS_SELF_INTERACTIONS(ft_value)) ? i : i + 1;
150 
151  features::iterator_all end = range.end();
152  inner_kernel<R, S, T, audit, audit_func>(dat, begin, end, offset, weights, ft_value, halfhash);
153 
154  if (audit)
155  audit_func(dat, nullptr);
156  } // end for(fst)
157  } // end if (data[snd] size > 0)
158  } // end if (data[fst] size > 0)
159  }
160  else if (len == 3) // special case for triples
161  {
162  features& first = features_data[(uint8_t)ns[0]];
163  if (first.nonempty())
164  {
165  features& second = features_data[(uint8_t)ns[1]];
166  if (second.nonempty())
167  {
168  features& third = features_data[(uint8_t)ns[2]];
169  if (third.nonempty())
170  { // don't compare 1 and 3 as interaction is sorted
171  const bool same_namespace1 = (!permutations && (ns[0] == ns[1]));
172  const bool same_namespace2 = (!permutations && (ns[1] == ns[2]));
173 
174  for (size_t i = 0; i < first.indicies.size(); ++i)
175  {
176  if (audit)
177  audit_func(dat, first.space_names[i].get());
178  const uint64_t halfhash1 = FNV_prime * (uint64_t)first.indicies[i];
179  const float& first_ft_value = first.values[i];
180  size_t j = 0;
181  if (same_namespace1) // next index differs for permutations and simple combinations
182  j = (PROCESS_SELF_INTERACTIONS(first_ft_value)) ? i : i + 1;
183 
184  for (; j < second.indicies.size(); ++j)
185  { // f3 x k*(f2 x k*f1)
186  if (audit)
187  audit_func(dat, second.space_names[j].get());
188  feature_index halfhash = FNV_prime * (halfhash1 ^ (uint64_t)second.indicies[j]);
189  feature_value ft_value = INTERACTION_VALUE(first_ft_value, second.values[j]);
190 
192  features::iterator_all begin = range.begin();
193  if (same_namespace2) // next index differs for permutations and simple combinations
194  begin += (PROCESS_SELF_INTERACTIONS(ft_value)) ? j : j + 1;
195 
196  features::iterator_all end = range.end();
197  inner_kernel<R, S, T, audit, audit_func>(dat, begin, end, offset, weights, ft_value, halfhash);
198  if (audit)
199  audit_func(dat, nullptr);
200  } // end for (snd)
201  if (audit)
202  audit_func(dat, nullptr);
203  } // end for (fst)
204 
205  } // end if (data[thr] size > 0)
206  } // end if (data[snd] size > 0)
207  } // end if (data[fst] size > 0)
208  }
209  else // generic case: quatriples, etc.
210 
211 #endif
212  {
213  bool must_skip_interaction = false;
214  // preparing state data
215  feature_gen_data* fgd = state_data.begin();
216  feature_gen_data* fgd2; // for further use
217  for (namespace_index n : ns)
218  {
219  features& ft = features_data[(int32_t)n];
220  const size_t ft_cnt = ft.indicies.size();
221 
222  if (ft_cnt == 0)
223  {
224  must_skip_interaction = true;
225  break;
226  }
227 
228  if (fgd == state_data.end())
229  {
230  state_data.push_back(empty_ns_data);
231  fgd = state_data.end() - 1; // reassign as memory could be realloced
232  }
233 
234  fgd->loop_end = ft_cnt - 1; // saving number of features for each namespace
235  fgd->ft_arr = &ft;
236  ++fgd;
237  }
238 
239  // if any of interacting namespace has 0 features - whole interaction is skipped
240  if (must_skip_interaction)
241  continue; // no_data_to_interact
242 
243  if (!permutations) // adjust state_data for simple combinations
244  { // if permutations mode is disabeled then namespaces in ns are already sorted and thus grouped
245  // (in fact, currently they are sorted even for enabled permutations mode)
246  // let's go throw the list and calculate number of features to skip in namespaces which
247  // repeated more than once to generate only simple combinations of features
248 
249  size_t margin = 0; // number of features to ignore if namespace has been seen before
250 
251  // iterate list backward as margin grows in this order
252 
253  for (fgd = state_data.end() - 1; fgd > state_data.begin(); --fgd)
254  {
255  fgd2 = fgd - 1;
256  fgd->self_interaction = (fgd->ft_arr == fgd2->ft_arr); // state_data.begin().self_interaction is always false
257  if (fgd->self_interaction)
258  {
259  size_t& loop_end = fgd2->loop_end;
260 
261  if (!PROCESS_SELF_INTERACTIONS((*fgd2->ft_arr).values[loop_end - margin]))
262  {
263  ++margin; // otherwise margin can't be increased
264  must_skip_interaction = loop_end < margin;
265  if (must_skip_interaction)
266  break;
267  }
268 
269  if (margin != 0)
270  loop_end -= margin; // skip some features and increase margin
271  }
272  else if (margin != 0)
273  margin = 0;
274  }
275 
276  // if impossible_without_permutations == true then we faced with case like interaction 'aaaa'
277  // where namespace 'a' contains less than 4 unique features. It's impossible to make simple
278  // combination of length 4 without repetitions from 3 or less elements.
279  if (must_skip_interaction)
280  continue; // impossible_without_permutations
281  } // end of state_data adjustment
282 
283  fgd = state_data.begin(); // always equal to first ns
284  fgd2 = state_data.end() - 1; // always equal to last ns
285  fgd->loop_idx = 0; // loop_idx contains current feature id for curently processed namespace.
286 
287  // beware: micro-optimization.
288  /* start & end are always point to features in last namespace of interaction.
289  for 'all.permutations == true' they are constant.*/
290  size_t start_i = 0;
291 
292  feature_gen_data* cur_data = fgd;
293  // end of micro-optimization block
294 
295  // generic feature generation cycle for interactions of any length
296  bool do_it = true;
297  while (do_it)
298  {
299  if (cur_data < fgd2) // can go further threw the list of namespaces in interaction
300  {
301  feature_gen_data* next_data = cur_data + 1;
302  size_t feature = cur_data->loop_idx;
303  features& fs = *(cur_data->ft_arr);
304 
305  if (next_data->self_interaction)
306  { // if next namespace is same, we should start with loop_idx + 1 to avoid feature interaction with itself
307  // unless feature has value x and x != x*x. E.g. x != 0 and x != 1. Features with x == 0 are already
308  // filtered out in parce_args.cc::maybeFeature().
309 
310  next_data->loop_idx =
311  (PROCESS_SELF_INTERACTIONS(fs.values[feature])) ? cur_data->loop_idx : cur_data->loop_idx + 1;
312  }
313  else
314  next_data->loop_idx = 0;
315 
316  if (audit)
317  audit_func(dat, fs.space_names[feature].get());
318 
319  if (cur_data == fgd) // first namespace
320  {
321  next_data->hash = FNV_prime * (uint64_t)fs.indicies[feature];
322  next_data->x = fs.values[feature]; // data->x == 1.
323  }
324  else
325  { // feature2 xor (16777619*feature1)
326  next_data->hash = FNV_prime * (cur_data->hash ^ (uint64_t)fs.indicies[feature]);
327  next_data->x = INTERACTION_VALUE(fs.values[feature], cur_data->x);
328  }
329 
330  ++cur_data;
331  }
332  else
333  { // last namespace - iterate its features and go back
334  if (!permutations) // start value is not a constant in this case
335  start_i = fgd2->loop_idx;
336 
337  features& fs = *(fgd2->ft_arr);
338 
339  feature_value ft_value = fgd2->x;
340  feature_index halfhash = fgd2->hash;
341 
343  features::iterator_all begin = range.begin();
344  begin += start_i;
345  features::iterator_all end = range.begin();
346  end += fgd2->loop_end + 1;
347  inner_kernel<R, S, T, audit, audit_func, W>(dat, begin, end, offset, weights, ft_value, halfhash);
348 
349  // trying to go back increasing loop_idx of each namespace by the way
350 
351  bool go_further = true;
352 
353  do
354  {
355  --cur_data;
356  go_further = (++cur_data->loop_idx > cur_data->loop_end); // increment loop_idx
357  if (audit)
358  audit_func(dat, nullptr);
359  } while (go_further && cur_data != fgd);
360 
361  do_it = !(cur_data == fgd && go_further);
362  // if do_it==false - we've reached 0 namespace but its 'cur_data.loop_idx > cur_data.loop_end' -> exit the
363  // while loop
364  } // if last namespace
365  } // while do_it
366  }
367  } // foreach interaction in all.interactions
368 
369  state_data.delete_v();
370 }
v_array< feature_index > indicies
features_value_index_audit_range values_indices_audit()
the core definition of a set of features.
float feature_value
Definition: feature_group.h:20
v_array< feature_value > values
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
std::array< features, NUM_NAMESPACES > feature_space
void push_back(const T &new_ele)
Definition: v_array.h:107
defines a "range" usable by C++ 11 for loops
bool nonempty() const
iterator over values, indicies and audit space names
unsigned char namespace_index
uint64_t feature_index
Definition: feature_group.h:21
#define PROCESS_SELF_INTERACTIONS(ft_value)
T *& end()
Definition: v_array.h:43
v_array< audit_strings_ptr > space_names
constexpr uint32_t FNV_prime
Definition: constant.h:17
float INTERACTION_VALUE(float value1, float value2)
void delete_v()
Definition: v_array.h:98

◆ inner_kernel()

template<class R , class S , void(*)(R &, float, S) T, bool audit, void(*)(R &, const audit_strings *) audit_func, class W >
void INTERACTIONS::inner_kernel ( R &  dat,
features::iterator_all begin,
features::iterator_all end,
const uint64_t  offset,
W &  weights,
feature_value  ft_value,
feature_index  halfhash 
)
inline

Definition at line 74 of file interactions_predict.h.

References features_value_index_audit_iterator::audit(), features_value_index_iterator::index(), INTERACTION_VALUE(), and features_value_iterator::value().

76 {
77  if (audit)
78  {
79  for (; begin != end; ++begin)
80  {
81  audit_func(dat, begin.audit().get());
82  call_T<R, T>(dat, weights, INTERACTION_VALUE(ft_value, begin.value()), (begin.index() ^ halfhash) + offset);
83  audit_func(dat, nullptr);
84  }
85  }
86  else
87  {
88  for (; begin != end; ++begin)
89  call_T<R, T>(dat, weights, INTERACTION_VALUE(ft_value, begin.value()), (begin.index() ^ halfhash) + offset);
90  }
91 }
float INTERACTION_VALUE(float value1, float value2)
feature_value & value()
Definition: feature_group.h:71

◆ INTERACTION_VALUE()

float INTERACTIONS::INTERACTION_VALUE ( float  value1,
float  value2 
)
inline

Definition at line 66 of file interactions_predict.h.

Referenced by generate_interactions(), and inner_kernel().

66 { return value1 * value2; }

◆ is_printable_namespace()

constexpr bool INTERACTIONS::is_printable_namespace ( const unsigned char  ns)
inline

Definition at line 22 of file interactions.h.

References eval_count_of_generated_ft(), expand_interactions(), printable_end, and sort_and_filter_duplicate_interactions().

Referenced by CCB::calculate_and_insert_interactions().

23 {
24  return ns >= printable_start && ns <= printable_end;
25 }
constexpr unsigned char printable_end
Definition: interactions.h:17
constexpr unsigned char printable_start
Definition: interactions.h:16

◆ must_be_left_sorted()

bool INTERACTIONS::must_be_left_sorted ( const std::string &  oi)
inline

Definition at line 87 of file interactions.cc.

Referenced by sort_and_filter_duplicate_interactions().

88 {
89  if (oi.size() <= 1)
90  return true; // one letter in std::string - no need to sort
91 
92  bool diff_ns_found = false;
93  bool pair_found = false;
94 
95  for (auto i = std::begin(oi); i != std::end(oi) - 1; ++i)
96  if (*i == *(i + 1)) // pair found
97  {
98  if (diff_ns_found)
99  return true; // case 'abb'
100  pair_found = true;
101  }
102  else
103  {
104  if (pair_found)
105  return true; // case 'aab'
106  diff_ns_found = true;
107  }
108 
109  return false; // 'aaa' or 'abc'
110 }

◆ sort_and_filter_duplicate_interactions()

void INTERACTIONS::sort_and_filter_duplicate_interactions ( std::vector< std::string > &  vec,
bool  filter_duplicates,
size_t &  removed_cnt,
size_t &  sorted_cnt 
)

Definition at line 116 of file interactions.cc.

References a, and must_be_left_sorted().

Referenced by is_printable_namespace(), and parse_feature_tweaks().

118 {
119  // 2 out parameters
120  removed_cnt = 0;
121  sorted_cnt = 0;
122 
123  // interaction value sort + original position
124  std::vector<std::pair<std::string, size_t>> vec_sorted;
125  for (size_t i = 0; i < vec.size(); ++i)
126  {
127  std::string sorted_i(vec[i]);
128  std::stable_sort(std::begin(sorted_i), std::end(sorted_i));
129  vec_sorted.push_back(std::make_pair(sorted_i, i));
130  }
131 
132  if (filter_duplicates)
133  {
134  // remove duplicates
135  std::stable_sort(vec_sorted.begin(), vec_sorted.end(),
136  [](std::pair<std::string, size_t> const& a, std::pair<std::string, size_t> const& b) {
137  return a.first < b.first;
138  });
139  auto last = unique(vec_sorted.begin(), vec_sorted.end(),
140  [](std::pair<std::string, size_t> const& a, std::pair<std::string, size_t> const& b) {
141  return a.first == b.first;
142  });
143  vec_sorted.erase(last, vec_sorted.end());
144 
145  // report number of removed interactions
146  removed_cnt = vec.size() - vec_sorted.size();
147 
148  // restore original order
149  std::stable_sort(vec_sorted.begin(), vec_sorted.end(),
150  [](std::pair<std::string, size_t> const& a, std::pair<std::string, size_t> const& b) {
151  return a.second < b.second;
152  });
153  }
154 
155  // we have original vector and vector with duplicates removed + corresponding indexes in original vector
156  // plus second vector's data is sorted. We can reuse it if we need interaction to be left sorted.
157  // let's make a new vector from these two sources - without dulicates and with sorted data whenever it's needed.
158  std::vector<std::string> res;
159  for (auto& i : vec_sorted)
160  {
161  if (must_be_left_sorted(i.first))
162  {
163  // if so - copy sorted data to result
164  res.push_back(i.first);
165  ++sorted_cnt;
166  }
167  else // else - move unsorted data to result
168  res.push_back(vec[i.second]);
169  }
170 
171  vec = res;
172 }
bool must_be_left_sorted(const std::string &oi)
Definition: interactions.cc:87
constexpr uint64_t a
Definition: rand48.cc:11

Variable Documentation

◆ fast_factorial

constexpr int64_t INTERACTIONS::fast_factorial[]
Initial value:
= {1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600,
6227020800, 87178291200, 1307674368000, 20922789888000, 355687428096000, 6402373705728000, 121645100408832000,
2432902008176640000}

Definition at line 203 of file interactions.cc.

◆ feature_self_interactions

constexpr bool INTERACTIONS::feature_self_interactions = true

Definition at line 23 of file interactions_predict.h.

◆ printable_end

constexpr unsigned char INTERACTIONS::printable_end = '~'

Definition at line 17 of file interactions.h.

Referenced by expand_namespaces_with_recursion(), and is_printable_namespace().

◆ printable_ns_size

constexpr unsigned char INTERACTIONS::printable_ns_size = printable_end - printable_start

Definition at line 18 of file interactions.h.

◆ printable_start

constexpr unsigned char INTERACTIONS::printable_start = ' '

◆ size_fast_factorial

constexpr size_t INTERACTIONS::size_fast_factorial = sizeof(fast_factorial) / sizeof(*fast_factorial)

Definition at line 206 of file interactions.cc.

◆ valid_ns_size

constexpr uint64_t INTERACTIONS::valid_ns_size
Initial value:
=
constexpr unsigned char printable_end
Definition: interactions.h:17
constexpr unsigned char printable_start
Definition: interactions.h:16

Definition at line 19 of file interactions.h.