Vowpal Wabbit
interactions.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include "global_data.h"
4 #include "interactions_predict.h"
5 
6 /*
7  * Interactions preprocessing and feature combinations generation
8  */
9 
10 namespace INTERACTIONS
11 {
12 /*
13  * Interactions preprocessing
14  */
15 
// Inclusive bounds of the printable ASCII range accepted by is_printable_namespace().
constexpr unsigned char printable_start = ' ';
constexpr unsigned char printable_end = '~';

// Distance between the two bounds (94). Note that the inclusive range itself holds 95 characters.
constexpr unsigned char printable_ns_size = printable_end - printable_start;

// One less than the distance above (93): the 95 printable characters minus ':' and '|',
// which are excluded in is_valid_ns().
constexpr uint64_t valid_ns_size = printable_ns_size - 1;
21 
22 inline constexpr bool is_printable_namespace(const unsigned char ns)
23 {
24  return ns >= printable_start && ns <= printable_end;
25 }
26 
27 // expand all wildcard namespaces in vector<string>
28 // required_length must be 0 if interactions of any length are allowed, otherwise it contains the required length
29 // err_msg will be printed and an exception will be thrown if required_length != 0 and an interaction's length does not match it.
30 std::vector<std::string> expand_interactions(
31  const std::vector<std::string>& vec, const size_t required_length, const std::string& err_msg);
32 
// remove duplicate interactions and sort namespaces in them (if required)
// vec is taken by non-const reference and is the list being processed.
// NOTE(review): removed_cnt and sorted_cnt presumably receive the number of removed and
// sorted interactions respectively - confirm against the definition in interactions.cc.
void sort_and_filter_duplicate_interactions(
    std::vector<std::string>& vec, bool filter_duplicates, size_t& removed_cnt, size_t& sorted_cnt);
36 
37 /*
38  * Feature combinations generation
39  */
40 
41 // function estimates how many new features will be generated for example and their sum(value^2).
42 void eval_count_of_generated_ft(vw& all, example& ec, size_t& new_features_cnt, float& new_features_value);
43 
44 template <class R, class S, void (*T)(R&, float, S), bool audit, void (*audit_func)(R&, const audit_strings*)>
45 inline void generate_interactions(vw& all, example_predict& ec, R& dat)
46 {
47  if (all.weights.sparse)
48  generate_interactions<R, S, T, audit, audit_func, sparse_parameters>(
49  *ec.interactions, all.permutations, ec, dat, all.weights.sparse_weights);
50  else
51  generate_interactions<R, S, T, audit, audit_func, dense_parameters>(
52  *ec.interactions, all.permutations, ec, dat, all.weights.dense_weights);
53 }
54 
55 // this code is for C++98/03 complience as I unable to pass null function-pointer as template argument in g++-4.6
56 template <class R, class S, void (*T)(R&, float, S)>
57 inline void generate_interactions(vw& all, example_predict& ec, R& dat)
58 {
59  if (all.weights.sparse)
60  generate_interactions<R, S, T, sparse_parameters>(
61  all.interactions, all.permutations, ec, dat, all.weights.sparse_weights);
62  else
63  generate_interactions<R, S, T, dense_parameters>(
64  all.interactions, all.permutations, ec, dat, all.weights.dense_weights);
65 }
66 
// C(n,k) = n!/(k!(n-k)!)
//
// Returns the binomial coefficient "n choose k".
// Returns 0 when k < 0 or k > n (which also covers every negative n), and 1 when k == 0 or k == n.
//
// The product is built incrementally over min(k, n-k) factors, using C(n,k) == C(n,n-k).
// Without this, a call such as choose(62, 60) walks through the huge central coefficients and
// overflows int64_t (undefined behaviour) although the result is only 1891.
// Intermediate values can still reach C(n,k) * min(k, n-k), so results close to the int64_t
// limit may still overflow.
inline int64_t choose(int64_t n, int64_t k)
{
  if (k < 0 || k > n)
    return 0;

  // Iterate over the shorter side of the symmetry; here 0 <= k <= n, so n - k cannot overflow.
  if (k > n - k)
    k = n - k;

  int64_t r = 1;
  for (int64_t d = 1; d <= k; ++d)
  {
    // Before this step r equals C(n0, d-1) for the original n0; multiplying by (n0 - d + 1)
    // yields a value divisible by d, so the integer division is exact and r becomes C(n0, d).
    r *= n--;
    r /= d;
  }
  return r;
}
87 
88 } // namespace INTERACTIONS
parameters weights
Definition: global_data.h:537
constexpr unsigned char printable_end
Definition: interactions.h:17
constexpr uint64_t valid_ns_size
Definition: interactions.h:19
void eval_count_of_generated_ft(vw &all, example &ec, size_t &new_features_cnt, float &new_features_value)
std::vector< std::string > * interactions
constexpr bool is_printable_namespace(const unsigned char ns)
Definition: interactions.h:22
int64_t choose(int64_t n, int64_t k)
Definition: interactions.h:69
void generate_interactions(vw &all, example_predict &ec, R &dat)
Definition: interactions.h:45
constexpr unsigned char printable_start
Definition: interactions.h:16
std::vector< std::string > expand_interactions(const std::vector< std::string > &vec, const size_t required_length, const std::string &err_msg)
Definition: interactions.cc:56
dense_parameters dense_weights
sparse_parameters sparse_weights
std::vector< std::string > interactions
Definition: global_data.h:457
constexpr unsigned char printable_ns_size
Definition: interactions.h:18
bool permutations
Definition: global_data.h:454
void sort_and_filter_duplicate_interactions(std::vector< std::string > &vec, bool filter_duplicates, size_t &removed_cnt, size_t &sorted_cnt)