Vowpal Wabbit
mf.cc
Go to the documentation of this file.
1 /*
2  Copyright (c) by respective owners including Yahoo!, Microsoft, and
3  individual contributors. All rights reserved. Released under a BSD (revised)
4  license as described in the file LICENSE.
5  */
6 #ifdef _WIN32
7 #define NOMINMAX
8 #include <winsock2.h>
9 #else
10 #include <netdb.h>
11 #endif
12 #include "reductions.h"
13 #include "gd.h"
14 
15 using namespace LEARNER;
16 using namespace VW::config;
17 
18 struct mf
19 {
20  std::vector<std::string> pairs;
21 
22  size_t rank;
23 
24  uint32_t increment;
25 
26  // array to cache w*x, (l^k * x_l) and (r^k * x_r)
27  // [ w*(1,x_l,x_r) , l^1*x_l, r^1*x_r, l^2*x_l, r^2*x_2, ... ]
29 
30  // array for temp storage of indices during prediction
32 
33  // array for temp storage of indices
35 
36  // array for temp storage of features
38 
39  vw* all; // for pairs? and finalize
40 
41  ~mf()
42  {
43  // clean up local v_arrays
44  indices.delete_v();
45  sub_predictions.delete_v();
46  }
47 };
48 
49 template <bool cache_sub_predictions>
50 void predict(mf& data, single_learner& base, example& ec)
51 {
52  float prediction = 0;
53  if (cache_sub_predictions)
54  data.sub_predictions.resize(2 * data.rank + 1);
55 
56  // predict from linear terms
57  base.predict(ec);
58 
59  // store linear prediction
60  if (cache_sub_predictions)
62  prediction += ec.partial_prediction;
63 
64  // store namespace indices
66 
67  // erase indices
68  ec.indices.clear();
69  ec.indices.push_back(0);
70 
71  // add interaction terms to prediction
72  for (std::string& i : data.pairs)
73  {
74  int left_ns = (int)i[0];
75  int right_ns = (int)i[1];
76 
77  if (ec.feature_space[left_ns].size() > 0 && ec.feature_space[right_ns].size() > 0)
78  {
79  for (size_t k = 1; k <= data.rank; k++)
80  {
81  ec.indices[0] = left_ns;
82 
83  // compute l^k * x_l using base learner
84  base.predict(ec, k);
85  float x_dot_l = ec.partial_prediction;
86  if (cache_sub_predictions)
87  data.sub_predictions[2 * k - 1] = x_dot_l;
88 
89  // set example to right namespace only
90  ec.indices[0] = right_ns;
91 
92  // compute r^k * x_r using base learner
93  base.predict(ec, k + data.rank);
94  float x_dot_r = ec.partial_prediction;
95  if (cache_sub_predictions)
96  data.sub_predictions[2 * k] = x_dot_r;
97 
98  // accumulate prediction
99  prediction += (x_dot_l * x_dot_r);
100  }
101  }
102  }
103  // restore namespace indices and label
105 
106  // finalize prediction
107  ec.partial_prediction = prediction;
109 }
110 
111 void learn(mf& data, single_learner& base, example& ec)
112 {
113  // predict with current weights
114  predict<true>(data, base, ec);
115  float predicted = ec.pred.scalar;
116 
117  // update linear weights
118  base.update(ec);
120 
121  // store namespace indices
122  copy_array(data.indices, ec.indices);
123 
124  // erase indices
125  ec.indices.clear();
126  ec.indices.push_back(0);
127 
128  // update interaction terms
129  // looping over all pairs of non-empty namespaces
130  for (std::string& i : data.pairs)
131  {
132  int left_ns = (int)i[0];
133  int right_ns = (int)i[1];
134 
135  if (ec.feature_space[left_ns].size() > 0 && ec.feature_space[right_ns].size() > 0)
136  {
137  // set example to left namespace only
138  ec.indices[0] = left_ns;
139 
140  // store feature values in left namespace
141  data.temp_features.deep_copy_from(ec.feature_space[left_ns]);
142 
143  for (size_t k = 1; k <= data.rank; k++)
144  {
145  features& fs = ec.feature_space[left_ns];
146  // multiply features in left namespace by r^k * x_r
147  for (size_t i = 0; i < fs.size(); ++i) fs.values[i] *= data.sub_predictions[2 * k];
148 
149  // update l^k using base learner
150  base.update(ec, k);
151 
152  // restore left namespace features (undoing multiply)
153  fs.deep_copy_from(data.temp_features);
154 
155  // compute new l_k * x_l scaling factors
156  // base.predict(ec, k);
157  // data.sub_predictions[2*k-1] = ec.partial_prediction;
158  // ec.pred.scalar = ec.updated_prediction;
159  }
160 
161  // set example to right namespace only
162  ec.indices[0] = right_ns;
163 
164  // store feature values for right namespace
165  data.temp_features.deep_copy_from(ec.feature_space[right_ns]);
166 
167  for (size_t k = 1; k <= data.rank; k++)
168  {
169  features& fs = ec.feature_space[right_ns];
170  // multiply features in right namespace by l^k * x_l
171  for (size_t i = 0; i < fs.size(); ++i) fs.values[i] *= data.sub_predictions[2 * k - 1];
172 
173  // update r^k using base learner
174  base.update(ec, k + data.rank);
176 
177  // restore right namespace features
178  fs.deep_copy_from(data.temp_features);
179  }
180  }
181  }
182  // restore namespace indices
183  copy_array(ec.indices, data.indices);
184 
185  // restore original prediction
186  ec.pred.scalar = predicted;
187 }
188 
// Reduction teardown hook: copy the interaction pairs (moved into the mf
// struct by mf_setup, which cleared the global list) back into the global
// vw state so later cleanup/diagnostics see the original configuration.
void finish(mf& o)
{
  // restore global pairs
  o.all->pairs = o.pairs;
}
194 
196 {
197  auto data = scoped_calloc_or_throw<mf>();
198  option_group_definition new_options("Matrix Factorization Reduction");
199  new_options.add(make_option("new_mf", data->rank).keep().help("rank for reduction-based matrix factorization"));
200  options.add_and_parse(new_options);
201 
202  if (!options.was_supplied("new_mf"))
203  return nullptr;
204 
205  data->all = &all;
206  // store global pairs in local data structure and clear global pairs
207  // for eventual calls to base learner
208  data->pairs = all.pairs;
209  all.pairs.clear();
210 
211  all.random_positive_weights = true;
212 
214  init_learner(data, as_singleline(setup_base(options, all)), learn, predict<false>, 2 * data->rank + 1);
215  l.set_finish(finish);
216  return make_base(l);
217 }
void resize(size_t length)
Definition: v_array.h:69
float finalize_prediction(shared_data *sd, float ret)
Definition: gd.cc:339
v_array< namespace_index > indices
void learn(mf &data, single_learner &base, example &ec)
Definition: mf.cc:111
void predict(E &ec, size_t i=0)
Definition: learner.h:169
void deep_copy_from(const features &src)
void finish(mf &o)
Definition: mf.cc:189
std::vector< std::string > pairs
Definition: global_data.h:459
float scalar
Definition: example.h:45
bool random_positive_weights
Definition: global_data.h:493
void copy_array(v_array< T > &dst, const v_array< T > &src)
Definition: v_array.h:185
the core definition of a set of features.
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
float partial_prediction
Definition: example.h:68
v_array< feature_value > values
v_array< float > sub_predictions
Definition: mf.cc:28
virtual void add_and_parse(const option_group_definition &group)=0
float updated_prediction
Definition: example.h:69
v_array< unsigned char > predict_indices
Definition: mf.cc:31
std::vector< std::string > pairs
Definition: mf.cc:20
std::array< features, NUM_NAMESPACES > feature_space
single_learner * as_singleline(learner< T, E > *l)
Definition: learner.h:476
size_t size() const
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
void push_back(const T &new_ele)
Definition: v_array.h:107
shared_data * sd
Definition: global_data.h:375
void clear()
Definition: v_array.h:88
virtual bool was_supplied(const std::string &key)=0
size_t rank
Definition: mf.cc:22
base_learner * mf_setup(options_i &options, vw &all)
Definition: mf.cc:195
features temp_features
Definition: mf.cc:37
void predict(mf &data, single_learner &base, example &ec)
Definition: mf.cc:50
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
void set_finish(void(*f)(T &))
Definition: learner.h:265
uint32_t increment
Definition: mf.cc:24
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
v_array< unsigned char > indices
Definition: mf.cc:34
polyprediction pred
Definition: example.h:60
void delete_v()
Definition: v_array.h:98
vw * all
Definition: mf.cc:39
Definition: mf.cc:18
~mf()
Definition: mf.cc:41