/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
 */
#include "search_graph.h"
#include "vw.h"
#include "gd.h"
#include "vw_exception.h"

using namespace VW::config;

/*
example format:

ALL NODES
ALL EDGES
<blank>
ALL NODES
ALL EDGES
<blank>

and so on

node lines look like normal vw examples with unary features:

label:weight |n features
label:weight |n features
...

they are *implicitly* numbered starting at 1. (note the namespace
needn't be called n.) if weight is omitted it is assumed to be 1.0.

edge lines look like:

n1 n2 n3 ... |e features
n1 n2 n3 ... |e features
...

here, n1 n2 n3 are integer node ids, starting at one. technically
these are hyperedges, since they can touch more than two nodes. in the
canonical representation, there will just be n1 and n2.

the only thing that differentiates edges from nodes is that edges have
>1 input.

edges can be either directed or undirected. the default is undirected,
in which case the order of nodes listed in the edge is irrelevant. if
you add --search_graph_directed then the edges will be interpreted as
directed edges. for simple edges that connect two nodes, such as "n1
n2 | ...", the direction will be n1-->n2. for hyperedges of size K
such as "n1 n2 n3 n4 | ...", the default assumption is that the edge
has K-1 sources and 1 sink, so {n1,n2,n3}-->n4 in this case. if you
desire hyperedges with more than one sink, you can specify them by
separating the source nodes from the sink nodes with a "0". for
instance "n1 n2 n3 0 n4 | ..." is the same as "n1 n2 n3 n4 | ...", but
"n1 n2 0 n3 n4 | ..." means {n1,n2}-->{n3,n4}. in undirected mode, all
0s are ignored; in directed mode, all but the first are ignored.
*/
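
/*
for concreteness, a hedged example: a 3-node, 2-edge undirected graph whose
nodes (implicitly numbered 1, 2, 3) carry class labels 2, 1, 2, with made-up
feature names (f1..f3, e12, e23) chosen purely for illustration:

2 |n f1 f2
1 |n f2
2 |n f1 f3
1 2 |e e12
2 3 |e e23
<blank>

a sketch of an invocation (assuming K=3 labels; exact flags may vary across
vw versions):

vw --search 3 --search_task graph --search_graph_num_loops 2 -d train.graph
*/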

namespace GraphTask
{
Search::search_task task = {"graph", run, initialize, finish, setup, takedown};

struct task_data
{
  // global data
  size_t num_loops;
  size_t K;     // number of labels, *NOT* including the +1 for 'unlabeled'
  size_t numN;  // number of neighbor predictions (equals K+1 for undirected, or 2*(K+1) for directed)
  bool use_structure;
  bool separate_learners;
  bool directed;

  // for adding new features
  uint64_t mask;        // all->reg.weight_mask
  uint64_t multiplier;  // all.wpp << all.stride_shift
  size_t ss;            // stride_shift
  size_t wpp;

  // per-example data
  uint32_t N;  // number of nodes
  uint32_t E;  // number of edges
  std::vector<std::vector<size_t>> adj;  // adj[n] is a vector of *edge example ids* that contain n
  std::vector<uint32_t> bfs;             // order of nodes to process
  std::vector<size_t> pred;              // predictions
  example* cur_node;                     // pointer to the current node for add_edge_features_fn
  float* neighbor_predictions;           // prediction on this neighbor for add_edge_features_fn
  uint32_t* confusion_matrix;
  float* true_counts;
  float true_counts_total;
};

inline bool example_is_test(polylabel& l) { return l.cs.costs.size() == 0; }

void initialize(Search::search& sch, size_t& num_actions, options_i& options)
{
  task_data* D = new task_data();

  option_group_definition new_options("search graphtask options");
  new_options
      .add(make_option("search_graph_num_loops", D->num_loops).default_value(2).help("how many loops to run [def: 2]"))
      .add(make_option("search_graph_no_structure", D->use_structure).help("turn off edge features"))
      .add(make_option("search_graph_separate_learners", D->separate_learners)
               .help("use a different learner for each pass"))
      .add(make_option("search_graph_directed", D->directed)
               .help("construct features based on directed graph semantics"));
  options.add_and_parse(new_options);

  // the flag is "no_structure", so invert it to get use_structure
  D->use_structure = !D->use_structure;

  if (D->num_loops <= 1)
  {
    D->num_loops = 1;
    D->separate_learners = false;
  }

  D->K = num_actions;
  D->numN = (D->directed + 1) * (D->K + 1);
  std::cerr << "K=" << D->K << ", numN=" << D->numN << std::endl;
  D->neighbor_predictions = calloc_or_throw<float>(D->numN);

  D->confusion_matrix = calloc_or_throw<uint32_t>((D->K + 1) * (D->K + 1));
  D->true_counts = calloc_or_throw<float>(D->K + 1);
  D->true_counts_total = (float)(D->K + 1);
  for (size_t k = 0; k <= D->K; k++) D->true_counts[k] = 1.;

  if (D->separate_learners)
    sch.set_num_learners(D->num_loops);

  sch.set_task_data<task_data>(D);
  sch.set_options(0);  // Search::AUTO_HAMMING_LOSS
  sch.set_label_parser(COST_SENSITIVE::cs_label, example_is_test);
}

void finish(Search::search& sch)
{
  task_data* D = sch.get_task_data<task_data>();
  free(D->neighbor_predictions);
  free(D->confusion_matrix);
  free(D->true_counts);
  delete D;
}

inline bool example_is_edge(example* e) { return e->l.cs.costs.size() > 1; }

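// compute a breadth-first ordering of the nodes into D.bfs, treating
// hyperedges as connecting all of their endpoints. whenever a connected
// component is exhausted before all N nodes are visited, restart from the
// first untouched node.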
void run_bfs(task_data& D, multi_ex& ec)
{
  D.bfs.clear();
  std::vector<bool> touched;
  for (size_t n = 0; n < D.N; n++) touched.push_back(false);

  touched[0] = true;
  D.bfs.push_back(0);

  size_t i = 0;
  while (D.bfs.size() < D.N)
  {
    while (i < D.bfs.size())
    {
      uint32_t n = D.bfs[i];
      for (size_t id : D.adj[n])
        for (size_t j = 0; j < ec[id]->l.cs.costs.size(); j++)
        {
          uint32_t m = ec[id]->l.cs.costs[j].class_index;
          if ((m > 0) && (!touched[m - 1]))
          {
            D.bfs.push_back(m - 1);
            touched[m - 1] = true;
          }
        }
      i++;
    }

    if (D.bfs.size() < D.N)
      // we finished a connected component; pick a fresh root for the next one
      for (uint32_t n = 0; n < D.N; n++)
        if (!touched[n])
        {
          touched[n] = true;
          D.bfs.push_back(n);
          break;
        }
  }
}

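// per-multi-example setup: split the examples into N nodes followed by E
// edges, tally the true label counts, build the node->edge adjacency lists
// (adj[n] holds edge example ids), compute the bfs order, and reset all
// predictions to the "unlabeled" value K+1.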
void setup(Search::search& sch, multi_ex& ec)
{
  task_data& D = *sch.get_task_data<task_data>();

  D.wpp = sch.get_vw_pointer_unsafe().wpp;
  D.mask = sch.get_vw_pointer_unsafe().weights.mask();
  D.ss = sch.get_vw_pointer_unsafe().weights.stride_shift();
  D.multiplier = D.wpp << D.ss;  // must be computed after wpp and ss are set

  D.N = 0;
  D.E = 0;
  for (size_t i = 0; i < ec.size(); i++)
    if (example_is_edge(ec[i]))
      D.E++;
    else  // it's a node!
    {
      if (D.E > 0)
        THROW("error: got a node after getting edges!");

      D.N++;
      if (ec[i]->l.cs.costs.size() > 0)
      {
        D.true_counts[ec[i]->l.cs.costs[0].class_index] += 1.;
        D.true_counts_total += 1.;
      }
    }

  if ((D.N == 0) && (D.E > 0))
    THROW("error: got edges without any nodes (perhaps ring_size is too small?)!");

  D.adj = std::vector<std::vector<size_t>>(D.N, std::vector<size_t>(0));

  for (size_t i = D.N; i < ec.size(); i++)
  {
    for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
    {
      if (ec[i]->l.cs.costs[n].class_index > D.N)
        THROW("error: edge source points to too large of a node id: " << (ec[i]->l.cs.costs[n].class_index) << " > "
                                                                      << D.N);
    }
    for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
    {
      size_t nn = ec[i]->l.cs.costs[n].class_index;
      if ((nn > 0) &&
          ((D.adj[nn - 1].size() == 0) || (D.adj[nn - 1][D.adj[nn - 1].size() - 1] != i)))  // don't allow dups
        D.adj[nn - 1].push_back(i);
    }
  }

  run_bfs(D, ec);

  D.pred.clear();
  for (size_t n = 0; n < D.N; n++) D.pred.push_back(D.K + 1);
}

void takedown(Search::search& sch, multi_ex& /*ec*/)
{
  task_data& D = *sch.get_task_data<task_data>();
  D.bfs.clear();
  D.pred.clear();
  for (auto& x : D.adj) x.clear();  // take a reference so we don't clear a copy
  D.adj.clear();
}

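// feature callback for edges whose neighbor predictions were aggregated: each
// source feature (fv, fx) is copied into the current node's neighbor
// namespace once per nonzero prediction slot k, re-hashed with an offset
// proportional to k and scaled by the fraction of neighbors predicted k.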
void add_edge_features_group_fn(task_data& D, float fv, uint64_t fx)
{
  example* node = D.cur_node;
  uint64_t fx2 = fx / (uint64_t)D.multiplier;
  for (size_t k = 0; k < D.numN; k++)
  {
    if (D.neighbor_predictions[k] == 0.)
      continue;
    node->feature_space[neighbor_namespace].push_back(
        fv * D.neighbor_predictions[k], (uint64_t)((fx2 + 348919043 * k) * D.multiplier) & (uint64_t)D.mask);
  }
}

void add_edge_features_single_fn(task_data& D, float fv, uint64_t fx)
{
  example* node = D.cur_node;
  features& fs = node->feature_space[neighbor_namespace];
  uint64_t fx2 = fx / (uint64_t)D.multiplier;
  size_t k = (size_t)D.neighbor_predictions[0];
  fs.push_back(fv, (uint64_t)((fx2 + 348919043 * k) * D.multiplier) & (uint64_t)D.mask);
}

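// construct neighbor features for node n: for every edge touching n,
// aggregate the current predictions of the other endpoints (in directed mode,
// endpoints on the opposite side of the edge get a separate block of
// prediction slots), then copy the edge's features into n's neighbor
// namespace via the single/group callbacks above.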
void add_edge_features(Search::search& sch, task_data& D, size_t n, multi_ex& ec)
{
  D.cur_node = ec[n];

  for (size_t i : D.adj[n])
  {
    for (size_t k = 0; k < D.numN; k++) D.neighbor_predictions[k] = 0.;

    float pred_total = 0.;
    uint32_t last_pred = 0;
    if (D.use_structure)
    {
      bool n_in_sink = true;
      if (D.directed)
        for (size_t j = 0; j < ec[i]->l.cs.costs.size() - 1; j++)
        {
          size_t m = ec[i]->l.cs.costs[j].class_index;
          if (m == 0)
            break;
          if (m - 1 == n)
          {
            n_in_sink = false;
            break;
          }
        }

      bool m_in_sink = false;
      for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
      {
        size_t m = ec[i]->l.cs.costs[j].class_index;
        if (m == 0)
        {
          m_in_sink = true;
          continue;
        }
        if (j == ec[i]->l.cs.costs.size() - 1)
          m_in_sink = true;
        m--;
        if (m == n)
          continue;
        size_t other_side = (D.directed && (n_in_sink != m_in_sink)) ? (D.K + 1) : 0;
        D.neighbor_predictions[D.pred[m] - 1 + other_side] += 1.;
        pred_total += 1.;
        last_pred = (uint32_t)D.pred[m] - 1 + (uint32_t)other_side;
      }
    }
    else
    {
      D.neighbor_predictions[0] += 1.;
      pred_total += 1.;
      last_pred = 0;
    }

    if (pred_total == 0.)
      continue;
    // std::cerr << n << ':' << i << " -> ["; for (size_t k = 0; k < D.numN; k++) std::cerr << ' ' << D.neighbor_predictions[k]; std::cerr << " ]" << std::endl;
    for (size_t k = 0; k < D.numN; k++) D.neighbor_predictions[k] /= pred_total;
    example& edge = *ec[i];

    if (pred_total <= 1.)  // single edge
    {
      D.neighbor_predictions[0] = (float)last_pred;
      GD::foreach_feature<task_data, uint64_t, add_edge_features_single_fn>(sch.get_vw_pointer_unsafe(), edge, D);
    }
    else  // lots of edges
      GD::foreach_feature<task_data, uint64_t, add_edge_features_group_fn>(sch.get_vw_pointer_unsafe(), edge, D);
  }
  ec[n]->indices.push_back(neighbor_namespace);
  ec[n]->total_sum_feat_sq += ec[n]->feature_space[neighbor_namespace].sum_feat_sq;
  ec[n]->num_features += ec[n]->feature_space[neighbor_namespace].size();

  // account for quadratic interactions that involve the neighbor namespace
  vw& all = sch.get_vw_pointer_unsafe();
  for (std::string& i : all.pairs)
  {
    int i0 = (int)i[0];
    int i1 = (int)i[1];
    if ((i0 == (int)neighbor_namespace) || (i1 == (int)neighbor_namespace))
    {
      ec[n]->num_features += ec[n]->feature_space[i0].size() * ec[n]->feature_space[i1].size();
      ec[n]->total_sum_feat_sq += ec[n]->feature_space[i0].sum_feat_sq * ec[n]->feature_space[i1].sum_feat_sq;
    }
  }
}

void del_edge_features(task_data& /*D*/, uint32_t n, multi_ex& ec)
{
  ec[n]->indices.pop();
  features& fs = ec[n]->feature_space[neighbor_namespace];
  ec[n]->total_sum_feat_sq -= fs.sum_feat_sq;
  ec[n]->num_features -= fs.size();
  fs.clear();
}

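// macro-averaged F1 over the confusion matrix: for each class k with at least
// one true instance, precision = C[k][k] / (column sum over predictions of k)
// and recall = C[k][k] / (row sum over true instances of k); then
// F1_k = 2 * precision * recall / (precision + recall), and the result is the
// mean of F1_k over those classes.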
#define IDX(i, j) ((i) * (D.K + 1) + (j))

float macro_f(task_data& D)
{
  float total_f1 = 0.;
  float count_f1 = 0.;
  for (size_t k = 1; k <= D.K; k++)
  {
    float trueC = 0.;
    float predC = 0.;
    for (size_t j = 1; j <= D.K; j++)
    {
      trueC += (float)D.confusion_matrix[IDX(k, j)];
      predC += (float)D.confusion_matrix[IDX(j, k)];
    }
    if (trueC == 0)
      continue;
    float correctC = (float)D.confusion_matrix[IDX(k, k)];
    count_f1++;
    if (correctC > 0)
    {
      float pre = correctC / predC;
      float rec = correctC / trueC;
      total_f1 += 2 * pre * rec / (pre + rec);
    }
  }
  return total_f1 / count_f1;
}

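// run the actual search task: make num_loops passes over the nodes in bfs
// order (reversing direction on odd passes), predicting each node's label
// with features built from its neighbors' current predictions. intermediate
// passes incur a discounted per-node loss, and a final 1 - macro_F1 loss is
// added over the whole graph.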
void run(Search::search& sch, multi_ex& ec)
{
  task_data& D = *sch.get_task_data<task_data>();
  float loss_val = 0.5f / (float)D.num_loops;
  for (size_t n = 0; n < D.N; n++) D.pred[n] = D.K + 1;

  for (size_t loop = 0; loop < D.num_loops; loop++)
  {
    bool last_loop = loop == (D.num_loops - 1);
    int start = 0;
    int end = D.N;
    int step = 1;
    if (loop % 2 == 1)
    {
      start = D.N - 1;
      end = -1;
      step = -1;
    }  // go backward on odd loops
    for (int n_id = start; n_id != end; n_id += step)
    {
      uint32_t n = D.bfs[n_id];
      uint32_t k = (ec[n]->l.cs.costs.size() > 0) ? ec[n]->l.cs.costs[0].class_index : 0;

      bool add_features = /* D.use_structure && */ sch.predictNeedsExample();
      // add_features = false;

      if (add_features)
        add_edge_features(sch, D, n, ec);
      Search::predictor P = Search::predictor(sch, n + 1);
      P.set_input(*ec[n]);
      if (false && (k > 0))  // disabled: reweight examples by inverse class frequency
      {
        float min_count = 1e12f;
        for (size_t k2 = 1; k2 <= D.K; k2++) min_count = std::min(min_count, D.true_counts[k2]);
        float w = min_count / D.true_counts[k];
        // float w = D.true_counts_total / D.true_counts[k] / (float)(D.K);
        P.set_weight(w);
        // std::cerr << "w = " << D.true_counts_total / D.true_counts[k] / (float)(D.K) << std::endl;
        // P.set_weight( D.true_counts_total / D.true_counts[k] / (float)(D.K) );
      }
      if (D.separate_learners)
        P.set_learner_id(loop);
      if (k > 0)  // test examples have no oracle
        P.set_oracle(k);
      // add all the conditioning: condition on every other node that shares an edge with n
      for (size_t i : D.adj[n])
      {
        for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
        {
          uint32_t m = ec[i]->l.cs.costs[j].class_index;
          if (m == 0)
            continue;
          m--;
          if (m == n)
            continue;
          P.add_condition(m + 1, 'e');
        }
      }

      // make the prediction
      D.pred[n] = P.predict();
      if (ec[n]->l.cs.costs.size() > 0)  // test examples get no loss
        sch.loss((ec[n]->l.cs.costs[0].class_index == D.pred[n]) ? 0.f : (last_loop ? 0.5f : loss_val));

      if (add_features)
        del_edge_features(D, n, ec);
    }
  }

  for (uint32_t n = 0; n < D.N; n++)
    if (ec[n]->l.cs.costs.size() > 0)  // only labeled nodes enter the confusion matrix
      D.confusion_matrix[IDX(ec[n]->l.cs.costs[0].class_index, D.pred[n])]++;
  sch.loss(1.f - macro_f(D));

  if (sch.output().good())
    for (uint32_t n = 0; n < D.N; n++) sch.output() << D.pred[n] << ' ';
}
}  // namespace GraphTask