Vowpal Wabbit
Classes | Functions | Variables
GraphTask Namespace Reference

Classes

struct  task_data
 

Functions

bool example_is_test (polylabel &l)
 
void initialize (Search::search &sch, size_t &num_actions, options_i &options)
 
void finish (Search::search &sch)
 
bool example_is_edge (example *e)
 
void run_bfs (task_data &D, multi_ex &ec)
 
void setup (Search::search &sch, multi_ex &ec)
 
void takedown (Search::search &sch, multi_ex &)
 
void add_edge_features_group_fn (task_data &D, float fv, uint64_t fx)
 
void add_edge_features_single_fn (task_data &D, float fv, uint64_t fx)
 
void add_edge_features (Search::search &sch, task_data &D, size_t n, multi_ex &ec)
 
void del_edge_features (task_data &, uint32_t n, multi_ex &ec)
 
float macro_f (task_data &D)
 
void run (Search::search &sch, multi_ex &ec)
 

Variables

Search::search_task task = {"graph", run, initialize, finish, setup, takedown}
 

Function Documentation

◆ add_edge_features()

void GraphTask::add_edge_features ( Search::search sch,
task_data D,
size_t  n,
multi_ex ec 
)

Definition at line 271 of file search_graph.cc.

References GraphTask::task_data::adj, GraphTask::task_data::cur_node, GraphTask::task_data::directed, Search::search::get_vw_pointer_unsafe(), GraphTask::task_data::K, neighbor_namespace, GraphTask::task_data::neighbor_predictions, GraphTask::task_data::numN, vw::pairs, GraphTask::task_data::pred, and GraphTask::task_data::use_structure.

Referenced by run().

272 {
273  D.cur_node = ec[n];
274 
275  for (size_t i : D.adj[n])
276  {
277  for (size_t k = 0; k < D.numN; k++) D.neighbor_predictions[k] = 0.;
278 
279  float pred_total = 0.;
280  uint32_t last_pred = 0;
281  if (D.use_structure)
282  {
283  bool n_in_sink = true;
284  if (D.directed)
285  for (size_t j = 0; j < ec[i]->l.cs.costs.size() - 1; j++)
286  {
287  size_t m = ec[i]->l.cs.costs[j].class_index;
288  if (m == 0)
289  break;
290  if (m - 1 == n)
291  {
292  n_in_sink = false;
293  break;
294  }
295  }
296 
297  bool m_in_sink = false;
298  for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
299  {
300  size_t m = ec[i]->l.cs.costs[j].class_index;
301  if (m == 0)
302  {
303  m_in_sink = true;
304  continue;
305  }
306  if (j == ec[i]->l.cs.costs.size() - 1)
307  m_in_sink = true;
308  m--;
309  if (m == n)
310  continue;
311  size_t other_side = (D.directed && (n_in_sink != m_in_sink)) ? (D.K + 1) : 0;
312  D.neighbor_predictions[D.pred[m] - 1 + other_side] += 1.;
313  pred_total += 1.;
314  last_pred = (uint32_t)D.pred[m] - 1 + (uint32_t)other_side;
315  }
316  }
317  else
318  {
319  D.neighbor_predictions[0] += 1.;
320  pred_total += 1.;
321  last_pred = 0;
322  }
323 
324  if (pred_total == 0.)
325  continue;
326  // std::cerr << n << ':' << i << " -> ["; for (size_t k=0; k<D.numN; k++) std::cerr << ' ' <<
327  // D.neighbor_predictions[k]; std::cerr
328  // << " ]" << std::endl;
329  for (size_t k = 0; k < D.numN; k++) D.neighbor_predictions[k] /= pred_total;
330  example& edge = *ec[i];
331 
332  if (pred_total <= 1.) // single edge
333  {
334  D.neighbor_predictions[0] = (float)last_pred;
335  GD::foreach_feature<task_data, uint64_t, add_edge_features_single_fn>(sch.get_vw_pointer_unsafe(), edge, D);
336  }
337  else // lots of edges
338  GD::foreach_feature<task_data, uint64_t, add_edge_features_group_fn>(sch.get_vw_pointer_unsafe(), edge, D);
339  }
340  ec[n]->indices.push_back(neighbor_namespace);
341  ec[n]->total_sum_feat_sq += ec[n]->feature_space[neighbor_namespace].sum_feat_sq;
342  ec[n]->num_features += ec[n]->feature_space[neighbor_namespace].size();
343 
344  vw& all = sch.get_vw_pointer_unsafe();
345  for (std::string& i : all.pairs)
346  {
347  int i0 = (int)i[0];
348  int i1 = (int)i[1];
349  if ((i0 == (int)neighbor_namespace) || (i1 == (int)neighbor_namespace))
350  {
351  ec[n]->num_features += ec[n]->feature_space[i0].size() * ec[n]->feature_space[i1].size();
352  ec[n]->total_sum_feat_sq += ec[n]->feature_space[i0].sum_feat_sq * ec[n]->feature_space[i1].sum_feat_sq;
353  }
354  }
355 }
std::vector< std::string > pairs
Definition: global_data.h:459
constexpr unsigned char neighbor_namespace
Definition: constant.h:25
vw & get_vw_pointer_unsafe()
Definition: search.cc:3115

◆ add_edge_features_group_fn()

void GraphTask::add_edge_features_group_fn ( task_data D,
float  fv,
uint64_t  fx 
)

Definition at line 249 of file search_graph.cc.

References GraphTask::task_data::cur_node, example_predict::feature_space, GraphTask::task_data::mask, GraphTask::task_data::multiplier, neighbor_namespace, GraphTask::task_data::neighbor_predictions, and GraphTask::task_data::numN.

250 {
251  example* node = D.cur_node;
252  uint64_t fx2 = fx / (uint64_t)D.multiplier;
253  for (size_t k = 0; k < D.numN; k++)
254  {
255  if (D.neighbor_predictions[k] == 0.)
256  continue;
257  node->feature_space[neighbor_namespace].push_back(
258  fv * D.neighbor_predictions[k], (uint64_t)((fx2 + 348919043 * k) * D.multiplier) & (uint64_t)D.mask);
259  }
260 }
constexpr unsigned char neighbor_namespace
Definition: constant.h:25
std::array< features, NUM_NAMESPACES > feature_space

◆ add_edge_features_single_fn()

void GraphTask::add_edge_features_single_fn ( task_data D,
float  fv,
uint64_t  fx 
)

Definition at line 262 of file search_graph.cc.

References GraphTask::task_data::cur_node, example_predict::feature_space, GraphTask::task_data::mask, GraphTask::task_data::multiplier, neighbor_namespace, GraphTask::task_data::neighbor_predictions, and features::push_back().

263 {
264  example* node = D.cur_node;
266  uint64_t fx2 = fx / (uint64_t)D.multiplier;
267  size_t k = (size_t)D.neighbor_predictions[0];
268  fs.push_back(fv, (uint32_t)((fx2 + 348919043 * k) * D.multiplier) & (uint64_t)D.mask);
269 }
void push_back(feature_value v, feature_index i)
constexpr unsigned char neighbor_namespace
Definition: constant.h:25
the core definition of a set of features.
std::array< features, NUM_NAMESPACES > feature_space

◆ del_edge_features()

void GraphTask::del_edge_features ( task_data ,
uint32_t  n,
multi_ex ec 
)

Definition at line 357 of file search_graph.cc.

References features::clear(), neighbor_namespace, features::size(), and features::sum_feat_sq.

Referenced by run().

358 {
359  ec[n]->indices.pop();
360  features& fs = ec[n]->feature_space[neighbor_namespace];
361  ec[n]->total_sum_feat_sq -= fs.sum_feat_sq;
362  ec[n]->num_features -= fs.size();
363  fs.clear();
364 }
constexpr unsigned char neighbor_namespace
Definition: constant.h:25
the core definition of a set of features.
size_t size() const
void clear()
float sum_feat_sq

◆ example_is_edge()

bool GraphTask::example_is_edge ( example e)
inline

Definition at line 145 of file search_graph.cc.

References COST_SENSITIVE::label::costs, polylabel::cs, and example::l.

Referenced by setup().

145 { return e->l.cs.costs.size() > 1; }
COST_SENSITIVE::label cs
Definition: example.h:30
polylabel l
Definition: example.h:57
v_array< wclass > costs

◆ example_is_test()

bool GraphTask::example_is_test ( polylabel l)
inline

Definition at line 94 of file search_graph.cc.

References COST_SENSITIVE::label::costs, and polylabel::cs.

Referenced by initialize().

94 { return l.cs.costs.size() == 0; }
COST_SENSITIVE::label cs
Definition: example.h:30
v_array< wclass > costs

◆ finish()

void GraphTask::finish ( Search::search sch)

Definition at line 136 of file search_graph.cc.

References GraphTask::task_data::confusion_matrix, Search::search::get_task_data(), GraphTask::task_data::neighbor_predictions, and GraphTask::task_data::true_counts.

137 {
138  task_data* D = sch.get_task_data<task_data>();
139  free(D->neighbor_predictions);
140  free(D->confusion_matrix);
141  free(D->true_counts);
142  delete D;
143 }
T * get_task_data()
Definition: search.h:89

◆ initialize()

void GraphTask::initialize ( Search::search sch,
size_t &  num_actions,
options_i options 
)

Definition at line 96 of file search_graph.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), GraphTask::task_data::confusion_matrix, COST_SENSITIVE::cs_label, GraphTask::task_data::directed, example_is_test(), GraphTask::task_data::K, VW::config::make_option(), GraphTask::task_data::neighbor_predictions, GraphTask::task_data::num_loops, GraphTask::task_data::numN, GraphTask::task_data::separate_learners, Search::search::set_label_parser(), Search::search::set_num_learners(), Search::search::set_options(), Search::search::set_task_data(), GraphTask::task_data::true_counts, GraphTask::task_data::true_counts_total, and GraphTask::task_data::use_structure.

97 {
98  task_data* D = new task_data();
99 
100  option_group_definition new_options("search graphtask options");
101  new_options
102  .add(make_option("search_graph_num_loops", D->num_loops).default_value(2).help("how many loops to run [def: 2]"))
103  .add(make_option("search_graph_no_structure", D->use_structure).help("turn off edge features"))
104  .add(make_option("search_graph_separate_learners", D->separate_learners)
105  .help("use a different learner for each pass"))
106  .add(make_option("search_graph_directed", D->directed)
107  .help("construct features based on directed graph semantics"));
108  options.add_and_parse(new_options);
109 
110  D->use_structure = !D->use_structure;
111 
112  if (D->num_loops <= 1)
113  {
114  D->num_loops = 1;
115  D->separate_learners = false;
116  }
117 
118  D->K = num_actions;
119  D->numN = (D->directed + 1) * (D->K + 1);
120  std::cerr << "K=" << D->K << ", numN=" << D->numN << std::endl;
121  D->neighbor_predictions = calloc_or_throw<float>(D->numN);
122 
123  D->confusion_matrix = calloc_or_throw<uint32_t>((D->K + 1) * (D->K + 1));
124  D->true_counts = calloc_or_throw<float>(D->K + 1);
125  D->true_counts_total = (float)(D->K + 1);
126  for (size_t k = 0; k <= D->K; k++) D->true_counts[k] = 1.;
127 
128  if (D->separate_learners)
129  sch.set_num_learners(D->num_loops);
130 
131  sch.set_task_data<task_data>(D);
132  sch.set_options(0); // Search::AUTO_HAMMING_LOSS
134 }
void set_label_parser(label_parser &lp, bool(*is_test)(polylabel &))
Definition: search.cc:3079
label_parser cs_label
virtual void add_and_parse(const option_group_definition &group)=0
void set_options(uint32_t opts)
Definition: search.cc:3053
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
void set_task_data(T *data)
Definition: search.h:84
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
bool example_is_test(polylabel &l)
Definition: search_graph.cc:94
void set_num_learners(size_t num_learners)
Definition: search.cc:3094

◆ macro_f()

float GraphTask::macro_f ( task_data D)

Definition at line 368 of file search_graph.cc.

References GraphTask::task_data::confusion_matrix, IDX, and GraphTask::task_data::K.

Referenced by run().

369 {
370  float total_f1 = 0.;
371  float count_f1 = 0.;
372  for (size_t k = 1; k <= D.K; k++)
373  {
374  float trueC = 0.;
375  float predC = 0.;
376  for (size_t j = 1; j <= D.K; j++)
377  {
378  trueC += (float)D.confusion_matrix[IDX(k, j)];
379  predC += (float)D.confusion_matrix[IDX(j, k)];
380  }
381  if (trueC == 0)
382  continue;
383  float correctC = (float)D.confusion_matrix[IDX(k, k)];
384  count_f1++;
385  if (correctC > 0)
386  {
387  float pre = correctC / predC;
388  float rec = correctC / trueC;
389  total_f1 += 2 * pre * rec / (pre + rec);
390  }
391  }
392  return total_f1 / count_f1;
393 }
#define IDX(i, j)

◆ run()

void GraphTask::run ( Search::search sch,
multi_ex ec 
)

Definition at line 395 of file search_graph.cc.

References Search::predictor::add_condition(), add_edge_features(), GraphTask::task_data::adj, GraphTask::task_data::bfs, GraphTask::task_data::confusion_matrix, del_edge_features(), f, Search::search::get_task_data(), IDX, GraphTask::task_data::K, Search::search::loss(), macro_f(), GraphTask::task_data::N, GraphTask::task_data::num_loops, Search::search::output(), GraphTask::task_data::pred, Search::predictor::predict(), Search::search::predictNeedsExample(), GraphTask::task_data::separate_learners, Search::predictor::set_input(), Search::predictor::set_learner_id(), Search::predictor::set_oracle(), Search::predictor::set_weight(), and GraphTask::task_data::true_counts.

396 {
397  task_data& D = *sch.get_task_data<task_data>();
398  float loss_val = 0.5f / (float)D.num_loops;
399  for (size_t n = 0; n < D.N; n++) D.pred[n] = D.K + 1;
400 
401  for (size_t loop = 0; loop < D.num_loops; loop++)
402  {
403  bool last_loop = loop == (D.num_loops - 1);
404  int start = 0;
405  int end = D.N;
406  int step = 1;
407  if (loop % 2 == 1)
408  {
409  start = D.N - 1;
410  end = -1;
411  step = -1;
412  } // go inward on odd loops
413  for (int n_id = start; n_id != end; n_id += step)
414  {
415  uint32_t n = D.bfs[n_id];
416  uint32_t k = (ec[n]->l.cs.costs.size() > 0) ? ec[n]->l.cs.costs[0].class_index : 0;
417 
418  bool add_features = /* D.use_structure && */ sch.predictNeedsExample();
419  // add_features = false;
420 
421  if (add_features)
422  add_edge_features(sch, D, n, ec);
423  Search::predictor P = Search::predictor(sch, n + 1);
424  P.set_input(*ec[n]);
425  if (false && (k > 0))
426  {
427  float min_count = 1e12f;
428  for (size_t k2 = 1; k2 <= D.K; k2++) min_count = std::min(min_count, D.true_counts[k2]);
429  float w = min_count / D.true_counts[k];
430  // float w = D.true_counts_total / D.true_counts[k] / (float)(D.K);
431  P.set_weight(w);
432  // std::cerr << "w = " << D.true_counts_total / D.true_counts[k] / (float)(D.K) << std::endl;
433  // P.set_weight( D.true_counts_total / D.true_counts[k] / (float)(D.K) );
434  }
435  if (D.separate_learners)
436  P.set_learner_id(loop);
437  if (k > 0) // for test examples
438  P.set_oracle(k);
439  // add all the conditioning
440  for (size_t i = 0; i < D.adj[n].size(); i++)
441  {
442  for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
443  {
444  uint32_t m = ec[i]->l.cs.costs[j].class_index;
445  if (m == 0)
446  continue;
447  m--;
448  if (m == n)
449  continue;
450  P.add_condition(m + 1, 'e');
451  }
452  }
453 
454  // make the prediction
455  D.pred[n] = P.predict();
456  if (ec[n]->l.cs.costs.size() > 0) // for test examples
457  sch.loss((ec[n]->l.cs.costs[0].class_index == D.pred[n]) ? 0.f : (last_loop ? 0.5f : loss_val));
458 
459  if (add_features)
460  del_edge_features(D, n, ec);
461  }
462  }
463 
464  for (uint32_t n = 0; n < D.N; n++) D.confusion_matrix[IDX(ec[n]->l.cs.costs[0].class_index, D.pred[n])]++;
465  sch.loss(1.f - macro_f(D));
466 
467  if (sch.output().good())
468  for (uint32_t n = 0; n < D.N; n++) sch.output() << D.pred[n] << ' ';
469 }
predictor & set_oracle(action a)
Definition: search.cc:3314
std::stringstream & output()
Definition: search.cc:3043
predictor & add_condition(ptag tag, char name)
Definition: search.cc:3414
predictor & set_weight(float w)
Definition: search.cc:3324
action predict()
Definition: search.cc:3460
void add_edge_features(Search::search &sch, task_data &D, size_t n, multi_ex &ec)
T * get_task_data()
Definition: search.h:89
#define IDX(i, j)
void del_edge_features(task_data &, uint32_t n, multi_ex &ec)
predictor & set_learner_id(size_t id)
Definition: search.cc:3448
bool predictNeedsExample()
Definition: search.cc:3041
predictor & set_input(example &input_example)
Definition: search.cc:3173
float f
Definition: cache.cc:40
void loss(float incr_loss)
Definition: search.cc:3039
float macro_f(task_data &D)

◆ run_bfs()

void GraphTask::run_bfs ( task_data D,
multi_ex ec 
)

Definition at line 147 of file search_graph.cc.

References GraphTask::task_data::adj, GraphTask::task_data::bfs, id(), and GraphTask::task_data::N.

Referenced by setup().

148 {
149  D.bfs.clear();
150  std::vector<bool> touched;
151  for (size_t n = 0; n < D.N; n++) touched.push_back(false);
152 
153  touched[0] = true;
154  D.bfs.push_back(0);
155 
156  size_t i = 0;
157  while (D.bfs.size() < D.N)
158  {
159  while (i < D.bfs.size())
160  {
161  uint32_t n = D.bfs[i];
162  for (size_t id : D.adj[n])
163  for (size_t j = 0; j < ec[id]->l.cs.costs.size(); j++)
164  {
165  uint32_t m = ec[id]->l.cs.costs[j].class_index;
166  if ((m > 0) && (!touched[m - 1]))
167  {
168  D.bfs.push_back(m - 1);
169  touched[m - 1] = true;
170  }
171  }
172  i++;
173  }
174 
175  if (D.bfs.size() < D.N)
176  // we finished a SCC, need to find another
177  for (uint32_t n = 0; n < D.N; n++)
178  if (!touched[n])
179  {
180  touched[n] = true;
181  D.bfs.push_back(n);
182  break;
183  }
184  }
185 }
float id(float in)
Definition: scorer.cc:51

◆ setup()

void GraphTask::setup ( Search::search sch,
multi_ex ec 
)

Definition at line 187 of file search_graph.cc.

References GraphTask::task_data::E, example_is_edge(), Search::search::get_task_data(), Search::search::get_vw_pointer_unsafe(), GraphTask::task_data::mask, parameters::mask(), GraphTask::task_data::multiplier, GraphTask::task_data::N, run_bfs(), GraphTask::task_data::ss, parameters::stride_shift(), THROW, GraphTask::task_data::true_counts, GraphTask::task_data::true_counts_total, vw::weights, GraphTask::task_data::wpp, and vw::wpp.

188 {
189  task_data& D = *sch.get_task_data<task_data>();
190  D.multiplier = D.wpp << D.ss;
191  D.wpp = sch.get_vw_pointer_unsafe().wpp;
192  D.mask = sch.get_vw_pointer_unsafe().weights.mask();
194  D.N = 0;
195  D.E = 0;
196  for (size_t i = 0; i < ec.size(); i++)
197  if (example_is_edge(ec[i]))
198  D.E++;
199  else // it's a node!
200  {
201  if (D.E > 0)
202  THROW("error: got a node after getting edges!");
203 
204  D.N++;
205  if (ec[i]->l.cs.costs.size() > 0)
206  {
207  D.true_counts[ec[i]->l.cs.costs[0].class_index] += 1.;
208  D.true_counts_total += 1.;
209  }
210  }
211 
212  if ((D.N == 0) && (D.E > 0))
213  THROW("error: got edges without any nodes (perhaps ring_size is too small?)!");
214 
215  D.adj = std::vector<std::vector<size_t>>(D.N, std::vector<size_t>(0));
216 
217  for (size_t i = D.N; i < ec.size(); i++)
218  {
219  for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
220  {
221  if (ec[i]->l.cs.costs[n].class_index > D.N)
222  THROW("error: edge source points to too large of a node id: " << (ec[i]->l.cs.costs[n].class_index) << " > "
223  << D.N);
224  }
225  for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
226  {
227  size_t nn = ec[i]->l.cs.costs[n].class_index;
228  if ((nn > 0) &&
229  (((D.adj[nn - 1].size() == 0) || (D.adj[nn - 1][D.adj[nn - 1].size() - 1] != i)))) // don't allow dups
230  D.adj[nn - 1].push_back(i);
231  }
232  }
233 
234  run_bfs(D, ec);
235 
236  D.pred.clear();
237  for (size_t n = 0; n < D.N; n++) D.pred.push_back(D.K + 1);
238 }
parameters weights
Definition: global_data.h:537
T * get_task_data()
Definition: search.h:89
vw & get_vw_pointer_unsafe()
Definition: search.cc:3115
Definition: nn.cc:24
uint32_t wpp
Definition: global_data.h:432
bool example_is_edge(example *e)
uint32_t stride_shift()
uint64_t mask()
#define THROW(args)
Definition: vw_exception.h:181
void run_bfs(task_data &D, multi_ex &ec)

◆ takedown()

void GraphTask::takedown ( Search::search sch,
multi_ex  
)

Definition at line 240 of file search_graph.cc.

References GraphTask::task_data::adj, GraphTask::task_data::bfs, Search::search::get_task_data(), and GraphTask::task_data::pred.

241 {
242  task_data& D = *sch.get_task_data<task_data>();
243  D.bfs.clear();
244  D.pred.clear();
245  for (auto x : D.adj) x.clear();
246  D.adj.clear();
247 }
T * get_task_data()
Definition: search.h:89

Variable Documentation

◆ task

Search::search_task GraphTask::task = {"graph", run, initialize, finish, setup, takedown}

Definition at line 63 of file search_graph.cc.