84 std::vector<std::vector<size_t>>
adj;
85 std::vector<uint32_t>
bfs;
102 .
add(
make_option(
"search_graph_num_loops", D->
num_loops).default_value(2).help(
"how many loops to run [def: 2]"))
105 .help(
"use a different learner for each pass"))
107 .help(
"construct features based on directed graph semantics"));
120 std::cerr <<
"K=" << D->
K <<
", numN=" << D->
numN << std::endl;
126 for (
size_t k = 0; k <= D->
K; k++) D->
true_counts[k] = 1.;
150 std::vector<bool> touched;
151 for (
size_t n = 0; n < D.
N; n++) touched.push_back(
false);
157 while (D.
bfs.size() < D.
N)
159 while (i < D.
bfs.size())
161 uint32_t n = D.
bfs[i];
162 for (
size_t id : D.
adj[n])
163 for (
size_t j = 0; j < ec[
id]->l.cs.costs.size(); j++)
165 uint32_t m = ec[
id]->l.cs.costs[j].class_index;
166 if ((m > 0) && (!touched[m - 1]))
168 D.
bfs.push_back(m - 1);
169 touched[m - 1] =
true;
175 if (D.
bfs.size() < D.
N)
177 for (uint32_t n = 0; n < D.
N; n++)
196 for (
size_t i = 0; i < ec.size(); i++)
202 THROW(
"error: got a node after getting edges!");
205 if (ec[i]->l.cs.costs.size() > 0)
207 D.
true_counts[ec[i]->l.cs.costs[0].class_index] += 1.;
212 if ((D.N == 0) && (D.E > 0))
213 THROW(
"error: got edges without any nodes (perhaps ring_size is too small?)!");
215 D.adj = std::vector<std::vector<size_t>>(D.N, std::vector<size_t>(0));
217 for (
size_t i = D.N; i < ec.size(); i++)
219 for (
size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
221 if (ec[i]->l.cs.costs[n].class_index > D.N)
222 THROW(
"error: edge source points to too large of a node id: " << (ec[i]->l.cs.costs[n].class_index) <<
" > " 225 for (
size_t n = 0; n < ec[i]->l.cs.costs.size(); n++)
227 size_t nn = ec[i]->l.cs.costs[n].class_index;
229 (((D.adj[nn - 1].size() == 0) || (D.adj[nn - 1][D.adj[nn - 1].size() - 1] != i))))
230 D.adj[nn - 1].push_back(i);
237 for (
size_t n = 0; n < D.N; n++) D.pred.push_back(D.K + 1);
245 for (
auto x : D.
adj) x.clear();
253 for (
size_t k = 0; k < D.
numN; k++)
275 for (
size_t i : D.
adj[n])
279 float pred_total = 0.;
280 uint32_t last_pred = 0;
283 bool n_in_sink =
true;
285 for (
size_t j = 0; j < ec[i]->l.cs.costs.size() - 1; j++)
287 size_t m = ec[i]->l.cs.costs[j].class_index;
297 bool m_in_sink =
false;
298 for (
size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
300 size_t m = ec[i]->l.cs.costs[j].class_index;
306 if (j == ec[i]->l.cs.costs.size() - 1)
311 size_t other_side = (D.
directed && (n_in_sink != m_in_sink)) ? (D.
K + 1) : 0;
314 last_pred = (uint32_t)D.
pred[m] - 1 + (uint32_t)other_side;
324 if (pred_total == 0.)
332 if (pred_total <= 1.)
335 GD::foreach_feature<task_data, uint64_t, add_edge_features_single_fn>(sch.
get_vw_pointer_unsafe(), edge, D);
338 GD::foreach_feature<task_data, uint64_t, add_edge_features_group_fn>(sch.
get_vw_pointer_unsafe(), edge, D);
345 for (std::string& i : all.
pairs)
351 ec[n]->num_features += ec[n]->feature_space[i0].size() * ec[n]->feature_space[i1].size();
352 ec[n]->total_sum_feat_sq += ec[n]->feature_space[i0].sum_feat_sq * ec[n]->feature_space[i1].sum_feat_sq;
359 ec[n]->indices.pop();
362 ec[n]->num_features -= fs.
size();
366 #define IDX(i, j) ((i) * (D.K + 1) + j) 372 for (
size_t k = 1; k <= D.
K; k++)
376 for (
size_t j = 1; j <= D.
K; j++)
387 float pre = correctC / predC;
388 float rec = correctC / trueC;
389 total_f1 += 2 * pre * rec / (pre + rec);
392 return total_f1 / count_f1;
398 float loss_val = 0.5f / (float)D.
num_loops;
399 for (
size_t n = 0; n < D.
N; n++) D.
pred[n] = D.
K + 1;
401 for (
size_t loop = 0; loop < D.
num_loops; loop++)
403 bool last_loop = loop == (D.
num_loops - 1);
413 for (
int n_id = start; n_id != end; n_id += step)
415 uint32_t n = D.
bfs[n_id];
416 uint32_t k = (ec[n]->l.cs.costs.size() > 0) ? ec[n]->l.cs.costs[0].class_index : 0;
425 if (
false && (k > 0))
427 float min_count = 1e12f;
428 for (
size_t k2 = 1; k2 <= D.
K; k2++) min_count = std::min(min_count, D.
true_counts[k2]);
440 for (
size_t i = 0; i < D.
adj[n].size(); i++)
442 for (
size_t j = 0; j < ec[i]->l.cs.costs.size(); j++)
444 uint32_t m = ec[i]->l.cs.costs[j].class_index;
456 if (ec[n]->l.cs.costs.size() > 0)
457 sch.
loss((ec[n]->l.cs.costs[0].class_index == D.
pred[n]) ? 0.f : (last_loop ? 0.5f : loss_val));
468 for (uint32_t n = 0; n < D.
N; n++) sch.
output() << D.
pred[n] <<
' ';
predictor & set_oracle(action a)
vw * setup(options_i &options)
std::vector< std::string > pairs
void push_back(feature_value v, feature_index i)
void set_label_parser(label_parser &lp, bool(*is_test)(polylabel &))
std::stringstream & output()
predictor & add_condition(ptag tag, char name)
constexpr unsigned char neighbor_namespace
the core definition of a set of features.
float * neighbor_predictions
void add_edge_features_single_fn(task_data &D, float fv, uint64_t fx)
virtual void add_and_parse(const option_group_definition &group)=0
predictor & set_weight(float w)
void add_edge_features(Search::search &sch, task_data &D, size_t n, multi_ex &ec)
void finish(vw &all, bool delete_all)
std::vector< std::vector< size_t > > adj
uint32_t * confusion_matrix
std::array< features, NUM_NAMESPACES > feature_space
vw & get_vw_pointer_unsafe()
std::vector< uint32_t > bfs
vw * initialize(options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
void del_edge_features(task_data &, uint32_t n, multi_ex &ec)
void set_options(uint32_t opts)
predictor & set_learner_id(size_t id)
bool example_is_edge(example *e)
option_group_definition & add(T &&op)
int add(svm_params ¶ms, svm_example *fec)
std::vector< example * > multi_ex
void set_task_data(T *data)
typed_option< T > make_option(std::string name, T &location)
void add_edge_features_group_fn(task_data &D, float fv, uint64_t fx)
bool example_is_test(polylabel &l)
bool predictNeedsExample()
void takedown(Search::search &sch, multi_ex &)
void set_num_learners(size_t num_learners)
std::vector< size_t > pred
predictor & set_input(example &input_example)
void run(Search::search &sch, multi_ex &ec)
void run_bfs(task_data &D, multi_ex &ec)
void loss(float incr_loss)
float macro_f(task_data &D)