Vowpal Wabbit
search_sequencetask.cc
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5  */
6 #include "search_sequencetask.h"
7 #include "vw.h"
8 
9 using namespace VW::config;
10 
11 namespace SequenceTask
12 {
13 Search::search_task task = {"sequence", run, initialize, nullptr, nullptr, nullptr};
14 }
16 {
18 }
20 {
21 Search::search_task task = {"sequence_ctg", run, initialize, nullptr, nullptr, nullptr};
22 }
23 namespace ArgmaxTask
24 {
25 Search::search_task task = {"argmax", run, initialize, finish, nullptr, nullptr};
26 }
28 {
29 Search::search_task task = {"sequence_demoldf", run, initialize, finish, nullptr, nullptr};
30 }
31 
32 namespace SequenceTask
33 {
34 void initialize(Search::search& sch, size_t& /*num_actions*/, options_i& /*options*/)
35 {
36  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
37  Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss
38  Search::EXAMPLES_DONT_CHANGE | // we don't do any internal example munging
39  0);
40 }
41 
42 void run(Search::search& sch, multi_ex& ec)
43 {
44  Search::predictor P(sch, (ptag)0);
45  for (size_t i = 0; i < ec.size(); i++)
46  {
47  action oracle = ec[i]->l.multi.label;
48  size_t prediction = P.set_tag((ptag)i + 1)
49  .set_input(*ec[i])
50  .set_oracle(oracle)
52  .predict();
53 
54  if (sch.output().good())
55  sch.output() << sch.pretty_label((uint32_t)prediction) << ' ';
56  }
57 }
58 } // namespace SequenceTask
59 
60 namespace SequenceSpanTask
61 {
63 {
64  BIO,
66 };
67 // the format for the BIO encoding is:
68 // label description
69 // 1 "O" (out)
70 // n even begin X, where X is defined by n/2
71 // n odd in X, where X is (n-1)/2
72 // thus, valid transitions are:
73 // * -> 1 (anything to OUT)
74 // * -> n even (anything in BEGIN X)
75 // n even -> n+1 (BEGIN X to IN X)
76 // n odd>1 -> n (IN X to IN X)
77 // the format for the BILOU (begin, inside, last, out, unit-length) encoding is:
78 // label description
79 // 1 out
80 // n>1: let m=n-2:
81 // m % 4 == 0 unit-(m div 4)
82 // m % 4 == 1 begin-(m div 4)
83 // m % 4 == 2 in-(m div 4)
84 // m % 4 == 3 last-(m div 4)
85 // thus, valid transitions are:
86 // 1 -> 1; 2, 6, 10, ...; 3, 7, 11, ... out to { out, unit-Y, begin-Y } 1
87 // m%4=0 -> 1; 2, 6, 10, ..., 3, 7, 11, ... unit-X to { out, unit-Y, begin-Y } 2, 6, 10, 14, ...
88 // m%4=1 -> m+1, m+2 begin-X to { in-X, last-X } 3, 7, 11, 15, ...
89 // m%4=2 -> m, m+1 in-X to { in-X, last-X } 4, 8, 12, 16, ...
90 // m%4=3 -> 1; 2, 6, 10, ...; 3, 7, 11, ... last-X to { out, unit-Y, begin-Y } 5, 9, 13, 17, ...
91 
93 {
94  return y / 2 + 1; // out -> out, {unit,begin} -> begin; {in,last} -> in
95 }
96 
98 {
99  for (size_t n = 0; n < ec.size(); n++)
100  {
101  MULTICLASS::label_t& ylab = ec[n]->l.multi;
102  action y = ylab.label;
103  action nexty = (n == ec.size() - 1) ? 0 : ec[n + 1]->l.multi.label;
104  if (y == 1) // do nothing
105  ;
106  else if (y % 2 == 0) // this is a begin-X
107  {
108  if (nexty != y + 1) // should be unit
109  ylab.label = (y / 2 - 1) * 4 + 2; // from 2 to 2, 4 to 6, 6 to 10, etc.
110  else // should be begin-X
111  ylab.label = (y / 2 - 1) * 4 + 3; // from 2 to 3, 4 to 7, 6 to 11, etc.
112  }
113  else if (y % 2 == 1) // this is an in-X
114  {
115  if (nexty != y) // should be last
116  ylab.label = (y - 1) * 2 + 1; // from 3 to 5, 5 to 9, 7 to 13, etc.
117  else // should be in-X
118  ylab.label = (y - 1) * 2; // from 3 to 4, 5 to 8, 7 to 12, etc.
119  }
120  assert(y == bilou_to_bio(ylab.label));
121  }
122 }
123 
124 struct task_data
125 {
128  v_array<action> only_two_allowed; // used for BILOU encoding
129  size_t multipass;
130 };
131 
132 void initialize(Search::search& sch, size_t& num_actions, options_i& options)
133 {
134  task_data* D = new task_data();
135 
136  bool search_span_bilou = false;
137  option_group_definition new_options("search sequencespan options");
138  new_options
139  .add(make_option("search_span_bilou", search_span_bilou)
140  .help("switch to (internal) BILOU encoding instead of BIO encoding"))
141  .add(make_option("search_span_multipass", D->multipass).default_value(1).help("do multiple passes"));
142  options.add_and_parse(new_options);
143 
144  if (search_span_bilou)
145  {
146  std::cerr << "switching to BILOU encoding for sequence span labeling" << std::endl;
147  D->encoding = BILOU;
148  num_actions = num_actions * 2 - 1;
149  }
150  else
151  D->encoding = BIO;
152 
153  D->allowed_actions.clear();
154 
155  if (D->encoding == BIO)
156  {
158  for (action l = 2; l < num_actions; l += 2) D->allowed_actions.push_back(l);
159  D->allowed_actions.push_back(1); // push back an extra 1 that we can overwrite later if we want
160  }
161  else if (D->encoding == BILOU)
162  {
164  for (action l = 2; l < num_actions; l += 4)
165  {
167  D->allowed_actions.push_back(l + 1);
168  }
171  }
172 
173  sch.set_task_data<task_data>(D);
174  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
175  Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss
176  Search::EXAMPLES_DONT_CHANGE | // we don't do any internal example munging
177  0);
178  sch.set_num_learners(D->multipass);
179 }
180 
182 {
183  task_data* D = sch.get_task_data<task_data>();
186  delete D;
187 }
188 
190 {
191  task_data& D = *sch.get_task_data<task_data>();
192  if (D.encoding == BILOU)
194 }
195 
197 {
198  task_data& D = *sch.get_task_data<task_data>();
199 
200  if (D.encoding == BILOU)
201  for (size_t n = 0; n < ec.size(); n++)
202  {
203  MULTICLASS::label_t ylab = ec[n]->l.multi;
204  ylab.label = bilou_to_bio(ylab.label);
205  }
206 }
207 
208 void run(Search::search& sch, multi_ex& ec)
209 {
210  task_data& D = *sch.get_task_data<task_data>();
211  v_array<action>* y_allowed = &(D.allowed_actions);
212  Search::predictor P(sch, (ptag)0);
213  for (size_t pass = 1; pass <= D.multipass; pass++)
214  {
215  action last_prediction = 1;
216  for (size_t i = 0; i < ec.size(); i++)
217  {
218  action oracle = ec[i]->l.multi.label;
219  size_t len = y_allowed->size();
220  P.set_tag((ptag)i + 1);
221  P.set_learner_id(pass - 1);
222  if (D.encoding == BIO)
223  {
224  if (last_prediction == 1)
225  P.set_allowed(y_allowed->begin(), len - 1);
226  else if (last_prediction % 2 == 0)
227  {
228  (*y_allowed)[len - 1] = last_prediction + 1;
229  P.set_allowed(*y_allowed);
230  }
231  else
232  {
233  (*y_allowed)[len - 1] = last_prediction;
234  P.set_allowed(*y_allowed);
235  }
236  if ((oracle > 1) && (oracle % 2 == 1) && (last_prediction != oracle) && (last_prediction != oracle - 1))
237  oracle = 1; // if we are supposed to I-X, but last wasn't B-X or I-X, then say O
238  }
239  else if (D.encoding == BILOU)
240  {
241  if ((last_prediction == 1) || ((last_prediction - 2) % 4 == 0) ||
242  ((last_prediction - 2) % 4 == 3)) // O or unit-X or last-X
243  {
245  // we cannot allow in-X or last-X next
246  if ((oracle > 1) && (((oracle - 2) % 4 == 2) || ((oracle - 2) % 4 == 3)))
247  oracle = 1;
248  }
249  else // begin-X or in-X
250  {
251  action other = ((last_prediction - 2) % 4 == 1) ? (last_prediction + 2) : last_prediction;
252  P.set_allowed(last_prediction + 1);
253  P.add_allowed(other);
254  if ((oracle != last_prediction + 1) && (oracle != other))
255  oracle = other;
256  }
257  }
258  P.set_input(*ec[i]);
259  P.set_condition_range((ptag)i, sch.get_history_length(), 'p');
260  if (pass > 1)
261  P.add_condition_range((ptag)(i + 1 + sch.get_history_length()), sch.get_history_length() + 1, 'a');
262  P.set_oracle(oracle);
263  last_prediction = P.predict();
264 
265  if ((pass == D.multipass) && sch.output().good())
266  sch.output() << ((D.encoding == BIO) ? last_prediction : bilou_to_bio(last_prediction)) << ' ';
267  }
268  }
269 }
270 } // namespace SequenceSpanTask
271 
272 namespace SequenceTaskCostToGo
273 {
274 void initialize(Search::search& sch, size_t& num_actions, options_i& /*options*/)
275 {
276  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
277  Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss
278  Search::EXAMPLES_DONT_CHANGE | // we don't do any internal example munging
279  Search::ACTION_COSTS | // we'll provide cost-per-action (rather than oracle)
280  0);
281  sch.set_task_data<size_t>(&num_actions);
282 }
283 
284 void run(Search::search& sch, multi_ex& ec)
285 {
286  size_t K = *sch.get_task_data<size_t>();
287  float* costs = calloc_or_throw<float>(K);
288  Search::predictor P(sch, (ptag)0);
289  for (size_t i = 0; i < ec.size(); i++)
290  {
291  action oracle = ec[i]->l.multi.label;
292  for (size_t k = 0; k < K; k++) costs[k] = 1.;
293  costs[oracle - 1] = 0.;
294  size_t prediction = P.set_tag((ptag)i + 1)
295  .set_input(*ec[i])
296  .set_allowed(nullptr, costs, K)
297  .set_condition_range((ptag)i, sch.get_history_length(), 'p')
298  .predict();
299  if (sch.output().good())
300  sch.output() << sch.pretty_label((uint32_t)prediction) << ' ';
301  }
302  free(costs);
303 }
304 } // namespace SequenceTaskCostToGo
305 
306 namespace ArgmaxTask
307 {
308 struct task_data
309 {
313 };
314 
315 void initialize(Search::search& sch, size_t& /*num_actions*/, options_i& options)
316 {
317  task_data* D = new task_data();
318 
319  option_group_definition new_options("argmax options");
320  new_options.add(make_option("cost", D->false_negative_cost).default_value(10.0f).help("False Negative Cost"))
321  .add(make_option("negative_weight", D->negative_weight)
322  .default_value(1.f)
323  .help("Relative weight of negative examples"))
324  .add(make_option("max", D->predict_max).help("Disable structure: just predict the max"));
325  options.add_and_parse(new_options);
326 
327  sch.set_task_data(D);
328 
329  if (D->predict_max)
330  sch.set_options(Search::EXAMPLES_DONT_CHANGE); // we don't do any internal example munging
331  else
332  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
333  Search::EXAMPLES_DONT_CHANGE); // we don't do any internal example munging
334 }
335 
337 {
338  task_data* D = sch.get_task_data<task_data>();
339  delete D;
340 }
341 
342 void run(Search::search& sch, multi_ex& ec)
343 {
344  task_data& D = *sch.get_task_data<task_data>();
345  uint32_t max_prediction = 1;
346  uint32_t max_label = 1;
347 
348  for (size_t i = 0; i < ec.size(); i++) max_label = std::max(ec[i]->l.multi.label, max_label);
349 
350  for (ptag i = 0; i < ec.size(); i++)
351  {
352  // labels should be 1 or 2, and our output is MAX of all predicted values
353  uint32_t oracle = D.predict_max ? max_label : ec[i]->l.multi.label;
354  uint32_t prediction = sch.predict(*ec[i], i + 1, &oracle, 1, &i, "p");
355 
356  max_prediction = std::max(prediction, max_prediction);
357  }
358  float loss = 0.;
359  if (max_label > max_prediction)
361  else if (max_prediction > max_label)
362  loss = 1.;
363  sch.loss(loss);
364 
365  if (sch.output().good())
366  sch.output() << max_prediction;
367 }
368 } // namespace ArgmaxTask
369 
370 namespace SequenceTask_DemoLDF // this is just to debug/show off how to do LDF
371 {
372 namespace CS = COST_SENSITIVE;
373 struct task_data
374 {
376  size_t num_actions;
377 };
378 
379 void initialize(Search::search& sch, size_t& num_actions, options_i& /*options*/)
380 {
381  CS::wclass default_wclass = {0., 0, 0., 0.};
382 
383  example* ldf_examples = VW::alloc_examples(sizeof(CS::label), num_actions);
384  for (size_t a = 0; a < num_actions; a++)
385  {
386  CS::label& lab = ldf_examples[a].l.cs;
388  lab.costs.push_back(default_wclass);
389  ldf_examples[a].interactions = &sch.get_vw_pointer_unsafe().interactions;
390  }
391 
392  task_data* data = &calloc_or_throw<task_data>();
393  data->ldf_examples = ldf_examples;
394  data->num_actions = num_actions;
395 
396  sch.set_task_data<task_data>(data);
397  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
398  Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss
399  Search::IS_LDF); // we generate ldf examples
400 }
401 
403 {
404  task_data* data = sch.get_task_data<task_data>();
405  for (size_t a = 0; a < data->num_actions; a++) VW::dealloc_example(CS::cs_label.delete_label, data->ldf_examples[a]);
406  free(data->ldf_examples);
407  free(data);
408 }
409 
410 // this is totally bogus for the example -- you'd never actually do this!
412  Search::search& sch, bool /* audit */, example* ec, uint64_t mult_amount, uint64_t plus_amount)
413 {
414  size_t ss = sch.get_stride_shift();
415  for (features& fs : *ec)
416  for (feature_index& idx : fs.indicies) idx = (((idx >> ss) * mult_amount) + plus_amount) << ss;
417 }
418 
419 void run(Search::search& sch, multi_ex& ec)
420 {
421  task_data* data = sch.get_task_data<task_data>();
422  Search::predictor P(sch, (ptag)0);
423  for (ptag i = 0; i < ec.size(); i++)
424  {
425  for (uint32_t a = 0; a < data->num_actions; a++)
426  {
427  if (sch.predictNeedsExample()) // we can skip this work if `predict` won't actually use the example data
428  {
429  VW::copy_example_data(false, &data->ldf_examples[a], ec[i]); // copy but leave label alone!
430  // now, offset it appropriately for the action id
431  my_update_example_indicies(sch, true, &data->ldf_examples[a], 28904713, 4832917 * (uint64_t)a);
432  }
433 
434  // regardless of whether the example is needed or not, the class info is needed
435  CS::label& lab = data->ldf_examples[a].l.cs;
436  // need to tell search what the action id is, so that it can add history features correctly!
437  lab.costs[0].x = 0.;
438  lab.costs[0].class_index = a + 1;
439  lab.costs[0].partial_prediction = 0.;
440  lab.costs[0].wap_value = 0.;
441  }
442 
443  action oracle = ec[i]->l.multi.label - 1;
444  action pred_id = P.set_tag((ptag)(i + 1))
445  .set_input(data->ldf_examples, data->num_actions)
446  .set_oracle(oracle)
447  .set_condition_range(i, sch.get_history_length(), 'p')
448  .predict();
449  action prediction = pred_id + 1; // or ldf_examples[pred_id]->ld.costs[0].weight_index
450 
451  if (sch.output().good())
452  sch.output() << prediction << ' ';
453  }
454 }
455 } // namespace SequenceTask_DemoLDF
predictor & set_oracle(action a)
Definition: search.cc:3314
size_t get_stride_shift()
Definition: search.cc:3097
uint32_t get_history_length()
Definition: search.cc:3098
vw * setup(options_i &options)
Definition: main.cc:27
Search::search_task task
label_parser cs_label
std::stringstream & output()
Definition: search.cc:3043
void copy_example_data(bool audit, example *dst, example *src)
Definition: example.cc:72
std::string pretty_label(action a)
Definition: search.cc:3100
std::vector< std::string > * interactions
void(* default_label)(void *)
Definition: label_parser.h:12
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
Definition: example.cc:219
the core definition of a set of features.
void delete_label(void *v)
Definition: cb.cc:98
uint32_t action
Definition: search.h:19
virtual void add_and_parse(const option_group_definition &group)=0
action predict()
Definition: search.cc:3460
void finish(vw &all, bool delete_all)
Definition: parse_args.cc:1823
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60
action bilou_to_bio(action y)
T * get_task_data()
Definition: search.h:89
uint32_t ACTION_COSTS
Definition: search.cc:50
example * alloc_examples(size_t, size_t count=1)
Definition: example.cc:204
void convert_bio_to_bilou(multi_ex &ec)
uint32_t AUTO_CONDITION_FEATURES
Definition: search.cc:49
predictor & add_condition_range(ptag hi, ptag count, char name0)
Definition: search.cc:3427
vw & get_vw_pointer_unsafe()
Definition: search.cc:3115
void push_back(const T &new_ele)
Definition: v_array.h:107
COST_SENSITIVE::label cs
Definition: example.h:30
void clear()
Definition: v_array.h:88
void my_update_example_indicies(Search::search &sch, bool, example *ec, uint64_t mult_amount, uint64_t plus_amount)
uint32_t IS_LDF
Definition: search.cc:49
uint64_t feature_index
Definition: feature_group.h:21
vw * initialize(options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
Definition: parse_args.cc:1654
predictor & add_allowed(action a)
Definition: search.cc:3342
void set_options(uint32_t opts)
Definition: search.cc:3053
predictor & set_learner_id(size_t id)
Definition: search.cc:3448
action predict(example &ec, ptag my_tag, const action *oracle_actions, size_t oracle_actions_cnt=1, const ptag *condition_on=nullptr, const char *condition_on_names=nullptr, const action *allowed_actions=nullptr, size_t allowed_actions_cnt=0, const float *allowed_actions_cost=nullptr, size_t learner_id=0, float weight=0.)
Definition: search.cc:2967
predictor & set_allowed(action a)
Definition: search.cc:3352
option_group_definition & add(T &&op)
Definition: options.h:90
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
std::vector< example * > multi_ex
Definition: example.h:122
uint32_t AUTO_HAMMING_LOSS
Definition: search.cc:49
polylabel l
Definition: example.h:57
constexpr uint64_t a
Definition: rand48.cc:11
void set_task_data(T *data)
Definition: search.h:84
predictor & set_condition_range(ptag hi, ptag count, char name0)
Definition: search.cc:3441
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
predictor & set_tag(ptag tag)
Definition: search.cc:3454
std::vector< std::string > interactions
Definition: global_data.h:457
void predict(bfgs &b, base_learner &, example &ec)
Definition: bfgs.cc:956
void delete_v()
Definition: v_array.h:98
uint32_t ptag
Definition: search.h:20
void takedown(Search::search &sch, multi_ex &ec)
uint32_t EXAMPLES_DONT_CHANGE
Definition: search.cc:49
bool predictNeedsExample()
Definition: search.cc:3041
v_array< wclass > costs
void set_num_learners(size_t num_learners)
Definition: search.cc:3094
predictor & set_input(example &input_example)
Definition: search.cc:3173
float f
Definition: cache.cc:40
void run(Search::search &sch, multi_ex &ec)
void loss(float incr_loss)
Definition: search.cc:3039