Vowpal Wabbit
Classes | Enumerations | Functions | Variables
SequenceSpanTask Namespace Reference

Classes

struct  task_data
 

Enumerations

enum  EncodingType { BIO, BILOU }
 

Functions

action bilou_to_bio (action y)
 
void convert_bio_to_bilou (multi_ex &ec)
 
void initialize (Search::search &sch, size_t &num_actions, options_i &options)
 
void finish (Search::search &sch)
 
void setup (Search::search &sch, multi_ex &ec)
 
void takedown (Search::search &sch, multi_ex &ec)
 
void run (Search::search &sch, multi_ex &ec)
 

Variables

Search::search_task task = {"sequencespan", run, initialize, finish, setup, takedown}
 

Enumeration Type Documentation

◆ EncodingType

Enumerator
BIO 
BILOU 

Definition at line 62 of file search_sequencetask.cc.

Function Documentation

◆ bilou_to_bio()

action SequenceSpanTask::bilou_to_bio ( action  y)
inline

Definition at line 92 of file search_sequencetask.cc.

Referenced by convert_bio_to_bilou(), run(), and takedown().

// Maps a BILOU-numbered action id onto the BIO numbering using integer
// division: 1 -> 1, 2 and 3 -> 2, 4 and 5 -> 3, 6 and 7 -> 4, and so on.
93 {
94  return y / 2 + 1; // out -> out, {unit,begin} -> begin; {in,last} -> in
95 }

◆ convert_bio_to_bilou()

void SequenceSpanTask::convert_bio_to_bilou ( multi_ex &  ec)

Definition at line 97 of file search_sequencetask.cc.

References bilou_to_bio(), and MULTICLASS::label_t::label.

Referenced by setup().

// Rewrites, in place, the multiclass label of every example in ec from the
// BIO numbering (1 = out, even = begin-X, odd > 1 = in-X) to the BILOU
// numbering (1 = out, then unit/begin/in/last for each X in groups of four).
// The choice between unit/begin and between in/last depends on the label of
// the following example.
98 {
99  for (size_t n = 0; n < ec.size(); n++)
100  {
101  MULTICLASS::label_t& ylab = ec[n]->l.multi; // reference: assignments below modify the example's label
102  action y = ylab.label; // original BIO id, kept for the round-trip assert at the end
103  action nexty = (n == ec.size() - 1) ? 0 : ec[n + 1]->l.multi.label; // still BIO-numbered (ec[n + 1] is not rewritten yet); 0 after the last example
104  if (y == 1) // do nothing
105  ;
106  else if (y % 2 == 0) // this is a begin-X
107  {
108  if (nexty != y + 1) // should be unit
109  ylab.label = (y / 2 - 1) * 4 + 2; // from 2 to 2, 4 to 6, 6 to 10, etc.
110  else // should be begin-X
111  ylab.label = (y / 2 - 1) * 4 + 3; // from 2 to 3, 4 to 7, 6 to 11, etc.
112  }
113  else if (y % 2 == 1) // this is an in-X
114  {
115  if (nexty != y) // should be last
116  ylab.label = (y - 1) * 2 + 1; // from 3 to 5, 5 to 9, 7 to 13, etc.
117  else // should be in-X
118  ylab.label = (y - 1) * 2; // from 3 to 4, 5 to 8, 7 to 12, etc.
119  }
120  assert(y == bilou_to_bio(ylab.label)); // converting back must reproduce the original BIO id
121  }
122 }
uint32_t action
Definition: search.h:19
action bilou_to_bio(action y)

◆ finish()

void SequenceSpanTask::finish ( Search::search &  sch)

Definition at line 181 of file search_sequencetask.cc.

References SequenceSpanTask::task_data::allowed_actions, v_array< T >::delete_v(), Search::search::get_task_data(), and SequenceSpanTask::task_data::only_two_allowed.

// Tears down the task state obtained from sch: calls delete_v() on both
// action arrays, then deletes the task_data object itself (presumably the
// one allocated with new in initialize() and stored via set_task_data).
182 {
183  task_data* D = sch.get_task_data<task_data>();
184  D->allowed_actions.delete_v();
185  D->only_two_allowed.delete_v();
186  delete D; // must come last: the two calls above dereference D
187 }
T * get_task_data()
Definition: search.h:89

◆ initialize()

void SequenceSpanTask::initialize ( Search::search &  sch,
size_t &  num_actions,
options_i &  options 
)

Definition at line 132 of file search_sequencetask.cc.

References VW::config::option_group_definition::add(), VW::config::options_i::add_and_parse(), SequenceSpanTask::task_data::allowed_actions, Search::AUTO_CONDITION_FEATURES, Search::AUTO_HAMMING_LOSS, BILOU, BIO, v_array< T >::clear(), SequenceSpanTask::task_data::encoding, Search::EXAMPLES_DONT_CHANGE, VW::config::make_option(), SequenceSpanTask::task_data::multipass, SequenceSpanTask::task_data::only_two_allowed, v_array< T >::push_back(), Search::search::set_num_learners(), Search::search::set_options(), and Search::search::set_task_data().

// Allocates the task_data, parses the two sequencespan options, selects the
// label encoding (BIO by default, BILOU when --search_span_bilou is given),
// pre-fills the allowed-action lists used by run(), and registers the task
// data, search options and learner count on sch.
// num_actions is an in/out parameter: it is rewritten when BILOU is selected.
133 {
134  task_data* D = new task_data();
135 
136  bool search_span_bilou = false;
137  option_group_definition new_options("search sequencespan options");
138  new_options
139  .add(make_option("search_span_bilou", search_span_bilou)
140  .help("switch to (internal) BILOU encoding instead of BIO encoding"))
141  .add(make_option("search_span_multipass", D->multipass).default_value(1).help("do multiple passes"));
142  options.add_and_parse(new_options);
143 
144  if (search_span_bilou)
145  {
146  std::cerr << "switching to BILOU encoding for sequence span labeling" << std::endl;
147  D->encoding = BILOU;
148  num_actions = num_actions * 2 - 1; // e.g. a BIO set of 1 + 2T ids becomes 1 + 4T ids
149  }
150  else
151  D->encoding = BIO;
152 
153  D->allowed_actions.clear();
154 
155  if (D->encoding == BIO)
156  {
157  D->allowed_actions.push_back(1); // id 1 first
158  for (action l = 2; l < num_actions; l += 2) D->allowed_actions.push_back(l); // then every even id below num_actions
159  D->allowed_actions.push_back(1); // push back an extra 1 that we can overwrite later if we want
160  }
161  else if (D->encoding == BILOU)
162  {
163  D->allowed_actions.push_back(1); // id 1 first
164  for (action l = 2; l < num_actions; l += 4) // first id of each group of four
165  {
166  D->allowed_actions.push_back(l);
167  D->allowed_actions.push_back(l + 1);
168  }
169  D->only_two_allowed.push_back(0); // two placeholder slots; not read by the run() listing on this page
170  D->only_two_allowed.push_back(0);
171  }
172 
173  sch.set_task_data<task_data>(D);
174  sch.set_options(Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please
175  Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss
176  Search::EXAMPLES_DONT_CHANGE | // we don't do any internal example munging
177  0);
178  sch.set_num_learners(D->multipass); // run() uses learner id pass - 1 for each pass
179 }
uint32_t action
Definition: search.h:19
virtual void add_and_parse(const option_group_definition &group)=0
uint32_t AUTO_CONDITION_FEATURES
Definition: search.cc:49
void set_options(uint32_t opts)
Definition: search.cc:3053
uint32_t AUTO_HAMMING_LOSS
Definition: search.cc:49
void set_task_data(T *data)
Definition: search.h:84
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
uint32_t EXAMPLES_DONT_CHANGE
Definition: search.cc:49
void set_num_learners(size_t num_learners)
Definition: search.cc:3094

◆ run()

void SequenceSpanTask::run ( Search::search &  sch,
multi_ex &  ec 
)

Definition at line 208 of file search_sequencetask.cc.

References Search::predictor::add_allowed(), Search::predictor::add_condition_range(), SequenceSpanTask::task_data::allowed_actions, BILOU, bilou_to_bio(), BIO, SequenceSpanTask::task_data::encoding, Search::search::get_history_length(), Search::search::get_task_data(), SequenceSpanTask::task_data::multipass, Search::search::output(), Search::predictor::predict(), Search::predictor::set_allowed(), Search::predictor::set_condition_range(), Search::predictor::set_input(), Search::predictor::set_learner_id(), Search::predictor::set_oracle(), and Search::predictor::set_tag().

// Makes D.multipass left-to-right passes over the examples in ec. For each
// example it restricts the predictor's allowed actions according to the
// previous prediction, adjusts the oracle action so that it is reachable
// from that previous prediction, and predicts. On the final pass each
// prediction is written to sch.output() in BIO numbering, followed by a space.
209 {
210  task_data& D = *sch.get_task_data<task_data>();
211  v_array<action>* y_allowed = &(D.allowed_actions);
212  Search::predictor P(sch, (ptag)0);
213  for (size_t pass = 1; pass <= D.multipass; pass++)
214  {
215  action last_prediction = 1; // every pass starts as if the previous action was 1 (out)
216  for (size_t i = 0; i < ec.size(); i++)
217  {
218  action oracle = ec[i]->l.multi.label;
219  size_t len = y_allowed->size();
220  P.set_tag((ptag)i + 1); // tags are 1-based
221  P.set_learner_id(pass - 1);
222  if (D.encoding == BIO)
223  {
224  if (last_prediction == 1)
225  P.set_allowed(y_allowed->begin(), len - 1); // leave out the final slot of the list
226  else if (last_prediction % 2 == 0)
227  {
228  (*y_allowed)[len - 1] = last_prediction + 1; // overwrite the final slot with last_prediction + 1
229  P.set_allowed(*y_allowed);
230  }
231  else
232  {
233  (*y_allowed)[len - 1] = last_prediction; // overwrite the final slot with the same odd id
234  P.set_allowed(*y_allowed);
235  }
236  if ((oracle > 1) && (oracle % 2 == 1) && (last_prediction != oracle) && (last_prediction != oracle - 1))
237  oracle = 1; // if we are supposed to I-X, but last wasn't B-X or I-X, then say O
238  }
239  else if (D.encoding == BILOU)
240  {
241  if ((last_prediction == 1) || ((last_prediction - 2) % 4 == 0) ||
242  ((last_prediction - 2) % 4 == 3)) // O or unit-X or last-X
243  {
244  P.set_allowed(D.allowed_actions);
245  // we cannot allow in-X or last-X next
246  if ((oracle > 1) && (((oracle - 2) % 4 == 2) || ((oracle - 2) % 4 == 3)))
247  oracle = 1;
248  }
249  else // begin-X or in-X
250  {
251  action other = ((last_prediction - 2) % 4 == 1) ? (last_prediction + 2) : last_prediction; // remainder 1: other is last_prediction + 2; otherwise last_prediction itself
252  P.set_allowed(last_prediction + 1);
253  P.add_allowed(other); // exactly two actions are allowed here
254  if ((oracle != last_prediction + 1) && (oracle != other))
255  oracle = other; // oracle is neither allowed action: fall back to other
256  }
257  }
258  P.set_input(*ec[i]);
259  P.set_condition_range((ptag)i, sch.get_history_length(), 'p');
260  if (pass > 1)
261  P.add_condition_range((ptag)(i + 1 + sch.get_history_length()), sch.get_history_length() + 1, 'a');
262  P.set_oracle(oracle);
263  last_prediction = P.predict();
264 
265  if ((pass == D.multipass) && sch.output().good()) // only the final pass is written out
266  sch.output() << ((D.encoding == BIO) ? last_prediction : bilou_to_bio(last_prediction)) << ' ';
267  }
268  }
269 }
uint32_t get_history_length()
Definition: search.cc:3098
std::stringstream & output()
Definition: search.cc:3043
uint32_t action
Definition: search.h:19
action bilou_to_bio(action y)
T * get_task_data()
Definition: search.h:89
uint32_t ptag
Definition: search.h:20

◆ setup()

void SequenceSpanTask::setup ( Search::search &  sch,
multi_ex &  ec 
)

◆ takedown()

void SequenceSpanTask::takedown ( Search::search &  sch,
multi_ex &  ec 
)

Definition at line 196 of file search_sequencetask.cc.

References BILOU, bilou_to_bio(), SequenceSpanTask::task_data::encoding, Search::search::get_task_data(), and MULTICLASS::label_t::label.

// When the task runs with the BILOU encoding, maps every example's
// multiclass label in ec back to the BIO numbering with bilou_to_bio()
// (presumably undoing the conversion setup() applies through
// convert_bio_to_bilou()). Does nothing for the BIO encoding.
197 {
198  task_data& D = *sch.get_task_data<task_data>();
199 
200  if (D.encoding == BILOU)
201  for (size_t n = 0; n < ec.size(); n++)
202  {
203  MULTICLASS::label_t& ylab = ec[n]->l.multi; // bind by reference: a by-value copy would discard the converted label
204  ylab.label = bilou_to_bio(ylab.label);
205  }
206 }
action bilou_to_bio(action y)
T * get_task_data()
Definition: search.h:89

Variable Documentation

◆ task

Search::search_task SequenceSpanTask::task = {"sequencespan", run, initialize, finish, setup, takedown}

Definition at line 17 of file search_sequencetask.cc.