15 #define val_namespace 100 // valency and distance feature space 16 #define offset_const 344429 63 .help(
"Ensure that there is only one root in each sentence"));
64 new_options.add(
make_option(
"num_label", data->
num_label).keep().default_value(12).help(
"Number of arc labels"));
68 .help(
"1: arc-hybrid 2: arc-eager"));
71 .help(
"Using one learner instead of three learners for labeled parser"));
74 .help(
"Estimating cost-to-go matrix based on dynamic oracle rathan than rolling-out"));
90 const char *pair[] = {
91 "BC",
"BE",
"BB",
"CC",
"DD",
"EE",
"FF",
"GG",
"EF",
"BH",
"BJ",
"EL",
"dB",
"dC",
"dD",
"dE",
"dF",
"dG",
"dd"};
92 const char *triple[] = {
"EFG",
"BEF",
"BCE",
"BCD",
"BEL",
"ELM",
"BHI",
"BCC",
"BEJ",
"BEH",
"BJK",
"BEN"};
93 std::vector<std::string> newpairs(pair, pair + 19);
94 std::vector<std::string> newtriples(triple, triple + 12);
95 all.
pairs.swap(newpairs);
131 example &ex, uint64_t idx,
unsigned char ns, uint64_t mask, uint64_t multiplier,
bool =
false)
133 ex.
feature_space[(int)ns].push_back(1.0
f, (idx * multiplier) & mask);
137 uint64_t offset,
bool =
false)
143 tgt_fs.
push_back(1.0
f, ((i / multiplier + offset) * multiplier) & mask);
150 for (
features &fs : *ex) fs.clear();
165 else if (a_id == REDUCE_RIGHT)
167 uint32_t last = stack.last();
168 uint32_t hd = stack[stack.size() - 2];
170 children[5][hd] = children[4][hd];
171 children[4][hd] = last;
174 sch.
loss(gold_heads[last] != heads[last] ? 2 : (gold_tags[last] != t_id) ? 1.
f : 0.
f);
175 assert(!stack.empty());
179 else if (a_id == REDUCE_LEFT)
181 size_t last = stack.last();
184 children[3][hd] = children[2][hd];
185 children[2][hd] = (uint32_t)last;
188 sch.
loss(gold_heads[last] != heads[last] ? 2 : (gold_tags[last] != t_id) ? 1.
f : 0.
f);
189 assert(!stack.empty());
193 THROW(
"transition_hybrid failed");
208 else if (a_id == REDUCE_RIGHT)
210 uint32_t hd = stack.last();
211 stack.push_back(idx);
214 children[5][hd] = children[4][hd];
215 children[4][hd] = last;
218 sch.
loss(gold_heads[last] != heads[last] ? 2 : (gold_tags[last] != t_id) ? 1.
f : 0.
f);
221 else if (a_id == REDUCE_LEFT)
223 size_t last = stack.last();
224 uint32_t hd = (idx > n) ? 0 : idx;
226 children[3][hd] = children[2][hd];
227 children[2][hd] = (uint32_t)last;
230 sch.
loss(gold_heads[last] != heads[last] ? 2 : (gold_tags[last] != t_id) ? 1.
f : 0.
f);
231 assert(!stack.empty());
235 else if (a_id == REDUCE)
237 assert(!stack.empty());
241 THROW(
"transition_eager failed");
256 size_t n = ec.size();
257 bool empty = stack.
empty();
258 size_t last = empty ? 0 : stack.
last();
260 for (
size_t i = 0; i < 13; i++) ec_buf[i] =
nullptr;
263 for (
size_t i = 0; i < 3; i++)
264 ec_buf[i] = (stack.
size() > i && *(stack.
end() - (i + 1)) != 0) ? ec[*(stack.
end() - (i + 1)) - 1] : 0;
267 for (
size_t i = 3; i < 6; i++) ec_buf[i] = (idx + (i - 3) - 1 < n) ? ec[idx + i - 3 - 1] : 0;
271 for (
size_t i = 6; i < 10; i++)
272 if (!empty && last != 0 &&
children[i - 4][last] != 0)
273 ec_buf[i] = ec[
children[i - 4][last] - 1];
276 for (
size_t i = 10; i < 12; i++)
277 ec_buf[i] = (idx <= n && children[i - 8][idx] != 0) ? ec[children[i - 8][idx] - 1] : 0;
278 ec_buf[12] = (stack.
size() > 1 && *(stack.
end() - 2) != 0 && children[2][*(stack.
end() - 2)] != 0)
279 ? ec[children[2][*(stack.
end() - 2)] - 1]
283 for (
size_t i = 0; i < 13; i++)
285 uint64_t additional_offset = (uint64_t)(i *
offset_const);
287 add_feature(ex, (uint64_t)438129041 + additional_offset, (
unsigned char)((i + 1) +
'A'), mask, multiplier);
289 add_all_features(ex, *ec_buf[i],
'A' + (
unsigned char)(i + 1), mask, multiplier, additional_offset,
false);
294 temp[0] = empty ? 0 : (idx > n ? 1 : 2 + std::min(static_cast<uint32_t>(5), idx - (uint32_t)last));
295 temp[1] = empty ? 1 : 1 + std::min(static_cast<uint32_t>(5), children[0][last]);
296 temp[2] = empty ? 1 : 1 + std::min(static_cast<uint32_t>(5), children[1][last]);
297 temp[3] = idx > n ? 1 : 1 + std::min(static_cast<uint32_t>(5), children[0][idx]);
298 for (
size_t i = 4; i < 8; i++) temp[i] = (!empty && children[i - 2][last] != 0) ? tags[children[i - 2][last]] : 15;
299 for (
size_t i = 8; i < 10; i++) temp[i] = (idx <= n && children[i - 6][idx] != 0) ? tags[children[i - 6][idx]] : 15;
302 for (
size_t j = 0; j < 10; j++)
304 additional_offset += j * 1023;
323 uint64_t stack_depth, uint64_t state)
328 valid_action.
clear();
333 if (stack_depth >= 2)
335 if (stack_depth >= 1 && state != 0 && idx <= n)
341 for (
size_t i = 0; i <= 4; i++) temp.push_back(1);
348 if (stack_depth == 0)
350 else if (idx <= n + 1 && heads[stack.
last()] ==
my_null)
353 if (stack_depth == 0)
361 temp[REDUCE_LEFT] = 0;
362 if (idx <= n && heads[idx] != my_null)
365 for (uint32_t i = 1; i <= 4; i++)
375 for (
size_t i = 0; i < valid_actions.
size(); i++)
376 if (valid_actions[i] == action)
386 size_t size = stack.
size();
387 size_t last = (size == 0) ? 0 : stack.last();
388 for (
size_t i = 1; i <= 4; i++) action_loss[i] = 0;
390 for (
size_t i = 0; i < size; i++)
392 if (gold_heads[stack[i]] == idx && heads[stack[i]] == my_null)
394 action_loss[
SHIFT] += 1;
397 if (idx <= n && (gold_heads[idx] == stack[i]))
400 action_loss[
SHIFT] += 1;
401 if (stack[i] != last)
405 for (
size_t i = idx; i <= n + 1; i++)
407 if (i <= n && gold_heads[i] == last)
412 if (i != idx && gold_heads[last] == i)
418 if (gold_heads[idx] > idx || (gold_heads[idx] == 0 && size > 0 && stack[0] != 0))
419 action_loss[REDUCE_RIGHT] += 1;
426 size_t size = stack.
size();
427 size_t last = (size == 0) ? 0 : stack.last();
429 for (
size_t i = 1; i <= 3; i++) action_loss[i] = 0;
431 for (
size_t i = 0; i < size - 1; i++)
432 if (idx <= n && (gold_heads[stack[i]] == idx || gold_heads[idx] == stack[i]))
433 action_loss[SHIFT] += 1;
435 if (size > 0 && gold_heads[last] == idx)
436 action_loss[
SHIFT] += 1;
438 for (
size_t i = idx + 1; i <= n; i++)
439 if (gold_heads[i] == last || gold_heads[last] == i)
441 if (size > 0 && idx <= n && gold_heads[idx] == last)
443 if (size >= 2 && gold_heads[last] == stack[size - 2])
446 if (gold_heads[last] >= idx)
449 for (
size_t i = idx; i <= n; i++)
450 if (gold_heads[i] == (uint32_t)last)
451 action_loss[REDUCE_RIGHT] += 1;
455 uint32_t left_label, uint32_t right_label)
462 gold_action_losses.clear();
467 gold_action_losses.push_back(std::make_pair(SHIFT, (
float)action_loss[SHIFT]));
468 for (uint32_t i = 2; i <= 3; i++)
471 for (uint32_t j = 1; j <= num_label; j++)
473 gold_action_losses.push_back(std::make_pair((1 + j + (i - 2) * num_label),
474 action_loss[i] + (
float)(j != (i == REDUCE_LEFT ? left_label : right_label))));
477 gold_action_losses.push_back(std::make_pair(2 + num_label * 2, (
float)action_loss[REDUCE]));
481 for (
action i = 1; i <= 3; i++)
483 gold_action_losses.push_back(std::make_pair(i, (
float)action_loss[i]));
485 gold_action_losses.push_back(std::make_pair(REDUCE, (
float)action_loss[REDUCE]));
494 gold_actions.
clear();
495 size_t size = stack.size();
496 size_t last = (size == 0) ? 0 : stack.last();
499 if (sys ==
arc_hybrid &&
is_valid(SHIFT, valid_actions) && (stack.empty() || gold_heads[idx] == last))
505 if (sys ==
arc_hybrid &&
is_valid(REDUCE_LEFT, valid_actions) && gold_heads[last] == idx)
510 size_t best_action = 1;
512 for (uint32_t i = 1; i <= 4; i++)
516 if (action_loss[i] < action_loss[best_action] &&
is_valid(i, valid_actions))
520 gold_actions.
clear();
523 else if (action_loss[i] == action_loss[best_action] &&
is_valid(i, valid_actions))
532 uint32_t left_label, uint32_t right_label)
537 actions_onelearner.
clear();
541 actions_onelearner.
push_back(2 + 2 * num_label);
542 if (left_label != my_null &&
is_valid(REDUCE_RIGHT, actions))
543 actions_onelearner.
push_back(1 + right_label);
544 if (left_label != my_null &&
is_valid(REDUCE_LEFT, actions))
545 actions_onelearner.
push_back(1 + left_label + num_label);
546 if (left_label == my_null &&
is_valid(REDUCE_RIGHT, actions))
547 for (uint32_t i = 0; i < num_label; i++)
550 if (left_label == my_null &&
is_valid(REDUCE_LEFT, actions))
551 for (uint32_t i = 0; i < num_label; i++)
553 actions_onelearner.
push_back((uint32_t)(i + 2 + num_label));
561 size_t n = ec.
size();
565 gold_heads.push_back(0);
567 gold_tags.push_back(0);
568 for (
size_t i = 0; i < n; i++)
574 uint32_t label = costs[0].class_index;
575 head = (label & 255) - 1;
580 head = (costs.
size() == 0) ? 0 : costs[0].class_index;
581 tag = (costs.
size() <= 1) ? (uint32_t)data->
root_label : costs[1].class_index;
584 THROW(
"invalid label " << tag <<
" which is > num actions=" << data->
num_label);
586 gold_heads.push_back(head);
587 gold_tags.push_back(tag);
591 for (
size_t i = 0; i < 6; i++) data->
children[i].
resize(n + (
size_t)1);
606 uint32_t n = (uint32_t)ec.size();
607 uint32_t left_label, right_label;
610 for (
size_t i = 0; i < 6; i++)
611 for (
size_t j = 0; j < n + 1; j++) data->
children[i][j] = 0;
612 for (
size_t i = 0; i < n; i++)
622 if (sys ==
arc_hybrid && stack.size() <= 1 && idx > n)
624 else if (sys ==
arc_eager && stack.size() == 0 && idx > n)
626 bool computedFeatures =
false;
630 computedFeatures =
true;
632 get_valid_actions(sch, valid_actions, idx, n, (uint64_t)stack.size(), stack.empty() ? 0 : stack.last());
639 left_label = stack.empty() ?
my_null : gold_tags[stack.last()];
641 right_label = stack.empty() ?
my_null : gold_tags[stack.last()];
643 right_label = idx <= n ? gold_tags[idx] : (uint32_t)data->
root_label;
645 THROW(
"unknown transition system");
647 uint32_t a_id = 0, t_id = 0;
675 else if (a_id == 2 * num_label + 2)
680 else if (a_id > 1 && a_id - 1 <= num_label)
687 t_id = (uint64_t)a_id - num_label - 1;
718 if (a_id != SHIFT && a_id != REDUCE)
725 gold_action_losses.
clear();
726 for (
size_t i = 1; i <= data->
num_label; i++)
728 std::make_pair((
action)i, i != (a_id == REDUCE_LEFT ? left_label : right_label)));
733 .set_learner_id(a_id - 1)
740 .
set_oracle(a_id == REDUCE_LEFT ? left_label : right_label)
743 .set_learner_id(a_id - 1)
756 heads[stack.last()] = 0;
757 tags[stack.last()] = (uint32_t)data->
root_label;
758 sch.
loss((gold_heads[stack.last()] != heads[stack.last()]));
761 for (
size_t i = 1; i <= n; i++) sch.
output() << (heads[i]) <<
":" << tags[i] << std::endl;
predictor & set_oracle(action a)
void resize(size_t length)
v_array< namespace_index > indices
uint32_t get_history_length()
v_array< uint32_t > heads
vw * setup(options_i &options)
std::vector< std::string > pairs
void push_back(feature_value v, feature_index i)
void add_all_features(example &ex, example &src, unsigned char tgt_ns, uint64_t mask, uint64_t multiplier, uint64_t offset, bool=false)
void set_label_parser(label_parser &lp, bool(*is_test)(polylabel &))
std::stringstream & output()
v_array< std::pair< action, float > > gold_action_losses
void get_gold_actions(Search::search &sch, uint32_t idx, uint64_t, v_array< action > &gold_actions)
void eval_count_of_generated_ft(vw &all, example &ec, size_t &new_features_cnt, float &new_features_value)
std::vector< std::string > * interactions
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
the core definition of a set of features.
uint32_t transition_system
void delete_label(void *v)
constexpr action REDUCE_LEFT
size_t transition_hybrid(Search::search &sch, uint64_t a_id, uint32_t idx, uint32_t t_id, uint32_t)
virtual void add_and_parse(const option_group_definition &group)=0
void finish(vw &all, bool delete_all)
void get_valid_actions(Search::search &sch, v_array< uint32_t > &valid_action, uint64_t idx, uint64_t n, uint64_t stack_depth, uint64_t state)
v_array< uint32_t > action_loss
example * alloc_examples(size_t, size_t count=1)
void reset_ex(example *ex)
v_array< uint32_t > children[6]
std::array< features, NUM_NAMESPACES > feature_space
uint32_t AUTO_CONDITION_FEATURES
void add_feature(example &ex, uint64_t idx, unsigned char ns, uint64_t mask, uint64_t multiplier, bool=false)
vw & get_vw_pointer_unsafe()
void push_back(const T &new_ele)
predictor & erase_alloweds()
v_array< uint32_t > gold_tags
void get_cost_to_go_losses(Search::search &sch, v_array< std::pair< action, float >> &gold_action_losses, uint32_t left_label, uint32_t right_label)
v_array< uint32_t > valid_action_temp
unsigned char namespace_index
void extract_features(Search::search &sch, uint32_t idx, multi_ex &ec)
vw * initialize(options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
v_array< uint32_t > stack
v_array< action > gold_actions
void set_options(uint32_t opts)
size_t transition_eager(Search::search &sch, uint64_t a_id, uint32_t idx, uint32_t t_id, uint32_t n)
constexpr uint32_t my_null
bool is_valid(uint64_t action, v_array< uint32_t > valid_actions)
predictor & set_allowed(action a)
std::vector< std::string > triples
std::vector< example * > multi_ex
void set_task_data(T *data)
predictor & set_condition_range(ptag hi, ptag count, char name0)
typed_option< T > make_option(std::string name, T &location)
v_array< uint32_t > valid_actions
predictor & set_tag(ptag tag)
std::vector< std::string > interactions
constexpr action REDUCE_RIGHT
v_array< uint32_t > gold_heads
void get_eager_action_cost(Search::search &sch, uint32_t idx, uint64_t n)
void get_hybrid_action_cost(Search::search &sch, size_t idx, uint64_t n)
bool predictNeedsExample()
constexpr unsigned char constant_namespace
bool children(log_multi &b, uint32_t ¤t, uint32_t &class_index, uint32_t label)
void set_num_learners(size_t num_learners)
void convert_to_onelearner_actions(Search::search &sch, v_array< action > &actions, v_array< action > &actions_onelearner, uint32_t left_label, uint32_t right_label)
predictor & set_input(example &input_example)
v_array< action > gold_action_temp
void loss(float incr_loss)
void run(Search::search &sch, multi_ex &ec)