25 if (index >= array.
size())
27 std::cout <<
"ERROR: index is larger than the size" << std::endl;
30 if (index == array.
size() - 1)
35 for (
size_t i = index + 1; i < array.
size(); i++)
37 array[i - 1] = array[i];
74 float denominator = pow(norm_sq1 * norm_sq2, 0.5
f);
78 while (idx1 < f1.
size() && idx2 < f2.
size())
85 else if (ec1pos > ec2pos)
90 total_sum_feat_sq += f1.
values[idx1] * f2.
values[idx2] / denominator;
97 int cmpfunc(
const void*
a,
const void* b) {
return *(
char*)a - *(
char*)b; }
164 examples_index = v_init<uint32_t>();
216 nodes = v_init<node>();
217 examples = v_init<example*>();
247 if (fs_2.indicies.size() == 0)
250 for (
size_t idx1 = 0, idx2 = 0; idx1 < fs_1.
size() && idx2 < fs_2.size(); idx1++)
252 uint64_t ec1pos = fs_1.
indicies[idx1];
253 uint64_t ec2pos = fs_2.indicies[idx2];
257 while (ec1pos > ec2pos && ++idx2 < fs_2.size()) ec2pos = fs_2.indicies[idx2];
259 if (ec1pos == ec2pos)
261 dotprod += fs_1.
values[idx1] * fs_2.values[idx2];
278 return linear_prod / norm_sqrt;
298 b.
nodes[0].internal = -1;
301 b.
kprod_ec = &calloc_or_throw<example>();
305 std::cout <<
"tree initiazliation is done...." << std::endl
306 <<
"max nodes " << b.
max_nodes << std::endl
307 <<
"tree size: " << b.
nodes.
size() << std::endl
308 <<
"max number of unique labels: " << b.
max_num_labels << std::endl
310 <<
"num of dream operations per example: " << b.
dream_repeats << std::endl
312 <<
"oas: " << b.
oas << std::endl;
335 while (b.
nodes[cn].internal == 1)
338 if (b.
nodes[cn].nl < 1)
340 else if (b.
nodes[cn].nr < 1)
342 else if ((b.
nodes[cn].nl >= 1) && (b.
nodes[cn].nr >= 1))
347 std::cout << cn <<
" " << b.
nodes[cn].nl <<
" " << b.
nodes[cn].nr << std::endl;
348 std::cout <<
"Error: nl = 0, and nr = 0, exit...";
355 cn = b.
nodes[cn].left;
360 cn = b.
nodes[cn].right;
364 if (b.
nodes[cn].examples_index.
size() >= 1)
367 uint32_t ec_id = b.
nodes[cn].examples_index[loc_at_leaf];
382 uint32_t save_multi_pred = 0;
401 float weighted_value =
402 (float)((1. - b.
alpha) * log(b.
nodes[cn].nl / (b.
nodes[cn].nr + 1e-1)) / log(2.) + b.
alpha * prediction);
403 float route_label = weighted_value < 0.f ? -1.f : 1.f;
406 float ec_input_weight = ec.
weight;
408 ec.
l.
simple = {route_label, 1., 0.f};
424 ec.
weight = ec_input_weight;
426 return save_binary_scalar;
434 b.
nodes[cn].internal = 1;
435 uint32_t left_child = (uint32_t)b.
nodes.
size();
437 b.
nodes[left_child].internal = -1;
439 uint32_t right_child = (uint32_t)b.
nodes.
size();
441 b.
nodes[right_child].internal = -1;
447 std::cout <<
"depth " << b.
max_depth << std::endl;
450 b.
nodes[cn].left = left_child;
451 b.
nodes[cn].right = right_child;
452 b.
nodes[left_child].parent = cn;
453 b.
nodes[right_child].parent = cn;
454 b.
nodes[left_child].depth = b.
nodes[cn].depth + 1;
455 b.
nodes[right_child].depth = b.
nodes[cn].depth + 1;
461 for (
size_t ec_id = 0; ec_id < b.
nodes[cn].examples_index.
size(); ec_id++)
463 uint32_t ec_pos = b.
nodes[cn].examples_index[ec_id];
465 uint32_t save_multi_pred = 0;
471 save_multi_pred = b.
examples[ec_pos]->pred.multiclass;
475 multilabels = b.
examples[ec_pos]->l.multilabels;
476 preds = b.
examples[ec_pos]->pred.multilabels;
479 b.
examples[ec_pos]->l.simple = {1.f, 1.f, 0.f};
498 b.
examples[ec_pos]->pred.multiclass = save_multi_pred;
502 b.
examples[ec_pos]->pred.multilabels = preds;
507 b.
nodes[cn].nl = std::max(
double(b.
nodes[left_child].examples_index.
size()), 0.001);
508 b.
nodes[cn].nr = std::max(
double(b.
nodes[right_child].examples_index.
size()), 0.001);
517 int compare_label(
const void*
a,
const void* b) {
return *(uint32_t*)a - *(uint32_t*)b; }
521 uint32_t num_overlap = 0;
528 while (idx1 < array_1.
size() && idx2 < array_2.
size())
530 uint32_t c1 = array_1[idx1];
531 uint32_t c2 = array_2[idx2];
549 uint32_t overlap =
over_lap(array_1, array_2);
550 return (uint32_t)(array_1.
size() + array_2.
size() - 2 * overlap);
555 if (b.
nodes[cn].internal != -1)
556 std::cout <<
"something is wrong, it should be a leaf node" << std::endl;
559 for (
size_t i = 0; i < b.
nodes[cn].examples_index.
size(); i++)
561 uint32_t loc = b.
nodes[cn].examples_index[i];
562 for (uint32_t lab : b.
examples[loc]->l.multilabels.label_v)
576 ec.
l.
simple = {FLT_MAX, 1.f, 0.f};
577 for (
size_t i = 0; i < leaf_labs.
size(); i++)
596 ec.
l.
simple = {FLT_MAX, 1.f, 0.f};
597 for (
size_t i = 0; i < leaf_labs.
size(); i++)
613 if (b.
nodes[cn].examples_index.
size() > 0)
615 float max_score = -FLT_MAX;
616 int64_t max_pos = -1;
617 for (
size_t i = 0; i < b.
nodes[cn].examples_index.
size(); i++)
620 uint32_t loc = b.
nodes[cn].examples_index[i];
635 if (score > max_score)
638 max_pos = (int64_t)loc;
659 if (num_overlaps == 0.
f)
663 return 2.f * (v1 * v2 / (v1 + v2));
669 uint32_t save_multi_pred = 0;
685 while (b.
nodes[cn].internal == 1)
703 int64_t closest_ec = 0;
707 if (closest_ec != -1)
722 if (closest_ec != -1)
737 uint32_t save_multi_pred = 0;
750 ec.
l.
simple = {FLT_MAX, 1., 0.0};
751 while (b.
nodes[cn].internal != -1)
755 cn = prediction < 0 ? b.
nodes[cn].left : b.
nodes[cn].right;
770 int64_t closest_ec = 0;
780 if (closest_ec != -1)
806 if (b.
nodes[leaf_id].examples_index.
size() > 0)
808 uint32_t pos = uint32_t(b.
_random_state->get_and_update_random() * b.
nodes[leaf_id].examples_index.
size());
809 ec_id = b.
nodes[leaf_id].examples_index[pos];
830 uint32_t save_multi_pred = 0;
845 ec.
l.
simple = {FLT_MAX, 1.0, 0.0};
846 while (b.
nodes[cn].internal != -1)
851 if (insertion ==
false)
852 cn = prediction < 0 ? b.
nodes[cn].left : b.
nodes[cn].right;
870 if (insertion ==
true)
882 route_to_leaf(b, base, ec_array_index, 0, path_to_leaf,
false);
884 if (path_to_leaf.
size() > 1)
887 uint32_t random_pos = (uint32_t)(b.
_random_state->get_and_update_random() * (path_to_leaf.
size()));
888 uint64_t cn = path_to_leaf[random_pos];
890 if (b.
nodes[cn].internal != -1)
892 float objective = 0.f;
893 float prob_right = 0.5;
894 float coin = b.
_random_state->get_and_update_random() < prob_right ? 1.f : -1.f;
895 float weight = path_to_leaf.
size() * 1.f / (path_to_leaf.
size() - 1.f);
899 objective = (float)((1. - b.
alpha) * log(b.
nodes[cn].nl / b.
nodes[cn].nr) +
900 b.
alpha * (-reward_left_subtree / (1. - prob_right)) / 2.);
905 objective = (float)((1. - b.
alpha) * log(b.
nodes[cn].nl / b.
nodes[cn].nr) +
906 b.
alpha * (reward_right_subtree / prob_right) / 2.);
909 float ec_input_weight = ec.
weight;
922 ec.
weight = fabs(objective);
925 else if (ec.
weight < .01f)
927 ec.
l.
simple = {objective < 0. ? -1.f : 1.f, 1.f, 0.};
937 ec.
weight = ec_input_weight;
944 b, base, cn, ec, weight);
964 while (b.
nodes[cn].internal == 1)
976 if ((b.
nodes[cn].internal == -1) && (fake_insert ==
false))
1027 if (b.
iter % 5000 == 0)
1034 std::cout <<
"at iter " << b.
iter <<
", avg hamming loss: " << b.
hamming_loss * 1. / b.
iter << std::endl;
1037 clock_t begin = clock();
1041 example* new_ec = &calloc_or_throw<example>();
1061 if (b.
iter % 5000 == 0)
1064 std::cout <<
"at iter " << b.
iter <<
", pred error: " << b.
num_mistakes * 1. / b.
iter << std::endl;
1066 std::cout <<
"at iter " << b.
iter <<
", avg hamming loss: " << b.
hamming_loss * 1. / b.
iter << std::endl;
1068 clock_t begin = clock();
1070 b.
test_time += float(clock() - begin) / CLOCKS_PER_SEC;
1077 std::cout <<
"######### Current Pass: " << b.
current_pass 1078 <<
", with number of memories strored so far: " << b.
examples.
size() << std::endl;
1112 for (uint32_t i = 0; i < tag_number; i++) ec->
tag.
push_back(
'a');
1114 for (uint32_t i = 0; i < tag_number; i++)
writeit(ec->
tag[i],
"tag");
1121 for (uint32_t i = 0; i < namespace_size; i++)
1126 for (uint32_t i = 0; i < namespace_size; i++)
writeit(ec->
indices[i],
"namespace_index");
1136 fs->
values = v_init<feature_value>();
1137 fs->
indicies = v_init<feature_index>();
1138 for (uint32_t f_i = 0; f_i < feat_size; f_i++)
1143 for (uint32_t f_i = 0; f_i < feat_size; f_i++)
writeit(fs->
values[f_i],
"value");
1144 for (uint32_t f_i = 0; f_i < feat_size; f_i++)
writeit(fs->
indicies[f_i],
"index");
1169 std::stringstream msg;
1201 for (uint32_t i = 0; i < n_nodes; i++)
1210 for (uint32_t i = 0; i < n_examples; i++)
1212 example* new_ec = &calloc_or_throw<example>();
1226 auto tree = scoped_calloc_or_throw<memory_tree>();
1233 .help(
"Make a memory tree with at most <n> nodes"))
1236 .help(
"max number of unique label"))
1237 .
add(
make_option(
"leaf_example_multiplier", tree->leaf_example_multiplier)
1239 .help(
"multiplier on examples per leaf (default = log nodes)"))
1240 .
add(
make_option(
"alpha", tree->alpha).default_value(0.1
f).help(
"Alpha"))
1243 .help(
"number of dream operations per example (default = 1)"))
1244 .
add(
make_option(
"top_K", tree->top_K).default_value(1).help(
"top K prediction error (default 1)"))
1245 .
add(
make_option(
"learn_at_leaf", tree->learn_at_leaf).help(
"whether or not learn at leaf (defualt = True)"))
1249 .help(
"turn on dream operations at reward based update as well"))
1250 .
add(
make_option(
"online", tree->online).help(
"turn on dream operations at reward based update as well"));
1252 if (!tree->max_nodes)
1259 tree->current_pass = 0;
1262 tree->max_leaf_examples = (size_t)(tree->leaf_example_multiplier * (log(tree->max_nodes) / log(2)));
1269 <<
"max_nodes = " << tree->max_nodes <<
" " 1270 <<
"max_leaf_examples = " << tree->max_leaf_examples <<
" " 1271 <<
"alpha = " << tree->alpha <<
" " 1272 <<
"oas = " << tree->oas <<
" " 1273 <<
"online =" << tree->online <<
" " << std::endl;
1275 size_t num_learners = 0;
1278 if (tree->oas ==
false)
1280 num_learners = tree->max_nodes + 1;
1291 num_learners = tree->max_nodes + 1 + tree->max_num_labels;
v_array< namespace_index > indices
void update_rew(memory_tree &b, single_learner &base, const uint32_t &ec_array_index, example &ec)
void remove_at_index(v_array< T > &array, uint32_t index)
base_learner * memory_tree_setup(options_i &options, vw &all)
void predict(E &ec, size_t i=0)
void diag_kronecker_prod_fs_test(features &f1, features &f2, features &prod_f, float &total_sum_feat_sq, float norm_sq1, float norm_sq2)
void(* delete_prediction)(void *)
uint32_t total_num_queries
void predict(memory_tree &b, single_learner &base, example &ec)
void push_back(feature_value v, feature_index i)
void learn_at_leaf_random(memory_tree &b, single_learner &base, const uint64_t &leaf_id, example &ec, const float &weight)
void(* delete_label)(void *)
void copy_example_data(bool audit, example *dst, example *src)
void copy_array(v_array< T > &dst, const v_array< T > &src)
v_array< feature_index > indicies
label_type::label_type_t label_type
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
void split_leaf(memory_tree &b, single_learner &base, const uint64_t cn)
the core definition of a set of features.
v_array< uint32_t > examples_index
base_learner * make_base(learner< T, E > &base)
v_array< act_score > path
v_array< feature_value > values
virtual void add_and_parse(const option_group_definition &group)=0
void set_save_load(void(*sl)(T &, io_buf &, bool, bool))
void free_flatten_example(flat_example *fec)
size_t leaf_example_multiplier
uint64_t insert_descent(node &n, const float prediction)
void save_load_example(example *ec, io_buf &model_file, bool &read, bool &text, std::stringstream &msg, bool &oas)
#define writeit(what, str)
uint32_t over_lap(v_array< uint32_t > &array_1, v_array< uint32_t > &array_2)
void train_node(log_multi &b, single_learner &base, example &ec, uint32_t ¤t, uint32_t &class_index, uint32_t)
float return_reward_from_node(memory_tree &b, single_learner &base, uint64_t cn, example &ec, float weight=1.f)
int compare_label(const void *a, const void *b)
void insert_example(memory_tree &b, single_learner &base, const uint32_t &ec_array_index, bool fake_insert=false)
std::shared_ptr< rand_state > get_random_state()
std::array< features, NUM_NAMESPACES > feature_space
single_learner * as_singleline(learner< T, E > *l)
MULTICLASS::label_t multi
float get_overlap_from_two_examples(example &ec1, example &ec2)
void diag_kronecker_product_test(example &ec1, example &ec2, example &ec, bool oas=false)
float normalized_linear_prod(memory_tree &b, example *ec1, example *ec2)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void push_back(const T &new_ele)
void save_load_memory_tree(memory_tree &b, io_buf &model_file, bool read, bool text)
void end_pass(example &ec, vw &all)
uint32_t compute_hamming_loss_via_oas(memory_tree &b, single_learner &base, const uint64_t cn, example &ec, v_array< uint32_t > &selected_labs)
void collect_labels_from_leaf(memory_tree &b, const uint64_t cn, v_array< uint32_t > &leaf_labs)
unsigned char namespace_index
float linear_kernel(const flat_example *fec1, const flat_example *fec2)
void single_query_and_learn(memory_tree &b, single_learner &base, const uint32_t &ec_array_index, example &ec)
void experience_replay(memory_tree &b, single_learner &base)
void free_example(example *ec)
int add(svm_params ¶ms, svm_example *fec)
flat_example * flatten_sort_example(vw &all, example *ec)
MULTILABEL::labels multilabels
v_array< example * > examples
typed_option< T > make_option(std::string name, T &location)
MULTILABEL::labels multilabels
v_array< uint32_t > label_v
void train_one_against_some_at_leaf(memory_tree &b, single_learner &base, const uint64_t cn, example &ec)
void set_end_pass(void(*f)(T &))
void save_load_node(node &cn, io_buf &model_file, bool &read, bool &text, std::stringstream &msg)
void init_tree(log_multi &d)
std::shared_ptr< rand_state > _random_state
int64_t pick_nearest(memory_tree &b, single_learner &base, const uint64_t cn, example &ec)
learner< T, E > & init_multiclass_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), parser *p, size_t ws, prediction_type::prediction_type_t pred_type=prediction_type::multiclass)
void route_to_leaf(memory_tree &b, single_learner &base, const uint32_t &ec_array_index, uint64_t cn, v_array< uint64_t > &path, bool insertion)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void learn(memory_tree &b, single_learner &base, example &ec)
void learn(E &ec, size_t i=0)
bool v_array_contains(v_array< T > &A, T x)
void copy_example_data(example *dst, example *src, bool oas=false)
int random_sample_example_pop(memory_tree &b, uint64_t &cn)
int cmpfunc(const void *a, const void *b)
uint32_t hamming_loss(v_array< uint32_t > &array_1, v_array< uint32_t > &array_2)
#define writeitvar(what, str, mywhat)
float F1_score_for_two_examples(example &ec1, example &ec2)