57 size_t num_features = 0;
62 for (
size_t i = 0; i < (*ec_seq).size(); i++)
64 num_features += (*ec_seq)[i]->num_features;
66 bool labeled_example =
true;
69 for (uint32_t i = 0; i < preds.
size(); i++)
72 loss += l * preds[i].score;
76 labeled_example =
false;
78 bool holdout_example = labeled_example;
79 for (
size_t i = 0; i < ec_seq->size(); i++) holdout_example &= (*ec_seq)[i]->test_only;
81 all.
sd->
update(holdout_example, labeled_example, loss, ec.
weight, num_features);
87 std::string outputString;
88 std::stringstream outputStringStream(outputString);
91 for (
size_t i = 0; i < costs.
size(); i++)
94 outputStringStream <<
' ';
95 outputStringStream << costs[i].action <<
':' << costs[i].partial_prediction;
105 if (ec_seq.size() > 0)
115 if (ec_seq.size() > 0)
123 template <
bool is_learn>
128 if (label_example !=
nullptr)
133 multiline_learn_or_predict<false>(base, ec_seq, data.
offset);
135 if (label_example !=
nullptr)
139 if (label_example !=
nullptr && is_learn)
143 float action_probability = 0;
144 for (
size_t i = 0; i < a_s.
size(); i++)
146 action_probability = a_s[i].score;
155 if (threshold > 1. + 1e-6)
163 if (ec->l.cb.costs.size() == 1 && ec->l.cb.costs[0].cost != FLT_MAX && ec->l.cb.costs[0].probability > 0)
168 ec_found->
l.
cb.
costs[0].probability = action_probability;
170 multiline_learn_or_predict<true>(base, ec_seq, data.
offset);
174 float inv_threshold = 1.f / threshold;
175 for (
auto& ec : ec_seq) ec->weight *= inv_threshold;
188 auto data = scoped_calloc_or_throw<explore_eval>();
189 bool explore_eval_option =
false;
191 new_options.
add(
make_option(
"explore_eval", explore_eval_option).keep().help(
"Evaluate explore_eval adf policies"))
193 .help(
"Multiplier used to make all rejection sample probabilities <= 1"));
196 if (!explore_eval_option)
203 data->fixed_multiplier =
true;
205 data->multiplier = 1;
208 options.
insert(
"cb_explore_adf",
"");
ACTION_SCORE::action_scores a_s
bool ec_is_example_header(example const &ec)
base_learner * explore_eval_setup(options_i &options, vw &all)
void(* delete_prediction)(void *)
void output_example_seq(vw &all, multi_ex &ec_seq)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
void output_example(vw &all, explore_eval &c, example &ec, multi_ex *ec_seq)
label_type::label_type_t label_type
v_array< int > final_prediction_sink
v_array< cb_class > costs
base_learner * make_base(learner< T, E > &base)
example * test_adf_sequence(multi_ex &ec_seq)
virtual void add_and_parse(const option_group_definition &group)=0
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
std::shared_ptr< rand_state > _random_state
CB::cb_class get_observed_cost(multi_ex &examples)
std::shared_ptr< rand_state > get_random_state()
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores)
void set_finish_example(void(*f)(vw &all, T &, E &))
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
virtual bool was_supplied(const std::string &key)=0
void global_print_newline(const v_array< int > &final_prediction_sink)
void(* print_text)(int, std::string, v_array< char >)
void finish_example(vw &, example &)
virtual void insert(const std::string &key, const std::string &value)=0
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
void do_actual_learning(explore_eval &data, multi_learner &base, multi_ex &ec_seq)
typed_option< T > make_option(std::string name, T &location)
bool example_is_newline_not_header(example &ec, vw &all)
void set_finish(void(*f)(T &))
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void finish(explore_eval &data)
void print_action_score(int f, v_array< action_score > &a_s, v_array< char > &tag)
multi_learner * as_multiline(learner< T, E > *l)