38 std::vector<v_array<COST_SENSITIVE::wclass>>
cs_costs;
40 std::vector<ACTION_SCORE::action_scores>
cb_as;
51 adf_data.
ecs[
a]->pred.a_s.delete_v();
55 for (
auto& as : cb_as) as.delete_v();
60 float loss(
cbify& data, uint32_t label, uint32_t final_prediction)
62 if (label != final_prediction)
73 if (wc.class_index == final_prediction)
85 for (
auto costs : cs_costs)
87 if (costs[0].class_index == final_prediction)
102 for (
size_t a = 0;
a < adf_data.num_actions; ++
a)
104 auto& eca = *adf_data.ecs[
a];
106 auto& lab = eca.l.cb;
117 idx = ((((idx >> ss) * 28904713) + 4832917 * (uint64_t)
a) << ss) & mask;
124 eca.tag.push_back(
'n');
129 template <
bool is_learn,
bool use_cs>
148 uint32_t chosen_action;
151 THROW(
"Failed to sample from pdf");
154 cl.
action = chosen_action + 1;
158 THROW(
"No action with non-zero probability found!");
182 template <
bool is_learn,
bool use_cs>
198 uint32_t chosen_action;
201 THROW(
"Failed to sample from pdf");
204 cl.
action = out_ec.pred.a_s[chosen_action].action + 1;
205 cl.
probability = out_ec.pred.a_s[chosen_action].score;
208 THROW(
"No action with non-zero probability found!");
218 lab.costs.push_back(cl);
231 adf_data.ecs.resize(num_actions);
232 for (
size_t a = 0;
a < num_actions; ++
a)
235 auto& lab = adf_data.ecs[
a]->l.cb;
241 template <
bool is_learn>
245 if (data.
cs_costs.size() < ec_seq.size())
246 data.
cs_costs.resize(ec_seq.size());
247 if (data.
cb_costs.size() < ec_seq.size())
248 data.
cb_costs.resize(ec_seq.size());
249 if (data.
cb_as.size() < ec_seq.size())
250 data.
cb_as.resize(ec_seq.size());
251 for (
size_t i = 0; i < ec_seq.size(); ++i)
253 auto& ec = *ec_seq[i];
256 data.
cb_as[i].clear();
258 ec.pred.a_s = data.
cb_as[i];
263 auto& out_ec = *ec_seq[0];
265 uint32_t chosen_action;
268 THROW(
"Failed to sample from pdf");
271 cl.
action = out_ec.pred.a_s[chosen_action].action + 1;
272 cl.
probability = out_ec.pred.a_s[chosen_action].score;
275 THROW(
"No action with non-zero probability found!");
288 for (
size_t i = 0; i < ec_seq.size(); ++i)
290 auto& ec = *ec_seq[i];
291 data.
cb_as[i] = ec.pred.a_s;
298 ec.pred.multiclass = cl.
action;
300 ec.pred.multiclass = 0;
322 for (
auto const& cost : costs)
326 if (predicted_class == cost.class_index)
341 std::string outputString;
342 std::stringstream outputStringStream(outputString);
343 for (
size_t i = 0; i < costs.
size(); i++)
346 outputStringStream <<
' ';
347 outputStringStream << costs[i].class_index <<
':' << costs[i].partial_prediction;
363 bool hit_loss =
false;
385 uint32_t num_actions = 0;
386 auto data = scoped_calloc_or_throw<cbify>();
393 .help(
"Convert multiclass on <k> classes into a contextual bandit problem"))
394 .
add(
make_option(
"cbify_cs", use_cs).help(
"consume cost-sensitive classification examples instead of multiclass"))
395 .
add(
make_option(
"loss0", data->loss0).default_value(0.f).help(
"loss for correct label"))
396 .
add(
make_option(
"loss1", data->loss1).default_value(1.
f).help(
"loss for incorrect label"));
404 data->a_s = v_init<action_score>();
410 if (!options.
was_supplied(
"cb_explore") && !data->use_adf)
412 std::stringstream ss;
414 options.
insert(
"cb_explore", ss.str());
425 std::stringstream ss;
426 ss << std::max(std::abs(data->loss0), std::abs(data->loss1)) / (data->loss1 - data->loss0);
427 options.
insert(
"lr_multiplier", ss.str());
437 data, base, predict_or_learn_adf<true, true>, predict_or_learn_adf<false, true>, all.
p, 1);
440 data, base, predict_or_learn_adf<true, false>, predict_or_learn_adf<false, false>, all.
p, 1);
447 data, base, predict_or_learn<true, true>, predict_or_learn<false, true>, all.
p, 1);
458 auto data = scoped_calloc_or_throw<cbify>();
459 bool cbify_ldf_option =
false;
463 .
add(
make_option(
"cbify_ldf", cbify_ldf_option).keep().help(
"Convert csoaa_ldf into a contextual bandit problem"))
464 .
add(
make_option(
"loss0", data->loss0).default_value(0.f).help(
"loss for correct label"))
465 .
add(
make_option(
"loss1", data->loss1).default_value(1.
f).help(
"loss for incorrect label"));
473 data->use_adf =
true;
477 options.
insert(
"cb_explore_adf",
"");
484 std::stringstream ss;
485 ss << std::max(std::abs(data->loss0), std::abs(data->loss1)) / (data->loss1 - data->loss0);
486 options.
insert(
"lr_multiplier", ss.str());
void do_actual_learning_ldf(cbify &data, multi_learner &base, multi_ex &ec_seq)
bool example_is_newline_not_header(example const &ec)
ACTION_SCORE::action_scores a_s
void predict(E &ec, size_t i=0)
void(* delete_prediction)(void *)
float loss_csldf(cbify &data, std::vector< v_array< COST_SENSITIVE::wclass >> &cs_costs, uint32_t final_prediction)
std::vector< ACTION_SCORE::action_scores > cb_as
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
void(* delete_label)(void *)
void copy_example_data(bool audit, example *dst, example *src)
void output_example_seq(vw &all, multi_ex &ec_seq)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
void(* default_label)(void *)
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
bool(* test_label)(void *)
base_learner * cbify_setup(options_i &options, vw &all)
void init_adf_data(cbify &data, const size_t num_actions)
v_array< int > final_prediction_sink
learner< T, E > & init_cost_sensitive_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), parser *p, size_t ws, prediction_type::prediction_type_t pred_type=prediction_type::multiclass)
the core definition of a set of features.
v_array< cb_class > costs
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
base_learner * make_base(learner< T, E > &base)
void delete_label(void *v)
virtual void add_and_parse(const option_group_definition &group)=0
bool ec_is_example_header(example const &ec)
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
int example_is_newline(example const &ec)
void copy_example_to_adf(cbify &data, example &ec)
score_iterator begin_scores(action_scores &a_s)
example * alloc_examples(size_t, size_t count=1)
double sum_loss_since_last_dump
score_iterator end_scores(action_scores &a_s)
single_learner * as_singleline(learner< T, E > *l)
void predict_or_learn(cbify &data, single_learner &base, example &ec)
MULTICLASS::label_t multi
std::vector< v_array< COST_SENSITIVE::wclass > > cs_costs
void set_finish_example(void(*f)(vw &all, T &, E &))
base_learner * cbifyldf_setup(options_i &options, vw &all)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
virtual bool was_supplied(const std::string &key)=0
float loss_cs(cbify &data, v_array< COST_SENSITIVE::wclass > &costs, uint32_t final_prediction)
void(* print_text)(int, std::string, v_array< char >)
void finish_example(vw &, example &)
virtual void insert(const std::string &key, const std::string &value)=0
option_group_definition & add(T &&op)
int add(svm_params ¶ms, svm_example *fec)
std::vector< example * > multi_ex
typed_option< T > make_option(std::string name, T &location)
std::vector< v_array< CB::cb_class > > cb_costs
std::vector< std::string > interactions
learner< T, E > & init_multiclass_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), parser *p, size_t ws, prediction_type::prediction_type_t pred_type=prediction_type::multiclass)
double weighted_labeled_examples
void output_example(vw &all, example &ec, bool &hit_loss, multi_ex *ec_seq)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict_or_learn_adf(cbify &data, multi_learner &base, example &ec)
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores, uint32_t prediction)
void learn(E &ec, size_t i=0)
void(* print)(int, float, float, v_array< char >)
multi_learner * as_multiline(learner< T, E > *l)
const char * to_string(prediction_type_t prediction_type)