23 #define SUPERVISED_WS 1 31 #define ABS_CENTRAL_ZEROONE 2 32 #define MINIMAX_CENTRAL 3 33 #define MINIMAX_CENTRAL_ZEROONE 4 87 for (
size_t a = 0;
a < num_actions; ++
a)
94 for (
size_t a = 0;
a < num_actions; ++
a)
96 ecs[
a]->pred.a_s.delete_v();
102 for (
size_t i = 0; i < ws_vali.size(); ++i)
113 float loss(
warm_cb& data, uint32_t label, uint32_t final_prediction)
115 if (label != final_prediction)
124 for (
auto wc : costs)
126 if (wc.class_index == final_prediction)
141 for (uint32_t i = 0; i < arr.size(); i++)
143 if (arr[i] < min_val)
158 std::cerr <<
"average variance estimate = " << data.
cumu_var / data.
inter_iter << std::endl;
159 std::cerr <<
"theoretical average variance = " << data.
num_actions / data.
epsilon << std::endl;
160 std::cerr <<
"last lambda chosen = " << data.
lambdas[argmin] <<
" among lambdas ranging from " << data.
lambdas[0]
172 auto& eca = *data.
ecs[
a];
174 auto& lab = eca.l.cb;
185 idx = ((((idx >> ss) * 28904713) + 4832917 * (uint64_t)
a) << ss) & mask;
192 eca.tag.push_back(
'n');
205 std::vector<float>& lambdas = data.
lambdas;
206 for (uint32_t i = 0; i < data.
choices_lambda; i++) lambdas.push_back(0.f);
211 for (uint32_t i = 0; i < data.
choices_lambda; i++) lambdas[i] = 1.0;
218 for (uint32_t i = 0; i < data.
choices_lambda; i++) lambdas[i] = 0.0;
229 for (uint32_t i = mid; i > 0; i--) lambdas[i - 1] = lambdas[i] / 2.0
f;
231 for (uint32_t i = mid + 1; i < data.
choices_lambda; i++) lambdas[i] = 1.
f - (1.
f - lambdas[i - 1]) / 2.0f;
255 uint32_t cor_type =
UAR;
265 if (randf < cor_prob)
289 float weight_multiplier;
292 float total_train_size = ws_train_size + inter_train_size;
293 float total_weight = (1 - data.
lambdas[i]) * ws_train_size + data.
lambdas[i] * inter_train_size;
296 weight_multiplier = (1 - data.
lambdas[i]) * total_train_size / (total_weight + FLT_MIN);
298 weight_multiplier = data.
lambdas[i] * total_train_size / (total_weight + FLT_MIN);
300 return weight_multiplier;
307 return data.
ecs[0]->pred.a_s[0].action + 1;
323 template <
bool use_cs>
334 data.
ws_vali.push_back(ec_copy);
343 template <
bool use_cs>
348 auto& csls = data.
csls;
349 auto& cbls = data.
cbls;
352 csls[
a].
costs[0].class_index =
a + 1;
360 cbls[
a] = data.
ecs[
a]->l.cb;
361 data.
ecs[
a]->l.cs = csls[
a];
364 std::vector<float> old_weights;
365 for (
size_t a = 0;
a < data.
num_actions; ++
a) old_weights.push_back(data.
ecs[
a]->weight);
370 for (
size_t a = 0;
a < data.
num_actions; ++
a) data.
ecs[
a]->weight = old_weights[
a] * weight_multiplier;
380 template <
bool use_cs>
386 learn_sup_adf<use_cs>(data, ec, ec_type);
388 ec.pred.multiclass =
action;
398 auto& out_ec = *data.
ecs[0];
399 uint32_t chosen_action;
402 THROW(
"Failed to sample from pdf");
405 copy_array<action_score>(a_s, out_ec.pred.a_s);
407 return chosen_action;
416 auto& lab = data.
ecs[cl.action - 1]->l.cb;
417 lab.costs.push_back(cl);
419 std::vector<float> old_weights;
420 for (
size_t a = 0;
a < data.
num_actions; ++
a) old_weights.push_back(data.
ecs[
a]->weight);
425 for (
size_t a = 0;
a < data.
num_actions; ++
a) data.
ecs[
a]->weight = old_weights[
a] * weight_multiplier;
432 template <
bool use_cs>
439 cl.action = a_s[chosen_action].action + 1;
440 cl.probability = a_s[chosen_action].score;
443 THROW(
"No action with non-zero probability found!");
462 float temp_var = 0.f;
465 if (pred_best_approx == data.
a_s_adf[
a].action + 1)
466 temp_var = 1.0f / data.
a_s_adf[
a].score;
471 template <
bool is_learn,
bool use_cs>
488 predict_or_learn_sup_adf<use_cs>(data, base, ec,
WARM_START);
490 predict_or_learn_bandit_adf<use_cs>(data, base, ec,
WARM_START);
498 predict_or_learn_bandit_adf<use_cs>(data, base, ec,
INTERACTION);
500 data.a_s_adf.clear();
524 data.
ecs.resize(num_actions);
525 for (
size_t a = 0;
a < num_actions; ++
a)
528 auto& lab = data.
ecs[
a]->l.cb;
533 data.
csls = calloc_or_throw<COST_SENSITIVE::label>(num_actions);
534 for (uint32_t
a = 0;
a < num_actions; ++
a)
539 data.
cbls = calloc_or_throw<CB::label>(num_actions);
554 uint32_t num_actions = 0;
555 auto data = scoped_calloc_or_throw<warm_cb>();
563 .help(
"Convert multiclass on <k> classes into a contextual bandit problem"))
565 .help(
"consume cost-sensitive classification examples instead of multiclass"))
566 .
add(
make_option(
"loss0", data->loss0).default_value(0.f).help(
"loss for correct label"))
567 .
add(
make_option(
"loss1", data->loss1).default_value(1.
f).help(
"loss for incorrect label"))
570 .help(
"number of training examples for warm start phase"))
571 .
add(
make_option(
"epsilon", data->epsilon).keep().help(
"epsilon-greedy exploration"))
573 .default_value(UINT32_MAX)
574 .help(
"number of examples for the interactive contextual bandit learning phase"))
575 .
add(
make_option(
"warm_start_update", data->upd_ws).help(
"indicator of warm start updates"))
576 .
add(
make_option(
"interaction_update", data->upd_inter).help(
"indicator of interaction updates"))
579 .help(
"type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: " 580 "replacing with overwriting label)"))
583 .help(
"probability of label corruption in the warm start phase"))
586 .help(
"the number of candidate lambdas to aggregate (lambda is the importance weight parameter between " 590 .help(
"The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min " 591 "lambda=0, max lambda=1, 3: center lambda=epsilon/(1+epsilon), 4: center " 592 "lambda=epsilon/(1+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are " 593 "generated using a doubling scheme"))
596 .help(
"the label used by type 3 corruptions (overwriting)"))
598 .help(
"simulate contextual bandit updates on warm start examples"));
604 THROW(
"label corruption on cost-sensitive examples not currently supported");
613 data->a_s = v_init<action_score>();
616 data->use_cs = use_cs;
625 std::stringstream ss;
626 ss << std::max(std::abs(data->loss0), std::abs(data->loss1)) / (data->loss1 - data->loss0);
627 options.
insert(
"lr_multiplier", ss.str());
639 std::cerr <<
"Warning: no epsilon (greedy parameter) specified; resetting to 0.05" << std::endl;
640 data->epsilon = 0.05f;
645 data, base, predict_or_learn_adf<true, true>, predict_or_learn_adf<false, true>, all.
p, data->choices_lambda);
648 data, base, predict_or_learn_adf<true, false>, predict_or_learn_adf<false, false>, all.
p, data->choices_lambda);
void copy_label(void *dst, void *src)
bool example_is_newline_not_header(example const &ec)
void predict_or_learn_sup_adf(warm_cb &data, multi_learner &base, example &ec, int ec_type)
std::vector< float > cumulative_costs
uint32_t predict_sublearner_adf(warm_cb &data, multi_learner &base, example &ec, uint32_t i)
void predict(E &ec, size_t i=0)
LEARNER::base_learner * cost_sensitive
void(* delete_prediction)(void *)
void learn_sup_adf(warm_cb &data, example &ec, int ec_type)
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
void(* delete_label)(void *)
void copy_example_data(bool audit, example *dst, example *src)
void predict_or_learn_bandit_adf(warm_cb &data, multi_learner &base, example &ec, int ec_type)
void(* default_label)(void *)
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
bool(* test_label)(void *)
#define ABS_CENTRAL_ZEROONE
learner< T, E > & init_cost_sensitive_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), parser *p, size_t ws, prediction_type::prediction_type_t pred_type=prediction_type::multiclass)
the core definition of a set of features.
v_array< cb_class > costs
void accumu_costs_iv_adf(warm_cb &data, multi_learner &base, example &ec)
void learn_bandit_adf(warm_cb &data, multi_learner &base, example &ec, int ec_type)
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
base_learner * make_base(learner< T, E > &base)
void delete_label(void *v)
void finish(warm_cb &data)
void setup_lambdas(warm_cb &data)
virtual void add_and_parse(const option_group_definition &group)=0
void predict_or_learn_adf(warm_cb &data, multi_learner &base, example &ec)
std::vector< float > lambdas
float minimax_lambda(float epsilon)
std::shared_ptr< rand_state > _random_state
#define MINIMAX_CENTRAL_ZEROONE
score_iterator begin_scores(action_scores &a_s)
example * alloc_examples(size_t, size_t count=1)
std::shared_ptr< rand_state > get_random_state()
score_iterator end_scores(action_scores &a_s)
MULTICLASS::label_t multi
uint32_t predict_sup_adf(warm_cb &data, multi_learner &base, example &ec)
float compute_weight_multiplier(warm_cb &data, size_t i, int ec_type)
uint32_t predict_bandit_adf(warm_cb &data, multi_learner &base, example &ec)
virtual bool was_supplied(const std::string &key)=0
void add_to_vali(warm_cb &data, example &ec)
std::vector< example * > ws_vali
COST_SENSITIVE::label * csls
void copy_example_to_adf(warm_cb &data, example &ec)
virtual void insert(const std::string &key, const std::string &value)=0
base_learner * warm_cb_setup(options_i &options, vw &all)
void accumu_var_adf(warm_cb &data, multi_learner &base, example &ec)
option_group_definition & add(T &&op)
int add(svm_params &params, svm_example *fec)
std::vector< example * > multi_ex
uint32_t find_min(std::vector< T > arr)
typed_option< T > make_option(std::string name, T &location)
float loss(warm_cb &data, uint32_t label, uint32_t final_prediction)
void set_finish(void(*f)(T &))
COST_SENSITIVE::label cs_label
MULTICLASS::label_t mc_label
learner< T, E > & init_multiclass_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), parser *p, size_t ws, prediction_type::prediction_type_t pred_type=prediction_type::multiclass)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
float loss_cs(warm_cb &data, v_array< COST_SENSITIVE::wclass > &costs, uint32_t final_prediction)
void learn(E &ec, size_t i=0)
bool ind_update(warm_cb &data, int ec_type)
uint32_t corrupt_action(warm_cb &data, uint32_t action, int ec_type)
void init_adf_data(warm_cb &data, const uint32_t num_actions)
multi_learner * as_multiline(learner< T, E > *l)
const char * to_string(prediction_type_t prediction_type)
uint32_t generate_uar_action(warm_cb &data)