50 template <
bool is_learn>
56 if (is_learn && ec.
l.
cb.
costs[0].probability < 1)
72 probs[chosen].score = 1.0;
78 template <
bool is_learn>
100 template <
bool is_learn>
109 for (
size_t i = 0; i < data.
bag_size; i++)
112 if (is_learn && count > 0)
117 probs[chosen].score +=
prob;
119 for (uint32_t j = 1; j < count; j++) base.
learn(ec, i);
127 float additive_probability = 1.f / (float)data.
cover_size;
136 data.
cs->predict(ec, i);
138 data.
cs->predict(ec, i + 1);
140 probs[pred - 1].score += additive_probability;
145 float min_prob = std::min(1.
f / num_actions, 1.
f / (
float)std::sqrt(data.
counter * num_actions));
152 template <
bool is_learn>
164 for (uint32_t j = 0; j < num_actions; j++) data.
cs_label.
costs.push_back({FLT_MAX, j + 1, 0., 0.});
171 float additive_probability = 1.f / (float)cover_size;
173 float min_prob = std::min(1.
f / num_actions, 1.
f / (
float)std::sqrt(counter * num_actions));
189 float norm = min_prob * num_actions;
193 for (uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0;
197 for (
size_t i = 0; i < cover_size; i++)
200 for (uint32_t j = 0; j < num_actions; j++)
203 data.
cs_label.
costs[j].x - data.
psi * min_prob / (std::max(probabilities[j], min_prob) / norm) + 1;
208 data.
cs->learn(ec, i + 1);
209 if (probabilities[predictions[i] - 1] < min_prob)
210 norm += std::max(0.
f, additive_probability - (min_prob - probabilities[predictions[i] - 1]));
212 norm += additive_probability;
213 probabilities[predictions[i] - 1] += additive_probability;
225 std::stringstream label_string;
227 label_string <<
" unknown";
229 label_string << ec.
l.
cb.
costs[0].action;
247 std::stringstream ss;
252 ss << std::fixed << ec.
pred.
a_s[i].score <<
" ";
253 if (ec.
pred.
a_s[i].score > maxprob)
255 maxprob = ec.
pred.
a_s[i].score;
261 std::stringstream sso;
262 sso << maxid <<
":" << std::fixed << maxprob;
276 auto data = scoped_calloc_or_throw<cb_explore>();
279 .add(
make_option(
"cb_explore", data->cbcs.num_actions)
281 .help(
"Online explore-exploit for a <k> action contextual bandit problem"))
282 .
add(
make_option(
"first", data->tau).keep().help(
"tau-first exploration"))
283 .
add(
make_option(
"epsilon", data->epsilon).keep().default_value(0.05
f).help(
"epsilon-greedy exploration"))
284 .
add(
make_option(
"bag", data->bag_size).keep().help(
"bagging-based exploration"))
285 .
add(
make_option(
"cover", data->cover_size).keep().help(
"Online cover based exploration"))
286 .
add(
make_option(
"psi", data->psi).keep().default_value(1.0
f).help(
"disagreement parameter for cover"));
293 uint32_t num_actions = data->cbcs.num_actions;
297 std::stringstream ss;
298 ss << data->cbcs.num_actions;
299 options.
insert(
"cb", ss.str());
306 data->cbcs.scorer = all.
scorer;
312 data->second_cs_label.costs.resize(num_actions);
313 data->second_cs_label.costs.end() = data->second_cs_label.costs.begin() + num_actions;
314 data->cover_probs = v_init<float>();
315 data->cover_probs.resize(num_actions);
316 data->preds = v_init<uint32_t>();
317 data->preds.resize(data->cover_size);
318 l = &
init_learner(data, base, predict_or_learn_cover<true>, predict_or_learn_cover<false>, data->cover_size + 1,
322 l = &
init_learner(data, base, predict_or_learn_bag<true>, predict_or_learn_bag<false>, data->bag_size,
void resize(size_t length)
ACTION_SCORE::action_scores a_s
COST_SENSITIVE::label pred_scores
void predict(E &ec, size_t i=0)
uint32_t weight_gen(std::shared_ptr< rand_state > &state)
LEARNER::base_learner * cost_sensitive
void(* delete_prediction)(void *)
void(* delete_label)(void *)
COST_SENSITIVE::label second_cs_label
COST_SENSITIVE::label cs_label
void predict_or_learn_greedy(cb_explore &data, single_learner &base, example &ec)
v_array< int > final_prediction_sink
v_array< cb_class > costs
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
void print_update_cb_explore(vw &all, bool is_test, example &ec, std::stringstream &pred_string)
score_iterator begin_scores(action_scores &a_s)
CB::cb_class get_observed_cost(multi_ex &examples)
std::shared_ptr< rand_state > get_random_state()
score_iterator end_scores(action_scores &a_s)
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
single_learner * as_singleline(learner< T, E > *l)
base_learner * cb_explore_setup(options_i &options, vw &all)
CB::cb_class * known_cost
void set_finish_example(void(*f)(vw &all, T &, E &))
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
void push_back(const T &new_ele)
std::shared_ptr< rand_state > _random_state
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
virtual bool was_supplied(const std::string &key)=0
void predict_or_learn_cover(cb_explore &data, single_learner &base, example &ec)
void(* print_text)(int, std::string, v_array< char >)
learner< cb_explore, example > * cs
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
void output_example(vw &all, cb_explore &data, example &ec, CB::label &ld)
v_array< float > cover_probs
void finish_example(vw &, example &)
LEARNER::single_learner * scorer
virtual void insert(const std::string &key, const std::string &value)=0
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
int add(svm_params ¶ms, svm_example *fec)
typed_option< T > make_option(std::string name, T &location)
void get_cover_probabilities(cb_explore &data, single_learner &, example &ec, v_array< action_score > &probs)
v_array< uint32_t > preds
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict_or_learn_first(cb_explore &data, single_learner &base, example &ec)
void learn(E &ec, size_t i=0)
void finish_example(vw &all, cb_explore &c, example &ec)
double weighted_examples()
void predict_or_learn_bag(cb_explore &data, single_learner &base, example &ec)