18 #define B_SEARCH_MAX_ITER 20 22 namespace cb_explore_adf
40 std::vector<ACTION_SCORE::action_scores>
_ex_as;
44 cb_explore_adf_regcb(
bool regcbopt,
float c0,
bool first_only,
float min_cb_cost,
float max_cb_cost);
52 template <
bool is_learn>
56 float binary_search(
float fhat,
float delta,
float sens,
float tol = 1e-6);
60 bool regcbopt,
float c0,
bool first_only,
float min_cb_cost,
float max_cb_cost)
68 const float maxw = (std::min)(fhat / sens, FLT_MAX);
70 if (maxw * fhat * fhat <= delta)
80 v = w * (fhat * fhat - (fhat - sens * w) * (fhat - sens * w)) - delta;
85 if (fabs(v) <= tol || u - l <= tol)
94 const size_t num_actions = examples[0]->pred.a_s.size();
102 for (
const auto& ex : examples)
104 _ex_as.push_back(ex->pred.a_s);
109 for (
const auto& as :
_ex_as[0])
111 examples[as.action]->pred.scalar = as.score;
117 for (
size_t a = 0;
a < num_actions; ++
a)
124 if (ec->
pred.
scalar < cmin || std::isnan(sens) || std::isinf(sens))
138 if (ec->
pred.
scalar > cmax || std::isnan(sens) || std::isinf(sens))
153 for (
size_t i = 0; i < examples.size(); ++i)
155 examples[i]->pred.a_s = _ex_as[i];
160 template <
bool is_learn>
165 for (
size_t i = 0; i < examples.size() - 1; ++i)
168 if (ld.
costs.size() == 1)
169 ld.
costs[0].probability = 1.f;
172 LEARNER::multiline_learn_or_predict<true>(base, examples, examples[0]->ft_offset);
176 LEARNER::multiline_learn_or_predict<false>(base, examples, examples[0]->ft_offset);
179 uint32_t num_actions = (uint32_t)preds.
size();
183 const float delta =
_c0 * log((
float)(num_actions *
_counter)) * pow(max_range, 2);
191 float min_cost = FLT_MAX;
193 for (
size_t a = 0;
a < num_actions; ++
a)
201 for (
size_t i = 0; i < preds.size(); ++i)
211 float min_max_cost = FLT_MAX;
212 for (
size_t a = 0;
a < num_actions; ++
a)
215 for (
size_t i = 0; i < preds.size(); ++i)
232 bool cb_explore_adf_option =
false;
234 const std::string mtr =
"mtr";
235 std::string type_string(mtr);
236 bool regcbopt =
false;
238 bool first_only =
false;
239 float min_cb_cost = 0.;
240 float max_cb_cost = 0.;
245 .help(
"Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
246 .
add(
make_option(
"regcb", regcb).keep().help(
"RegCB-elim exploration"))
247 .
add(
make_option(
"regcbopt", regcbopt).keep().help(
"RegCB optimistic exploration"))
248 .
add(
make_option(
"mellowness", c0).keep().default_value(0.1
f).help(
"RegCB mellowness parameter c_0. Default 0.1"))
249 .
add(
make_option(
"cb_min_cost", min_cb_cost).keep().default_value(0.
f).help(
"lower bound on cost"))
250 .
add(
make_option(
"cb_max_cost", max_cb_cost).keep().default_value(1.
f).help(
"upper bound on cost"))
251 .
add(
make_option(
"first_only", first_only).keep().help(
"Only explore the first action in a tie-breaking event"))
254 .help(
"contextual bandit method to use in {ips,dr,mtr}. Default: mtr"));
263 options.
insert(
"cb_adf",
"");
265 if (type_string != mtr)
267 all.
trace_message <<
"warning: bad cb_type, RegCB only supports mtr; resetting to mtr." << std::endl;
268 options.
replace(
"cb_type", mtr);
274 size_t problem_multiplier = 1;
281 auto data = scoped_calloc_or_throw<explore_type>(regcbopt, c0, first_only, min_cb_cost, max_cb_cost);
void predict_or_learn_impl(LEARNER::multi_learner &base, multi_ex &examples)
std::vector< float > _max_costs
float binary_search(float fhat, float delta, float sens, float tol=1e-6)
void(* delete_prediction)(void *)
void predict(LEARNER::multi_learner &base, multi_ex &examples)
void get_cost_ranges(float delta, LEARNER::multi_learner &base, multi_ex &examples, bool min_only)
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
virtual void replace(const std::string &key, const std::string &value)=0
label_type::label_type_t label_type
std::vector< float > _min_costs
v_array< cb_class > costs
base_learner * make_base(learner< T, E > &base)
virtual void add_and_parse(const option_group_definition &group)=0
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
score_iterator begin_scores(action_scores &a_s)
score_iterator end_scores(action_scores &a_s)
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
virtual bool was_supplied(const std::string &key)=0
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
std::vector< v_array< CB::cb_class > > _ex_costs
virtual void insert(const std::string &key, const std::string &value)=0
cb_explore_adf_regcb(bool regcbopt, float c0, bool first_only, float min_cb_cost, float max_cb_cost)
float sensitivity(example &ec, size_t i=0)
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
typed_option< T > make_option(std::string name, T &location)
void learn(LEARNER::multi_learner &base, multi_ex &examples)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void predict(bfgs &b, base_learner &, example &ec)
#define B_SEARCH_MAX_ITER
void learn(bfgs &b, base_learner &base, example &ec)
std::vector< ACTION_SCORE::action_scores > _ex_as
~cb_explore_adf_regcb()=default
multi_learner * as_multiline(learner< T, E > *l)