bool cb_explore_adf_option = false;
float epsilon = 0.f;
size_t bag_size = 0;
bool greedify = false;
bool first_only = false;
config::option_group_definition new_options("Contextual Bandit Exploration with Action Dependent Features");
new_options
    .add(make_option("cb_explore_adf", cb_explore_adf_option)
             .keep()
             .help("Online explore-exploit for a contextual bandit problem with multiline action dependent features"))
    .add(make_option("epsilon", epsilon).keep().help("epsilon-greedy exploration"))
    .add(make_option("bag", bag_size).keep().help("bagging-based exploration"))
    .add(make_option("greedify", greedify).keep().help("always update first policy once in bagging"))
    .add(make_option("first_only", first_only).keep().help("Only explore the first action in a tie-breaking event"));
options.add_and_parse(new_options);
if (!cb_explore_adf_option || !options.was_supplied("bag"))
  return nullptr;

// Ensure serialization of cb_adf in all cases.
if (!options.was_supplied("cb_adf"))
{
  options.insert("cb_adf", "");
}
size_t problem_multiplier = bag_size;

using explore_type = cb_explore_adf_base<cb_explore_adf_bag>;
auto data = scoped_calloc_or_throw<explore_type>(epsilon, bag_size, greedify, first_only, all.get_random_state());
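// Completion sketch (assumption, not verbatim from this file): a cb_explore_adf-style
// setup() typically finishes by building the multiline base reduction and wrapping it
// in a learner that makes `problem_multiplier` (= bag_size) predictions per example.
// The calls below follow the signatures referenced on this page (as_multiline,
// setup_base, init_learner, make_base); the member names explore_type::learn,
// explore_type::predict, explore_type::finish_multiline_example and the action_probs
// prediction type are assumptions, not taken from this listing.
LEARNER::multi_learner* base = LEARNER::as_multiline(setup_base(options, all));
LEARNER::learner<explore_type, multi_ex>& l = LEARNER::init_learner(data, base,
    explore_type::learn, explore_type::predict, problem_multiplier,
    prediction_type::action_probs);
l.set_finish_example(explore_type::finish_multiline_example);
return make_base(l);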