15 #include <unordered_set> 33 std::vector<example*> actions,
slots;
73 for (
auto ex : examples)
75 switch (ex->l.conditional_contextual_bandit.type)
84 data.
slots.push_back(ex);
87 std::cout <<
"ccb_adf_explore: badly formatted example - invalid example type";
92 data.
stored_labels.push_back({ex->l.conditional_contextual_bandit.type, ex->l.conditional_contextual_bandit.outcome,
93 ex->l.conditional_contextual_bandit.explicit_included_actions, 0.});
99 template <
bool is_learn>
104 std::cerr <<
"ccb_adf_explore: badly formatted example - number of actions " << data.
actions.size()
105 <<
" must be greater than the number of slots " << data.
slots.size();
113 if (
slot->l.conditional_contextual_bandit.outcome !=
nullptr &&
114 slot->l.conditional_contextual_bandit.outcome->probabilities.size() == 0)
116 std::cerr <<
"ccb_adf_explore: badly formatted example - missing label probability";
165 for (
auto& action_score : pred)
167 action_score.action = data.
origin_index[action_score.action];
171 auto original_index_of_chosen_action = pred[0].action;
172 data.
exclude_list[original_index_of_chosen_action] =
true;
192 for (
auto index : slot->
indices)
210 template <
bool audit>
246 template <
bool audit>
261 for (
auto index : slot->
indices)
282 example*
shared, std::vector<example*> actions, std::vector<std::string>& generated_interactions)
284 std::bitset<INTERACTIONS::printable_ns_size> found_namespaces;
286 const auto original_size = generated_interactions.size();
287 for (
size_t i = 0; i < original_size; i++)
289 auto interaction_copy = generated_interactions[i];
291 generated_interactions.push_back(interaction_copy);
294 for (
const auto&
action : actions)
296 for (
const auto& action_index :
action->indices)
302 generated_interactions.push_back({(char)action_index, (
char)
ccb_id_namespace});
307 for (
const auto& shared_index : shared->
indices)
313 generated_interactions.push_back({(char)shared_index, (
char)
ccb_id_namespace});
319 template <
bool is_learn>
327 cb_ex.push_back(data.
shared);
331 if (explicit_includes.size() != 0)
339 for (uint32_t included_action_id : explicit_includes)
349 for (
size_t i = 0; i < data.
actions.size(); i++)
360 cb_ex.push_back(data.
actions[i]);
384 template <
bool is_learn>
392 if (!sanity_checks<is_learn>(data))
402 auto decision_scores = examples[0]->pred.decision_scores;
420 build_cb_example<is_learn>(data.
cb_ex,
slot, data);
423 inject_slot_id<true>(data, data.
shared, slot_id);
425 inject_slot_id<false>(data, data.shared, slot_id);
430 multiline_learn_or_predict<is_learn>(base, data.cb_ex, examples[0]->ft_offset);
437 decision_scores.push_back(data.action_score_pool.get_object());
440 data.shared->interactions = data.original_interactions;
441 for (
auto ex : data.actions)
443 ex->interactions = data.original_interactions;
448 remove_slot_id<true>(data.shared);
450 remove_slot_id<false>(data.shared);
453 std::swap(data.shared->tag, slot->
tag);
461 for (
size_t i = 0; i < examples.size(); i++)
463 examples[i]->l.conditional_contextual_bandit = {
468 examples[0]->pred.decision_scores = decision_scores;
475 std::stringstream ss;
476 for (
auto slot : decision_scores)
478 std::string delimiter =
"";
479 for (
auto action_score :
slot)
481 ss << delimiter << action_score.action <<
':' << action_score.score;
486 ssize_t len = ss.str().size();
489 std::cerr <<
"write error: " << strerror(errno) << std::endl;
497 std::string label_str =
"";
498 std::string delim =
"";
500 for (
auto slot : slots)
504 auto outcome =
slot->l.conditional_contextual_bandit.outcome;
505 if (outcome ==
nullptr)
521 if (counter > 1 && slots.size() > 2)
528 std::ostringstream label_buf;
529 label_buf << std::setw(all.
sd->
col_current_label) << std::right << std::setfill(
' ') << label_str;
531 std::string pred_str =
"";
534 for (
auto slot : decision_scores)
549 std::ostringstream pred_buf;
559 if (ec_seq.size() <= 0)
562 std::vector<example*> slots;
563 size_t num_features = 0;
567 for (
auto ec : ec_seq)
569 num_features += ec->num_features;
578 size_t num_labelled = 0;
579 auto preds = ec_seq[0]->pred.decision_scores;
580 for (
size_t i = 0; i < slots.size(); i++)
582 auto outcome = slots[i]->l.conditional_contextual_bandit.outcome;
583 if (outcome !=
nullptr)
587 outcome->probabilities[TOP_ACTION_INDEX], outcome->cost, preds[i][TOP_ACTION_INDEX].action);
592 if (num_labelled > 0 && num_labelled < slots.size())
594 std::cerr <<
"Warning: Unlabeled example in train set, was this intentional?\n";
597 bool holdout_example = num_labelled > 0;
598 for (
size_t i = 0; i < ec_seq.size(); i++) holdout_example &= ec_seq[i]->test_only;
601 all.
sd->
update(holdout_example, num_labelled > 0, loss, ec_seq[SHARED_EX_INDEX]->
weight, num_features);
611 if (ec_seq.size() > 0)
617 for (
auto& a_s : ec_seq[0]->pred.decision_scores)
621 ec_seq[0]->pred.decision_scores.clear();
628 auto data = scoped_calloc_or_throw<ccb>();
629 bool ccb_explore_adf_option =
false;
632 "EXPERIMENTAL: Conditional Contextual Bandit Exploration with Action Dependent Features");
634 make_option(
"ccb_explore_adf", ccb_explore_adf_option)
636 .help(
"EXPERIMENTAL: Do Conditional Contextual Bandit learning with multiline action dependent features."));
637 new_options.
add(
make_option(
"slate", slate).keep().help(
"EXPERIMENTAL - MAY CHANGE: Enable slate mode in CCB."));
640 if (!ccb_explore_adf_option)
645 options.
insert(
"cb_explore_adf",
"");
651 THROW(
"--slate and --cb_sample cannot be supplied together");
656 options.
insert(
"cb_sample",
"");
668 data->default_cb_label = {FLT_MAX, 0, -1.f, 0.f};
669 data->shared =
nullptr;
674 data->id_namespace_str.append(
"_id");
675 data->id_namespace_hash =
VW::hash_space(all, data->id_namespace_str);
void remove_slot_id(example *shared)
VW::v_array_pool< CB::cb_class > cb_label_pool
void return_v_array(v_array< T > &array, VW::v_array_pool< T > &pool)
v_array< namespace_index > indices
ACTION_SCORE::action_scores a_s
void attach_label_to_example(uint32_t action_index_one_based, example *example, conditional_contextual_bandit_outcome *outcome, ccb &data)
void(* delete_prediction)(void *)
CB::cb_class default_cb_label
void clear_all(ccb &data)
constexpr unsigned char default_namespace
static ssize_t write_file_or_socket(int f, const void *buf, size_t nbytes)
std::shared_ptr< audit_strings > audit_strings_ptr
static constexpr int col_current_label
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
VW::v_array_pool< ACTION_SCORE::action_score > action_score_pool
std::vector< std::string > * interactions
void print_decision_scores(int f, decision_scores_t &decision_scores)
label_type::label_type_t label_type
void del_example_namespace(example &ec, namespace_index ns, features &fs)
v_array< int > final_prediction_sink
constexpr unsigned char ccb_slot_namespace
v_array< cb_class > costs
std::vector< CCB::label > stored_labels
base_learner * make_base(learner< T, E > &base)
std::vector< example * > actions
virtual void add_and_parse(const option_group_definition &group)=0
std::vector< bool > exclude_list
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
constexpr bool is_printable_namespace(const unsigned char ns)
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
label_parser ccb_label_parser
void print_update(vw &all, std::vector< example *> &slots, decision_scores_t &decision_scores, size_t num_features)
void learn_or_predict(cb_sample_data &data, multi_learner &base, multi_ex &examples)
void return_object(T obj)
bool sanity_checks(ccb &data)
ACTION_SCORE::action_scores probabilities
std::vector< example * > slots
std::array< features, NUM_NAMESPACES > feature_space
std::vector< uint32_t > origin_index
static constexpr uint32_t TOP_ACTION_INDEX
void set_finish_example(void(*f)(vw &all, T &, E &))
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
void delete_action_scores(void *v)
bool ec_is_example_header(example const &ec)
void push_back(const T &new_ele)
bool split_multi_example_and_stash_labels(const multi_ex &examples, ccb &data)
void delete_cb_labels(ccb &data)
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
bool has_action(multi_ex &cb_ex)
virtual bool was_supplied(const std::string &key)=0
void global_print_newline(const v_array< int > &final_prediction_sink)
void build_cb_example(multi_ex &cb_ex, example *slot, ccb &data)
constexpr unsigned char printable_start
void clear_pred_and_label(ccb &data)
void finish_example(vw &, example &)
virtual void insert(const std::string &key, const std::string &value)=0
void create_cb_labels(ccb &data)
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
static constexpr uint32_t SHARED_EX_INDEX
option_group_definition & add(T &&op)
std::vector< example * > multi_ex
uint64_t id_namespace_hash
size_t base_learner_stride_shift
base_learner * ccb_explore_adf_setup(options_i &options, vw &all)
typed_option< T > make_option(std::string name, T &location)
v_array< uint32_t > explicit_included_actions
void add_example_namespace(example &ec, namespace_index ns, features &fs)
void save_action_scores(ccb &data, decision_scores_t &decision_scores)
CCB::label conditional_contextual_bandit
std::vector< std::string > interactions
void inject_slot_features(example *shared, example *slot)
std::vector< bool > include_list
uint64_t hash_feature(vw &all, const std::string &s, uint64_t u)
uint64_t hash_space(vw &all, const std::string &s)
void inject_slot_id(ccb &data, example *shared, size_t id)
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void output_example(vw &all, ccb &, multi_ex &ec_seq)
static constexpr int col_current_predict
std::vector< uint64_t > slot_id_hashes
constexpr unsigned char constant_namespace
std::vector< std::string > * original_interactions
std::string id_namespace_str
double weighted_examples()
void remove_slot_features(example *shared, example *slot)
conditional_contextual_bandit_outcome * outcome
std::vector< std::string > generated_interactions
multi_learner * as_multiline(learner< T, E > *l)
const char * to_string(prediction_type_t prediction_type)
constexpr unsigned char ccb_id_namespace
void calculate_and_insert_interactions(example *shared, std::vector< example *> actions, std::vector< std::string > &generated_interactions)
std::pair< std::string, std::string > audit_strings