cpp/8.8.1/explore__eval_8cc_source.html

 #include "reductions.h"
 #include "cb_algs.h"
 #include "vw.h"
 #include "cb_adf.h"
 #include "rand48.h"
 #include "gen_cs_example.h"
 #include <memory>

 // Do evaluation of nonstationary policies.
 // input = contextual bandit label
 // output = chosen ranking

 using namespace LEARNER;
 using namespace CB_ALGS;
 using namespace VW::config;

 namespace EXPLORE_EVAL
 {
 // Per-reduction state for explore_eval. An instance is allocated in
 // explore_eval_setup via scoped_calloc_or_throw and handed to init_learner.
 struct explore_eval
 {
   // Observed cost for the current multi-line example; refreshed from
   // CB_ADF::get_observed_cost in do_actual_learning and read by output_example.
   CB::cb_class known_cost;
   // Owning vw instance; used by finish() for the quiet flag and trace output.
   vw* all;
   // Random source obtained from vw::get_random_state(); drawn from once per
   // labeled example to decide whether the learn pass runs.
   std::shared_ptr<rand_state> _random_state;
   // Offset forwarded to multiline_learn_or_predict. It is never assigned in
   // the visible part of this file, so it keeps its allocation-time value.
   uint64_t offset;
   // Holds the real label while it is hidden from the prediction pass.
   CB::label action_label;
   // Label substituted for the real one during the prediction pass; never
   // populated in the visible part of this file.
   CB::label empty_label;
   // NOTE(review): not referenced anywhere in the visible part of this file.
   size_t example_counter;

   // Number of examples for which the learn pass was actually executed.
   size_t update_count;
   // Number of examples whose acceptance threshold exceeded 1 + 1e-6.
   size_t violations;
   // Either the value of the `multiplier` option, or (when that option is not
   // supplied) a running minimum of 1 / threshold that starts at 1.
   float multiplier;

   // True when the `multiplier` option was supplied on the command line.
   bool fixed_multiplier;
 };

 // Emits the end-of-run summary on the trace stream unless vw is in quiet mode:
 // the update count always, the violation count only when non-zero, and the
 // final multiplier only when it was not fixed by the user.
 void finish(explore_eval& data)
 {
   vw& all = *data.all;
   if (all.quiet)
     return;

   all.trace_message << "update count = " << data.update_count << std::endl;

   if (data.violations != 0)
     all.trace_message << "violation count = " << data.violations << std::endl;

   if (!data.fixed_multiplier)
     all.trace_message << "final multiplier = " << data.multiplier << std::endl;
 }

 // Semantics: Currently we compute the IPS loss no matter what flags
 // are specified. We print the first action and probability, based on
 // ordering by scores in the final output.

 // Reports one finished multi-line example.
 //   all    - vw instance whose shared data and output sinks are updated
 //   c      - reduction state; c.known_cost supplies the observed cost
 //   ec     - the example whose prediction, tag, label and weight are reported
 //   ec_seq - the full sequence of lines the example belongs to
 // Does nothing when `ec` is a newline that is not a header.
 void output_example(vw& all, explore_eval& c, example& ec, multi_ex* ec_seq)
 {
   if (example_is_newline_not_header(ec))
     return;

   multi_ex& lines = *ec_seq;

   // Feature count is accumulated over every line except header lines.
   size_t feature_total = 0;
   for (example* line : lines)
   {
     if (CB::ec_is_example_header(*line))
       continue;
     feature_total += line->num_features;
   }

   // A strictly positive probability on the observed cost marks the example as labeled.
   const bool labeled_example = c.known_cost.probability > 0;

   // Loss is the score-weighted sum of per-action cost estimates; it stays 0 when unlabeled.
   float loss = 0.;
   if (labeled_example)
   {
     ACTION_SCORE::action_scores preds = lines[0]->pred.a_s;
     for (uint32_t idx = 0; idx < preds.size(); idx++)
     {
       const float cost_estimate = get_cost_estimate(&c.known_cost, preds[idx].action);
       loss += cost_estimate * preds[idx].score;
     }
   }

   // The example counts as holdout only if it is labeled and every line is test_only.
   bool holdout_example = labeled_example;
   for (example* line : lines)
     if (!line->test_only)
       holdout_example = false;

   all.sd->update(holdout_example, labeled_example, loss, ec.weight, feature_total);

   for (int sink : all.final_prediction_sink) print_action_score(sink, ec.pred.a_s, ec.tag);

   if (all.raw_prediction > 0)
   {
     // Space-separated "action:partial_prediction" pairs taken from the example's label.
     std::stringstream raw_line;
     v_array<CB::cb_class> costs = ec.l.cb.costs;
     for (size_t k = 0; k < costs.size(); k++)
     {
       if (k != 0)
         raw_line << ' ';
       raw_line << costs[k].action << ':' << costs[k].partial_prediction;
     }
     all.print_text(all.raw_prediction, raw_line.str(), ec.tag);
   }

   CB::print_update(all, !labeled_example, ec, ec_seq, true);
 }

 // Reports a non-empty sequence through output_example, using its first line
 // as the representative example, then emits an empty raw-prediction line when
 // raw predictions are enabled. An empty sequence is ignored.
 void output_example_seq(vw& all, explore_eval& data, multi_ex& ec_seq)
 {
   if (ec_seq.empty())
     return;

   example& head = *ec_seq.front();
   output_example(all, data, head, &ec_seq);

   if (all.raw_prediction > 0)
     all.print_text(all.raw_prediction, "", head.tag);
 }

 // finish_example hook for this reduction: reports the sequence and prints a
 // separating newline to the prediction sinks when the sequence is non-empty,
 // then always hands the sequence to VW::finish_example.
 void finish_multiline_example(vw& all, explore_eval& data, multi_ex& ec_seq)
 {
   const bool has_lines = !ec_seq.empty();
   if (has_lines)
   {
     output_example_seq(all, data, ec_seq);
     CB_ADF::global_print_newline(all.final_prediction_sink);
   }

   VW::finish_example(all, ec_seq);
 }

 // Predicts on a multi-line example and, when is_learn is true, decides by
 // rejection sampling whether to also run a learn pass on it.
 //   data   - reduction state (multiplier, counters, random source, scratch labels)
 //   base   - the wrapped multi-line learner
 //   ec_seq - the lines of the current example
 //
 // Steps:
 //   1. Hide the label (if any) and run the base learner in predict mode.
 //   2. Restore the label and record the observed cost in data.known_cost.
 //   3. When learning on a labeled example, compute
 //        threshold = P(predicted policy picks logged action) / logged probability
 //      (scaled by the fixed multiplier when one was supplied), and run the
 //      learn pass with probability `threshold`.
 template <bool is_learn>
 void do_actual_learning(explore_eval& data, multi_learner& base, multi_ex& ec_seq)
 {
   example* label_example = CB_ADF::test_adf_sequence(ec_seq);

   if (label_example != nullptr)  // extract label so the prediction pass cannot see it
   {
     data.action_label = label_example->l.cb;
     label_example->l.cb = data.empty_label;
   }
   multiline_learn_or_predict<false>(base, ec_seq, data.offset);

   if (label_example != nullptr)  // restore label
     label_example->l.cb = data.action_label;

   data.known_cost = CB_ADF::get_observed_cost(ec_seq);

   if (!is_learn || label_example == nullptr)
     return;

   // The logged probability is the divisor of the threshold. A zero, negative or
   // NaN value would yield an inf/NaN/negative threshold and corrupt the running
   // multiplier and the violation count. output_example applies the same
   // "probability > 0" test to decide whether the example is labeled.
   if (!(data.known_cost.probability > 0))
     return;

   ACTION_SCORE::action_scores& a_s = ec_seq[0]->pred.a_s;

   // Probability the predicted distribution assigns to the logged action
   // (stays 0 when that action is absent from the prediction).
   float action_probability = 0;
   for (size_t i = 0; i < a_s.size(); i++)
     if (data.known_cost.action == a_s[i].action)
       action_probability = a_s[i].score;

   float threshold = action_probability / data.known_cost.probability;

   // Without a fixed multiplier only the running minimum of 1 / threshold is
   // tracked (a zero threshold gives +inf, which leaves the minimum unchanged);
   // with a fixed multiplier the threshold itself is scaled.
   if (!data.fixed_multiplier)
     data.multiplier = std::min(data.multiplier, 1 / threshold);
   else
     threshold *= data.multiplier;

   if (threshold > 1. + 1e-6)
     data.violations++;

   if (data._random_state->get_and_update_random() < threshold)
   {
     // Locate the line carrying the usable label before touching any weights,
     // so that bailing out leaves the sequence unmodified.
     example* ec_found = nullptr;
     for (example* ec : ec_seq)
       if (ec->l.cb.costs.size() == 1 && ec->l.cb.costs[0].cost != FLT_MAX && ec->l.cb.costs[0].probability > 0)
         ec_found = ec;

     // No line qualifies: there is nothing to learn from, and dereferencing
     // ec_found below would be undefined behaviour.
     if (ec_found == nullptr)
       return;

     // A threshold above 1 cannot be realised by sampling alone; the excess is
     // carried as an importance weight for the duration of the learn pass.
     if (threshold > 1)
       for (example* ec : ec_seq) ec->weight *= threshold;

     // Learn as if the logged action had been drawn from the predicted distribution.
     ec_found->l.cb.costs[0].probability = action_probability;

     multiline_learn_or_predict<true>(base, ec_seq, data.offset);

     if (threshold > 1)
     {
       float inv_threshold = 1.f / threshold;
       for (auto& ec : ec_seq) ec->weight *= inv_threshold;
     }
     ec_found->l.cb.costs[0].probability = data.known_cost.probability;
     data.update_count++;
   }
 }
 }  // namespace EXPLORE_EVAL

 using namespace EXPLORE_EVAL;

 // Registers the "Explore evaluation" option group and, when `explore_eval`
 // was requested, builds the reduction on top of the base learner stack.
 //   options - option store; parsed here and possibly extended with cb_explore_adf
 //   all     - vw instance being configured
 // Returns nullptr when `explore_eval` was not requested, otherwise the new learner.
 base_learner* explore_eval_setup(options_i& options, vw& all)
 {
   auto data = scoped_calloc_or_throw<explore_eval>();

   bool enabled = false;
   option_group_definition group("Explore evaluation");
   group.add(make_option("explore_eval", enabled).keep().help("Evaluate explore_eval adf policies"));
   group.add(
       make_option("multiplier", data->multiplier).help("Multiplier used to make all rejection sample probabilities <= 1"));
   options.add_and_parse(group);

   if (!enabled)
     return nullptr;

   data->all = &all;
   data->_random_state = all.get_random_state();

   // A user-supplied multiplier is held fixed; otherwise start from 1.
   if (!options.was_supplied("multiplier"))
     data->multiplier = 1;
   else
     data->fixed_multiplier = true;

   // Make sure cb_explore_adf is part of the stack below this reduction.
   if (!options.was_supplied("cb_explore_adf"))
     options.insert("cb_explore_adf", "");

   all.delete_prediction = nullptr;

   // The base stack is built first; the label parser and label type are set afterwards.
   multi_learner* base = as_multiline(setup_base(options, all));
   all.p->lp = CB::cb_label;
   all.label_type = label_type::cb;

   auto& l =
       init_learner(data, base, do_actual_learning<true>, do_actual_learning<false>, 1, prediction_type::action_probs);

   l.set_finish_example(finish_multiline_example);
   l.set_finish(finish);
   return make_base(l);
 }
example::tag
v_array< char > tag
Definition: example.h:63

EXPLORE_EVAL::explore_eval::all
vw * all
Definition: explore_eval.cc:22

vw::raw_prediction
int raw_prediction
Definition: global_data.h:519

polyprediction::a_s
ACTION_SCORE::action_scores a_s
Definition: example.h:47

CB::ec_is_example_header
bool ec_is_example_header(example const &ec)
Definition: cb.cc:170

explore_eval_setup
base_learner * explore_eval_setup(options_i &options, vw &all)
Definition: explore_eval.cc:186

EXPLORE_EVAL::explore_eval::fixed_multiplier
bool fixed_multiplier
Definition: explore_eval.cc:33

vw::delete_prediction
void(* delete_prediction)(void *)
Definition: global_data.h:485

output_example_seq
void output_example_seq(vw &all, multi_ex &ec_seq)
Definition: cbify.cc:356

finish_multiline_example
void finish_multiline_example(vw &all, cbify &, multi_ex &ec_seq)
Definition: cbify.cc:373

polylabel::cb
CB::label cb
Definition: example.h:31

EXPLORE_EVAL::explore_eval::empty_label
CB::label empty_label
Definition: explore_eval.cc:26

EXPLORE_EVAL::output_example
void output_example(vw &all, explore_eval &c, example &ec, multi_ex *ec_seq)
Definition: explore_eval.cc:52

vw::label_type
label_type::label_type_t label_type
Definition: global_data.h:550

EXPLORE_EVAL::explore_eval::offset
uint64_t offset
Definition: explore_eval.cc:24

vw::final_prediction_sink
v_array< int > final_prediction_sink
Definition: global_data.h:518

CB::label::costs
v_array< cb_class > costs
Definition: cb.h:27

LEARNER::make_base
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462

CB_ADF::test_adf_sequence
example * test_adf_sequence(multi_ex &ec_seq)
Definition: cb_adf.cc:268

VW::config::option_group_definition
Definition: options.h:85

vw::quiet
bool quiet
Definition: global_data.h:487

EXPLORE_EVAL::explore_eval::update_count
size_t update_count
Definition: explore_eval.cc:29

VW::config::options_i::add_and_parse
virtual void add_and_parse(const option_group_definition &group)=0

loss
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60

CB_ALGS::get_cost_estimate
float get_cost_estimate(CB::cb_class *observation, uint32_t action, float offset=0.)
Definition: cb_algs.h:58

EXPLORE_EVAL::explore_eval::example_counter
size_t example_counter
Definition: explore_eval.cc:27

LEARNER::learner
Definition: cb_explore.h:11

v_array::size
size_t size() const
Definition: v_array.h:68

EXPLORE_EVAL::explore_eval::_random_state
std::shared_ptr< rand_state > _random_state
Definition: explore_eval.cc:23

vw
Definition: global_data.h:369

CB_ADF::get_observed_cost
CB::cb_class get_observed_cost(multi_ex &examples)
Definition: cb_adf.cc:99

vw::p
parser * p
Definition: global_data.h:377

EXPLORE_EVAL::explore_eval
Definition: explore_eval.cc:19

vw::get_random_state
std::shared_ptr< rand_state > get_random_state()
Definition: global_data.h:553

CB::cb_class
Definition: cb.h:15

CB::print_update
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores)
Definition: cb.cc:180

LEARNER::learner::set_finish_example
void set_finish_example(void(*f)(vw &all, T &, E &))
Definition: learner.h:307

CB::cb_class::action
uint32_t action
Definition: cb.h:18

EXPLORE_EVAL
Definition: explore_eval.cc:17

LEARNER::init_learner
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369

vw::sd
shared_data * sd
Definition: global_data.h:375

CB::cb_class::probability
float probability
Definition: cb.h:19

EXPLORE_EVAL::explore_eval::violations
size_t violations
Definition: explore_eval.cc:30

vw::trace_message
vw_ostream trace_message
Definition: global_data.h:424

VW::config::options_i::was_supplied
virtual bool was_supplied(const std::string &key)=0

CB_ADF::global_print_newline
void global_print_newline(const v_array< int > &final_prediction_sink)
Definition: cb_adf.cc:342

CB_ALGS
Definition: cb_algs.cc:19

VW::config::options_i
Definition: options.h:107

rand48.h

example
Definition: example.h:54

vw::print_text
void(* print_text)(int, std::string, v_array< char >)
Definition: global_data.h:522

EXPLORE_EVAL::explore_eval::action_label
CB::label action_label
Definition: explore_eval.cc:25

VW::finish_example
void finish_example(vw &, example &)
Definition: parser.cc:881

reductions.h

VW::config::options_i::insert
virtual void insert(const std::string &key, const std::string &value)=0

shared_data::update
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190

VW::config::option_group_definition::add
option_group_definition & add(T &&op)
Definition: options.h:90

cb_adf.h

multi_ex
std::vector< example * > multi_ex
Definition: example.h:122

CB::cb_label
label_parser cb_label
Definition: cb.cc:167

VW::config
Definition: options.h:11

example::l
polylabel l
Definition: example.h:57

EXPLORE_EVAL::do_actual_learning
void do_actual_learning(explore_eval &data, multi_learner &base, multi_ex &ec_seq)
Definition: explore_eval.cc:124

VW::config::make_option
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80

LEARNER::example_is_newline_not_header
bool example_is_newline_not_header(example &ec, vw &all)
Definition: learner.cc:68

prediction_type::action_probs
Definition: learner.h:23

CB::label
Definition: cb.h:25

LEARNER::learner::set_finish
void set_finish(void(*f)(T &))
Definition: learner.h:265

vw.h

setup_base
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222

label_type::cb
Definition: global_data.h:346

EXPLORE_EVAL::finish
void finish(explore_eval &data)
Definition: explore_eval.cc:36

example::pred
polyprediction pred
Definition: example.h:60

EXPLORE_EVAL::explore_eval::known_cost
CB::cb_class known_cost
Definition: explore_eval.cc:21

LEARNER
Definition: cb_explore.h:8

cb_algs.h

EXPLORE_EVAL::explore_eval::multiplier
float multiplier
Definition: explore_eval.cc:31

ACTION_SCORE::print_action_score
void print_action_score(int f, v_array< action_score > &a_s, v_array< char > &tag)
Definition: action_score.cc:8

example::weight
float weight
Definition: example.h:62

v_array< action_score >

gen_cs_example.h

c
constexpr uint64_t c
Definition: rand48.cc:12

LEARNER::as_multiline
multi_learner * as_multiline(learner< T, E > *l)
Definition: learner.h:468

parser::lp
label_parser lp
Definition: parser.h:102