#include <cmath>
#include <errno.h>
#include <memory>
#include "reductions.h"
#include "rand48.h"
#include "float.h"
#include "vw.h"

Classes
struct	active_cover

Functions
float	sign (float w)

bool	dis_test (vw &all, example &ec, single_learner &base, float, float threshold)

float	get_threshold (float sum_loss, float t, float c0, float alpha)

float	get_pmin (float sum_loss, float t)

float	query_decision (active_cover &a, single_learner &l, example &ec, float prediction, float pmin, bool in_dis)

template<bool is_learn>
void	predict_or_learn_active_cover (active_cover &a, single_learner &base, example &ec)

base_learner *	active_cover_setup (options_i &options, vw &all)

Function Documentation

◆ active_cover_setup()

base_learner* active_cover_setup	(	options_i &	options,
		vw &	all
	)

Definition at line 230 of file active_cover.cc.

References add(), VW::config::options_i::add_and_parse(), LEARNER::as_singleline(), f, vw::get_random_state(), LEARNER::init_learner(), LEARNER::make_base(), VW::config::make_option(), setup_base(), THROW, and VW::config::options_i::was_supplied().

Referenced by parse_reductions().

 {
   auto data = scoped_calloc_or_throw<active_cover>();
   option_group_definition new_options("Active Learning with Cover");
 
   bool active_cover_option = false;
   new_options.add(make_option("active_cover", active_cover_option).keep().help("enable active learning with cover"))
       .add(make_option("mellowness", data->active_c0)
                .default_value(8.f)
                .help("active learning mellowness parameter c_0. Default 8."))
       .add(make_option("alpha", data->alpha)
                .default_value(1.f)
                .help("active learning variance upper bound parameter alpha. Default 1."))
       .add(make_option("beta_scale", data->beta_scale)
                .default_value(sqrtf(10.f))
                .help("active learning variance upper bound parameter beta_scale. Default std::sqrt(10)."))
       .add(make_option("cover", data->cover_size).keep().default_value(12).help("cover size. Default 12."))
       .add(make_option("oracular", data->oracular).help("Use Oracular-CAL style query or not. Default false."));
   options.add_and_parse(new_options);
 
   if (!active_cover_option)
     return nullptr;
 
   data->all = &all;
   data->_random_state = all.get_random_state();
   data->beta_scale *= data->beta_scale;
 
   if (data->oracular)
     data->cover_size = 0;
 
   if (options.was_supplied("lda"))
     THROW("error: you can't combine lda and active learning");
 
   if (options.was_supplied("active"))
     THROW("error: you can't use --active_cover and --active at the same time");
 
   auto base = as_singleline(setup_base(options, all));
 
   data->lambda_n = new float[data->cover_size];
   data->lambda_d = new float[data->cover_size];
 
   for (size_t i = 0; i < data->cover_size; i++)
   {
     data->lambda_n[i] = 0.f;
     data->lambda_d[i] = 1.f / 8.f;
   }
 
   // Create new learner
   learner<active_cover, example>& l = init_learner(
       data, base, predict_or_learn_active_cover<true>, predict_or_learn_active_cover<false>, data->cover_size + 1);
 
   return make_base(l);
 }

◆ dis_test()

bool dis_test	(	vw &	all,
		example &	ec,
		single_learner &	base,
		float	,
		float	threshold
	)

Definition at line 43 of file active_cover.cc.

References example::confidence, example::pred, polyprediction::scalar, vw::sd, LEARNER::learner< T, E >::sensitivity(), shared_data::t, and example::weight.

Referenced by predict_or_learn_active_cover().

 {
   if (all.sd->t + ec.weight <= 3)
   {
     return true;
   }
 
   // Get loss difference
   float middle = 0.f;
   ec.confidence = fabsf(ec.pred.scalar - middle) / base.sensitivity(ec);
 
   float k = (float)all.sd->t;
   float loss_delta = ec.confidence / k;
 
   bool result = (loss_delta <= threshold);
 
   return result;
 }

◆ get_pmin()

float get_pmin	(	float	sum_loss,
		float	t
	)

Definition at line 76 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover().

 {
   // t = ec.example_t - 1
   if (t <= 2.f)
   {
     return 1.f;
   }
 
   float avg_loss = sum_loss / t;
   float pmin = fmin(1.f / (std::sqrt(t * avg_loss) + log(t)), 0.5f);
   return pmin;  // treating n*eps_n = 1
 }

◆ get_threshold()

float get_threshold	(	float	sum_loss,
		float	t,
		float	c0,
		float	alpha
	)

Definition at line 62 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover().

 {
   if (t < 3.f)
   {
     return 1.f;
   }
   else
   {
     float avg_loss = sum_loss / t;
     float threshold = std::sqrt(c0 * avg_loss / t) + fmax(2.f * alpha, 4.f) * c0 * log(t) / t;
     return threshold;
   }
 }

◆ predict_or_learn_active_cover()

template<bool is_learn>

void predict_or_learn_active_cover	(	active_cover &	a,
		single_learner &	base,
		example &	ec
	)

Definition at line 132 of file active_cover.cc.

References active_cover::active_c0, active_cover::all, active_cover::alpha, active_cover::beta_scale, active_cover::cover_size, dis_test(), f, get_pmin(), get_threshold(), example::l, label_data::label, active_cover::lambda_d, active_cover::lambda_n, LEARNER::learner< T, E >::learn(), example::pred, LEARNER::learner< T, E >::predict(), shared_data::queries, query_decision(), polyprediction::scalar, vw::sd, sign(), polylabel::simple, shared_data::sum_loss, shared_data::t, and example::weight.

 {
   // Score the example with the base learner at offset 0. This is the only
   // work done when is_learn is false.
   base.predict(ec, 0);
 
   if (is_learn)
   {
     vw& all = *a.all;
 
     // Snapshot the incoming prediction, weight and label: ec is reused as
     // scratch space by every learn/predict call below.
     float prediction = ec.pred.scalar;
     float t = (float)a.all->sd->t;
     float ec_input_weight = ec.weight;
     float ec_input_label = ec.l.simple.label;
 
     // Compute threshold defining allowed set A
     float threshold = get_threshold((float)all.sd->sum_loss, t, a.active_c0, a.alpha);
     bool in_dis = dis_test(all, ec, base, prediction, threshold);
     float pmin = get_pmin((float)all.sd->sum_loss, t);
     // A positive importance means "query, using this importance weight";
     // query_decision returns -1 otherwise.
     float importance = query_decision(a, base, ec, prediction, pmin, in_dis);
 
     // Query (or not)
     if (!in_dis)  // Use predicted label
     {
       // Train the base learner on its own predicted sign with the input weight.
       ec.l.simple.label = sign(prediction);
       ec.weight = ec_input_weight;
       base.learn(ec, 0);
     }
     else if (importance > 0)  // Use importance-weighted example
     {
       // Count the query and train on the supplied label, re-weighted by importance.
       all.sd->queries += 1;
       ec.weight = ec_input_weight * importance;
       ec.l.simple.label = ec_input_label;
       base.learn(ec, 0);
     }
     else  // skipped example
     {
       // Make sure the loss computation does not include
       // skipped examples
       ec.l.simple.label = FLT_MAX;
       ec.weight = 0;
     }
 
     // Update the learners in the cover and their weights
     // q2 starts at 4 * pmin^2 and grows inside the loop below.
     float q2 = 4.f * pmin * pmin;
     float p, s, cost, cost_delta = 0;
     // Remember the label/weight chosen above; they are restored on ec after the loop.
     float ec_output_label = ec.l.simple.label;
     float ec_output_weight = ec.weight;
     // Note: beta_scale was squared once in active_cover_setup().
     float r = 2.f * threshold * t * a.alpha / a.active_c0 / a.beta_scale;
 
     // Set up costs
     // cost = cost of predicting erm's prediction
     // cost_delta = cost - cost of predicting the opposite label
     if (in_dis)
     {
       // Non-zero only when the base prediction's sign differs from the input
       // label's sign; a negative importance (not queried) is clamped to 0.
       cost = r * (fmax(importance, 0.f)) * ((float)(sign(prediction) != sign(ec_input_label)));
     }
     else
     {
       // cost_delta stays at -r for every cover learner in this case.
       cost = 0.f;
       cost_delta = -r;
     }
 
     // Cover learner i lives at offset i + 1 (offset 0 is the base learner).
     for (size_t i = 0; i < a.cover_size; i++)
     {
       // Update cost
       if (in_dis)
       {
         // p is derived from the running q2, so cost_delta is recomputed for
         // each cover learner.
         p = std::sqrt(q2) / (1.f + std::sqrt(q2));
         s = 2.f * a.alpha * a.alpha - 1.f / p;
         cost_delta = 2.f * cost - r * (fmax(importance, 0.f)) - s;
       }
 
       // Choose min-cost label as the label
       // Set importance weight to be the cost difference
       ec.l.simple.label = -1.f * sign(cost_delta) * sign(prediction);
       ec.weight = ec_input_weight * fabs(cost_delta);
 
       // Update learner
       // The predict call refreshes ec.pred.scalar with this cover learner's
       // post-update prediction, which the lambda updates below read.
       base.learn(ec, i + 1);
       base.predict(ec, i + 1);
 
       // Update numerator of lambda
       // Only changes when this cover learner disagrees with the base
       // prediction; the result is clamped at 0.
       a.lambda_n[i] += 2.f * ((float)(sign(ec.pred.scalar) != sign(prediction))) * cost_delta;
       a.lambda_n[i] = fmax(a.lambda_n[i], 0.f);
 
       // Update denominator of lambda
       // Only changes on disagreement while in_dis is true; increment is q2^(-3/2).
       a.lambda_d[i] += ((float)(sign(ec.pred.scalar) != sign(prediction) && in_dis)) / (float)pow(q2, 1.5);
 
       // Accumulating weights of learners in the cover
       // (same accumulation as the loop in query_decision()).
       q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
     }
 
     // Restoring the weight, the label, and the prediction
     // Label and weight go back to the values chosen in the query step above
     // (not the original inputs); the prediction goes back to the base
     // learner's pre-update prediction.
     ec.weight = ec_output_weight;
     ec.l.simple.label = ec_output_label;
     ec.pred.scalar = prediction;
   }
 }

◆ query_decision()

float query_decision	(	active_cover &	a,
		single_learner &	l,
		example &	ec,
		float	prediction,
		float	pmin,
		bool	in_dis
	)

Definition at line 89 of file active_cover.cc.

References active_cover::_random_state, active_cover::all, active_cover::cover_size, active_cover::lambda_d, active_cover::lambda_n, active_cover::oracular, example::pred, LEARNER::learner< T, E >::predict(), polyprediction::scalar, vw::sd, sign(), shared_data::t, and example::weight.

Referenced by predict_or_learn_active_cover().

 {
   if (a.all->sd->t + ec.weight <= 3)
   {
     return 1.f;
   }
 
   if (!in_dis)
   {
     return -1.f;
   }
 
   if (a.oracular)
   {
     return 1.f;
   }
 
   float p, q2 = 4.f * pmin * pmin;
 
   for (size_t i = 0; i < a.cover_size; i++)
   {
     l.predict(ec, i + 1);
     q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
   }
 
   p = std::sqrt(q2) / (1 + std::sqrt(q2));
 
   if (std::isnan(p))
   {
     p = 1.f;
   }
 
   if (a._random_state->get_and_update_random() <= p)
   {
     return 1.f / p;
   }
   else
   {
     return -1.f;
   }
 }

◆ sign()

float sign ( float w )

inline

Definition at line 12 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover(), and query_decision().

 {
   if (w <= 0.f)
     return -1.f;
   else
     return 1.f;
 }

Classes

Functions

Function Documentation

◆ active_cover_setup()

◆ dis_test()

◆ get_pmin()

◆ get_threshold()

◆ predict_or_learn_active_cover()

◆ query_decision()

◆ sign()