Vowpal Wabbit
Classes | Functions
active_cover.cc File Reference
#include <cmath>
#include <errno.h>
#include <memory>
#include "reductions.h"
#include "rand48.h"
#include "float.h"
#include "vw.h"

Go to the source code of this file.

Classes

struct  active_cover
 

Functions

float sign (float w)
 
bool dis_test (vw &all, example &ec, single_learner &base, float, float threshold)
 
float get_threshold (float sum_loss, float t, float c0, float alpha)
 
float get_pmin (float sum_loss, float t)
 
float query_decision (active_cover &a, single_learner &l, example &ec, float prediction, float pmin, bool in_dis)
 
template<bool is_learn>
void predict_or_learn_active_cover (active_cover &a, single_learner &base, example &ec)
 
base_learneractive_cover_setup (options_i &options, vw &all)
 

Function Documentation

◆ active_cover_setup()

base_learner* active_cover_setup ( options_i options,
vw all 
)

Definition at line 230 of file active_cover.cc.

References add(), VW::config::options_i::add_and_parse(), LEARNER::as_singleline(), f, vw::get_random_state(), LEARNER::init_learner(), LEARNER::make_base(), VW::config::make_option(), setup_base(), THROW, and VW::config::options_i::was_supplied().

Referenced by parse_reductions().

231 {
232  auto data = scoped_calloc_or_throw<active_cover>();
233  option_group_definition new_options("Active Learning with Cover");
234 
235  bool active_cover_option = false;
236  new_options.add(make_option("active_cover", active_cover_option).keep().help("enable active learning with cover"))
237  .add(make_option("mellowness", data->active_c0)
238  .default_value(8.f)
239  .help("active learning mellowness parameter c_0. Default 8."))
240  .add(make_option("alpha", data->alpha)
241  .default_value(1.f)
242  .help("active learning variance upper bound parameter alpha. Default 1."))
243  .add(make_option("beta_scale", data->beta_scale)
244  .default_value(sqrtf(10.f))
245  .help("active learning variance upper bound parameter beta_scale. Default std::sqrt(10)."))
246  .add(make_option("cover", data->cover_size).keep().default_value(12).help("cover size. Default 12."))
247  .add(make_option("oracular", data->oracular).help("Use Oracular-CAL style query or not. Default false."));
248  options.add_and_parse(new_options);
249 
250  if (!active_cover_option)
251  return nullptr;
252 
253  data->all = &all;
254  data->_random_state = all.get_random_state();
255  data->beta_scale *= data->beta_scale;
256 
257  if (data->oracular)
258  data->cover_size = 0;
259 
260  if (options.was_supplied("lda"))
261  THROW("error: you can't combine lda and active learning");
262 
263  if (options.was_supplied("active"))
264  THROW("error: you can't use --active_cover and --active at the same time");
265 
266  auto base = as_singleline(setup_base(options, all));
267 
268  data->lambda_n = new float[data->cover_size];
269  data->lambda_d = new float[data->cover_size];
270 
271  for (size_t i = 0; i < data->cover_size; i++)
272  {
273  data->lambda_n[i] = 0.f;
274  data->lambda_d[i] = 1.f / 8.f;
275  }
276 
277  // Create new learner
279  data, base, predict_or_learn_active_cover<true>, predict_or_learn_active_cover<false>, data->cover_size + 1);
280 
281  return make_base(l);
282 }
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
virtual void add_and_parse(const option_group_definition &group)=0
std::shared_ptr< rand_state > get_random_state()
Definition: global_data.h:553
single_learner * as_singleline(learner< T, E > *l)
Definition: learner.h:476
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
virtual bool was_supplied(const std::string &key)=0
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
#define THROW(args)
Definition: vw_exception.h:181
float f
Definition: cache.cc:40

◆ dis_test()

bool dis_test ( vw all,
example ec,
single_learner base,
float  ,
float  threshold 
)

Definition at line 43 of file active_cover.cc.

References example::confidence, example::pred, polyprediction::scalar, vw::sd, LEARNER::learner< T, E >::sensitivity(), shared_data::t, and example::weight.

Referenced by predict_or_learn_active_cover().

44 {
45  if (all.sd->t + ec.weight <= 3)
46  {
47  return true;
48  }
49 
50  // Get loss difference
51  float middle = 0.f;
52  ec.confidence = fabsf(ec.pred.scalar - middle) / base.sensitivity(ec);
53 
54  float k = (float)all.sd->t;
55  float loss_delta = ec.confidence / k;
56 
57  bool result = (loss_delta <= threshold);
58 
59  return result;
60 }
float scalar
Definition: example.h:45
float confidence
Definition: example.h:72
shared_data * sd
Definition: global_data.h:375
float sensitivity(example &ec, size_t i=0)
Definition: learner.h:242
polyprediction pred
Definition: example.h:60
float weight
Definition: example.h:62

◆ get_pmin()

float get_pmin ( float  sum_loss,
float  t 
)

Definition at line 76 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover().

77 {
78  // t = ec.example_t - 1
79  if (t <= 2.f)
80  {
81  return 1.f;
82  }
83 
84  float avg_loss = sum_loss / t;
85  float pmin = fmin(1.f / (std::sqrt(t * avg_loss) + log(t)), 0.5f);
86  return pmin; // treating n*eps_n = 1
87 }
float f
Definition: cache.cc:40

◆ get_threshold()

float get_threshold ( float  sum_loss,
float  t,
float  c0,
float  alpha 
)

Definition at line 62 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover().

63 {
64  if (t < 3.f)
65  {
66  return 1.f;
67  }
68  else
69  {
70  float avg_loss = sum_loss / t;
71  float threshold = std::sqrt(c0 * avg_loss / t) + fmax(2.f * alpha, 4.f) * c0 * log(t) / t;
72  return threshold;
73  }
74 }
float f
Definition: cache.cc:40

◆ predict_or_learn_active_cover()

template<bool is_learn>
void predict_or_learn_active_cover ( active_cover a,
single_learner base,
example ec 
)

Definition at line 132 of file active_cover.cc.

References active_cover::active_c0, active_cover::all, active_cover::alpha, active_cover::beta_scale, active_cover::cover_size, dis_test(), f, get_pmin(), get_threshold(), example::l, label_data::label, active_cover::lambda_d, active_cover::lambda_n, LEARNER::learner< T, E >::learn(), example::pred, LEARNER::learner< T, E >::predict(), shared_data::queries, query_decision(), polyprediction::scalar, vw::sd, sign(), polylabel::simple, shared_data::sum_loss, shared_data::t, and example::weight.

133 {
134  base.predict(ec, 0);
135 
136  if (is_learn)
137  {
138  vw& all = *a.all;
139 
140  float prediction = ec.pred.scalar;
141  float t = (float)a.all->sd->t;
142  float ec_input_weight = ec.weight;
143  float ec_input_label = ec.l.simple.label;
144 
145  // Compute threshold defining allowed set A
146  float threshold = get_threshold((float)all.sd->sum_loss, t, a.active_c0, a.alpha);
147  bool in_dis = dis_test(all, ec, base, prediction, threshold);
148  float pmin = get_pmin((float)all.sd->sum_loss, t);
149  float importance = query_decision(a, base, ec, prediction, pmin, in_dis);
150 
151  // Query (or not)
152  if (!in_dis) // Use predicted label
153  {
154  ec.l.simple.label = sign(prediction);
155  ec.weight = ec_input_weight;
156  base.learn(ec, 0);
157  }
158  else if (importance > 0) // Use importance-weighted example
159  {
160  all.sd->queries += 1;
161  ec.weight = ec_input_weight * importance;
162  ec.l.simple.label = ec_input_label;
163  base.learn(ec, 0);
164  }
165  else // skipped example
166  {
167  // Make sure the loss computation does not include
168  // skipped examples
169  ec.l.simple.label = FLT_MAX;
170  ec.weight = 0;
171  }
172 
173  // Update the learners in the cover and their weights
174  float q2 = 4.f * pmin * pmin;
175  float p, s, cost, cost_delta = 0;
176  float ec_output_label = ec.l.simple.label;
177  float ec_output_weight = ec.weight;
178  float r = 2.f * threshold * t * a.alpha / a.active_c0 / a.beta_scale;
179 
180  // Set up costs
181  // cost = cost of predicting erm's prediction
182  // cost_delta = cost - cost of predicting the opposite label
183  if (in_dis)
184  {
185  cost = r * (fmax(importance, 0.f)) * ((float)(sign(prediction) != sign(ec_input_label)));
186  }
187  else
188  {
189  cost = 0.f;
190  cost_delta = -r;
191  }
192 
193  for (size_t i = 0; i < a.cover_size; i++)
194  {
195  // Update cost
196  if (in_dis)
197  {
198  p = std::sqrt(q2) / (1.f + std::sqrt(q2));
199  s = 2.f * a.alpha * a.alpha - 1.f / p;
200  cost_delta = 2.f * cost - r * (fmax(importance, 0.f)) - s;
201  }
202 
203  // Choose min-cost label as the label
204  // Set importance weight to be the cost difference
205  ec.l.simple.label = -1.f * sign(cost_delta) * sign(prediction);
206  ec.weight = ec_input_weight * fabs(cost_delta);
207 
208  // Update learner
209  base.learn(ec, i + 1);
210  base.predict(ec, i + 1);
211 
212  // Update numerator of lambda
213  a.lambda_n[i] += 2.f * ((float)(sign(ec.pred.scalar) != sign(prediction))) * cost_delta;
214  a.lambda_n[i] = fmax(a.lambda_n[i], 0.f);
215 
216  // Update denominator of lambda
217  a.lambda_d[i] += ((float)(sign(ec.pred.scalar) != sign(prediction) && in_dis)) / (float)pow(q2, 1.5);
218 
219  // Accumulating weights of learners in the cover
220  q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
221  }
222 
223  // Restoring the weight, the label, and the prediction
224  ec.weight = ec_output_weight;
225  ec.l.simple.label = ec_output_label;
226  ec.pred.scalar = prediction;
227  }
228 }
double sum_loss
Definition: global_data.h:145
void predict(E &ec, size_t i=0)
Definition: learner.h:169
float scalar
Definition: example.h:45
size_t cover_size
Definition: active_cover.cc:27
float label
Definition: simple_label.h:14
label_data simple
Definition: example.h:28
float * lambda_n
Definition: active_cover.cc:29
shared_data * sd
Definition: global_data.h:375
bool dis_test(vw &all, example &ec, single_learner &base, float, float threshold)
Definition: active_cover.cc:43
float query_decision(active_cover &a, single_learner &l, example &ec, float prediction, float pmin, bool in_dis)
Definition: active_cover.cc:89
float get_pmin(float sum_loss, float t)
Definition: active_cover.cc:76
polylabel l
Definition: example.h:57
float beta_scale
Definition: active_cover.cc:25
float get_threshold(float sum_loss, float t, float c0, float alpha)
Definition: active_cover.cc:62
polyprediction pred
Definition: example.h:60
float * lambda_d
Definition: active_cover.cc:30
void learn(E &ec, size_t i=0)
Definition: learner.h:160
float weight
Definition: example.h:62
size_t queries
Definition: global_data.h:135
float f
Definition: cache.cc:40
float sign(float w)
Definition: active_cover.cc:12

◆ query_decision()

float query_decision ( active_cover a,
single_learner l,
example ec,
float  prediction,
float  pmin,
bool  in_dis 
)

Definition at line 89 of file active_cover.cc.

References active_cover::_random_state, active_cover::all, active_cover::cover_size, active_cover::lambda_d, active_cover::lambda_n, active_cover::oracular, example::pred, LEARNER::learner< T, E >::predict(), polyprediction::scalar, vw::sd, sign(), shared_data::t, and example::weight.

Referenced by predict_or_learn_active_cover().

90 {
91  if (a.all->sd->t + ec.weight <= 3)
92  {
93  return 1.f;
94  }
95 
96  if (!in_dis)
97  {
98  return -1.f;
99  }
100 
101  if (a.oracular)
102  {
103  return 1.f;
104  }
105 
106  float p, q2 = 4.f * pmin * pmin;
107 
108  for (size_t i = 0; i < a.cover_size; i++)
109  {
110  l.predict(ec, i + 1);
111  q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
112  }
113 
114  p = std::sqrt(q2) / (1 + std::sqrt(q2));
115 
116  if (std::isnan(p))
117  {
118  p = 1.f;
119  }
120 
121  if (a._random_state->get_and_update_random() <= p)
122  {
123  return 1.f / p;
124  }
125  else
126  {
127  return -1.f;
128  }
129 }
void predict(E &ec, size_t i=0)
Definition: learner.h:169
std::shared_ptr< rand_state > _random_state
Definition: active_cover.cc:33
float scalar
Definition: example.h:45
size_t cover_size
Definition: active_cover.cc:27
float * lambda_n
Definition: active_cover.cc:29
shared_data * sd
Definition: global_data.h:375
polyprediction pred
Definition: example.h:60
float * lambda_d
Definition: active_cover.cc:30
float weight
Definition: example.h:62
float sign(float w)
Definition: active_cover.cc:12

◆ sign()

float sign ( float  w)
inline

Definition at line 12 of file active_cover.cc.

References f.

Referenced by predict_or_learn_active_cover(), and query_decision().

13 {
14  if (w <= 0.f)
15  return -1.f;
16  else
17  return 1.f;
18 }
float f
Definition: cache.cc:40