Vowpal Wabbit: active_cover.cc

#include <cmath>
#include <errno.h>
#include <memory>
#include "reductions.h"
#include "rand48.h"
#include "float.h"
#include "vw.h"

using namespace LEARNER;
using namespace VW::config;

inline float sign(float w)
{
  if (w <= 0.f)
    return -1.f;
  else
    return 1.f;
}

struct active_cover
{
  // active learning algorithm parameters
  float active_c0;
  float alpha;
  float beta_scale;
  bool oracular;
  size_t cover_size;

  float* lambda_n;
  float* lambda_d;

  vw* all;  // statistics, loss
  std::shared_ptr<rand_state> _random_state;
  LEARNER::base_learner* l;

  ~active_cover()
  {
    delete[] lambda_n;
    delete[] lambda_d;
  }
};

bool dis_test(vw& all, example& ec, single_learner& base, float /* prediction */, float threshold)
{
  if (all.sd->t + ec.weight <= 3)
  {
    return true;
  }

  // Get loss difference
  float middle = 0.f;
  ec.confidence = fabsf(ec.pred.scalar - middle) / base.sensitivity(ec);

  float k = (float)all.sd->t;
  float loss_delta = ec.confidence / k;

  bool result = (loss_delta <= threshold);

  return result;
}

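// A reading of the test above (interpretation, not from the source):
// base.sensitivity(ec) estimates how far the learner's prediction would move
// if it trained on ec, so ec.confidence = |prediction - 0| / sensitivity is
// roughly the importance weight needed to flip the predicted sign. Dividing
// by k = t turns that into an estimated loss difference between the current
// hypothesis and the best label-flipping alternative; the example counts as
// being in the disagreement region when that gap is within the threshold.
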
float get_threshold(float sum_loss, float t, float c0, float alpha)
{
  if (t < 3.f)
  {
    return 1.f;
  }
  else
  {
    float avg_loss = sum_loss / t;
    float threshold = std::sqrt(c0 * avg_loss / t) + fmax(2.f * alpha, 4.f) * c0 * log(t) / t;
    return threshold;
  }
}

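// In math form, for t >= 3 the radius computed above is
//   threshold(t) = sqrt(c0 * (L_t / t) / t) + max(2*alpha, 4) * c0 * log(t) / t,
// where L_t is the cumulative loss: a deviation term of order
// sqrt(average loss / t) plus a lower-order log(t) / t term, both scaled by
// the mellowness parameter c0.
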
float get_pmin(float sum_loss, float t)
{
  // t = ec.example_t - 1
  if (t <= 2.f)
  {
    return 1.f;
  }

  float avg_loss = sum_loss / t;
  float pmin = fmin(1.f / (std::sqrt(t * avg_loss) + log(t)), 0.5f);
  return pmin;  // treating n*eps_n = 1
}

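// Equivalently, pmin(t) = min(1 / (sqrt(t * avg_loss) + log t), 1/2): since
// t * avg_loss = L_t, the floor on the query probability starts at 1/2 and
// decays roughly like 1 / sqrt(L_t) as the cumulative loss L_t grows.
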
float query_decision(active_cover& a, single_learner& l, example& ec, float prediction, float pmin, bool in_dis)
{
  if (a.all->sd->t + ec.weight <= 3)
  {
    return 1.f;
  }

  if (!in_dis)
  {
    return -1.f;
  }

  if (a.oracular)
  {
    return 1.f;
  }

  float p, q2 = 4.f * pmin * pmin;

  for (size_t i = 0; i < a.cover_size; i++)
  {
    l.predict(ec, i + 1);
    q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
  }

  p = std::sqrt(q2) / (1 + std::sqrt(q2));

  if (std::isnan(p))
  {
    p = 1.f;
  }

  if (a._random_state->get_and_update_random() <= p)
  {
    return 1.f / p;
  }
  else
  {
    return -1.f;
  }
}

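// The sampling scheme above: q2 starts at the floor 4 * pmin^2 and gains the
// weight lambda_n[i] / lambda_d[i] of every cover learner whose prediction
// disagrees with the erm prediction, so the query probability
// p = sqrt(q2) / (1 + sqrt(q2)) grows with the disagreement mass. A queried
// example is returned with importance 1/p, so that importance-weighted
// updates remain unbiased in expectation; -1 signals that no label is
// requested.
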
template <bool is_learn>
void predict_or_learn_active_cover(active_cover& a, single_learner& base, example& ec)
{
  base.predict(ec, 0);

  if (is_learn)
  {
    vw& all = *a.all;

    float prediction = ec.pred.scalar;
    float t = (float)a.all->sd->t;
    float ec_input_weight = ec.weight;
    float ec_input_label = ec.l.simple.label;

    // Compute threshold defining allowed set A
    float threshold = get_threshold((float)all.sd->sum_loss, t, a.active_c0, a.alpha);
    bool in_dis = dis_test(all, ec, base, prediction, threshold);
    float pmin = get_pmin((float)all.sd->sum_loss, t);
    float importance = query_decision(a, base, ec, prediction, pmin, in_dis);

    // Query (or not)
    if (!in_dis)  // Use predicted label
    {
      ec.l.simple.label = sign(prediction);
      ec.weight = ec_input_weight;
      base.learn(ec, 0);
    }
    else if (importance > 0)  // Use importance-weighted example
    {
      all.sd->queries += 1;
      ec.weight = ec_input_weight * importance;
      ec.l.simple.label = ec_input_label;
      base.learn(ec, 0);
    }
    else  // skipped example
    {
      // Make sure the loss computation does not include
      // skipped examples
      ec.l.simple.label = FLT_MAX;
      ec.weight = 0;
    }

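    // Of the three branches above: outside the disagreement region the
    // example is self-labeled with the erm prediction at full weight; inside
    // it, the example is either queried with its true label at weight scaled
    // by the 1/p importance, or excluded from the loss bookkeeping via the
    // FLT_MAX sentinel label.
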
    // Update the learners in the cover and their weights
    float q2 = 4.f * pmin * pmin;
    float p, s, cost, cost_delta = 0;
    float ec_output_label = ec.l.simple.label;
    float ec_output_weight = ec.weight;
    float r = 2.f * threshold * t * a.alpha / a.active_c0 / a.beta_scale;

    // Set up costs
    // cost = cost of predicting erm's prediction
    // cost_delta = cost - cost of predicting the opposite label
    if (in_dis)
    {
      cost = r * (fmax(importance, 0.f)) * ((float)(sign(prediction) != sign(ec_input_label)));
    }
    else
    {
      cost = 0.f;
      cost_delta = -r;
    }

    for (size_t i = 0; i < a.cover_size; i++)
    {
      // Update cost
      if (in_dis)
      {
        p = std::sqrt(q2) / (1.f + std::sqrt(q2));
        s = 2.f * a.alpha * a.alpha - 1.f / p;
        cost_delta = 2.f * cost - r * (fmax(importance, 0.f)) - s;
      }

      // Choose min-cost label as the label
      // Set importance weight to be the cost difference
      ec.l.simple.label = -1.f * sign(cost_delta) * sign(prediction);
      ec.weight = ec_input_weight * fabs(cost_delta);

      // Update learner
      base.learn(ec, i + 1);
      base.predict(ec, i + 1);

      // Update numerator of lambda
      a.lambda_n[i] += 2.f * ((float)(sign(ec.pred.scalar) != sign(prediction))) * cost_delta;
      a.lambda_n[i] = fmax(a.lambda_n[i], 0.f);

      // Update denominator of lambda
      a.lambda_d[i] += ((float)(sign(ec.pred.scalar) != sign(prediction) && in_dis)) / (float)pow(q2, 1.5);

      // Accumulating weights of learners in the cover
      q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]);
    }

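    // Intuition for the loop above: each cover learner is pushed toward the
    // cheaper of the two labels with importance weight |cost_delta|, and its
    // mixture weight lambda_n[i] / lambda_d[i] only grows when disagreeing
    // with the erm prediction carries positive cost_delta, so disagreement
    // (and with it the query probability) persists only where it still looks
    // worthwhile.
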
    // Restoring the weight, the label, and the prediction
    ec.weight = ec_output_weight;
    ec.l.simple.label = ec_output_label;
    ec.pred.scalar = prediction;
  }
}

base_learner* active_cover_setup(options_i& options, vw& all)
{
  auto data = scoped_calloc_or_throw<active_cover>();
  option_group_definition new_options("Active Learning with Cover");

  bool active_cover_option = false;
  new_options.add(make_option("active_cover", active_cover_option).keep().help("enable active learning with cover"))
      .add(make_option("mellowness", data->active_c0)
               .default_value(8.f)
               .help("active learning mellowness parameter c_0. Default 8."))
      .add(make_option("alpha", data->alpha)
               .default_value(1.f)
               .help("active learning variance upper bound parameter alpha. Default 1."))
      .add(make_option("beta_scale", data->beta_scale)
               .default_value(sqrtf(10.f))
               .help("active learning variance upper bound parameter beta_scale. Default std::sqrt(10)."))
      .add(make_option("cover", data->cover_size).keep().default_value(12).help("cover size. Default 12."))
      .add(make_option("oracular", data->oracular).help("Use Oracular-CAL style query or not. Default false."));
  options.add_and_parse(new_options);

  if (!active_cover_option)
    return nullptr;

  data->all = &all;
  data->_random_state = all.get_random_state();
  data->beta_scale *= data->beta_scale;

  if (data->oracular)
    data->cover_size = 0;

  if (options.was_supplied("lda"))
    THROW("error: you can't combine lda and active learning");

  if (options.was_supplied("active"))
    THROW("error: you can't use --active_cover and --active at the same time");

  auto base = as_singleline(setup_base(options, all));

  data->lambda_n = new float[data->cover_size];
  data->lambda_d = new float[data->cover_size];

  for (size_t i = 0; i < data->cover_size; i++)
  {
    data->lambda_n[i] = 0.f;
    data->lambda_d[i] = 1.f / 8.f;
  }

  // Create new learner
  learner<active_cover, example>& l = init_learner(
      data, base, predict_or_learn_active_cover<true>, predict_or_learn_active_cover<false>, data->cover_size + 1);

  return make_base(l);
}
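
// Usage sketch (assuming a binary-labeled dataset train.dat in VW text
// format; --binary and -d are core VW flags, the rest are defined above):
//
//   vw -d train.dat --binary --active_cover --mellowness 8 --cover 12
//
// The end-of-run summary should then report how many label queries the
// reduction actually issued (accumulated in sd->queries above).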