Vowpal Wabbit
cs_active.cc
#include <cmath>
#include <errno.h>
#include "reductions.h"
#include "v_hashmap.h"
#include "rand48.h"
#include "float.h"
#include "vw.h"
#include "vw_exception.h"
#include "csoaa.h"
//#define B_SEARCH_MAX_ITER 50
#define B_SEARCH_MAX_ITER 20

using namespace LEARNER;
using namespace COST_SENSITIVE;
using namespace VW::config;

using std::cerr;
using std::endl;

struct lq_data
{
  // The following are used by cost-sensitive active learning
  float max_pred;            // The max cost for this label predicted by the current set of good regressors
  float min_pred;            // The min cost for this label predicted by the current set of good regressors
  bool is_range_large;       // Indicator of whether this label's cost range was large
  bool is_range_overlapped;  // Indicator of whether this label's cost range overlaps with the cost range that has the
                             // minimum max_pred
  bool query_needed;         // Used in reduction mode: tell the upper layer whether a query is needed for this label
  COST_SENSITIVE::wclass& cl;
};

struct cs_active
{
  // active learning algorithm parameters
  float c0;        // mellowness controlling the width of the set of good functions
  float c1;        // multiplier on the threshold for the cost range test
  float cost_max;  // max cost
  float cost_min;  // min cost

  uint32_t num_classes;
  size_t t;

  size_t min_labels;
  size_t max_labels;

  bool print_debug_stuff;
  bool is_baseline;
  bool use_domination;

  vw* all;  // statistics, loss
  LEARNER::base_learner* l;

  v_array<lq_data> query_data;

  size_t num_any_queries;  // examples where at least one label is queried
  size_t overlapped_and_range_small;
  v_array<size_t> examples_by_queries;
  size_t labels_outside_range;
  float distance_to_range;
  float range;

  ~cs_active() { examples_by_queries.delete_v(); }
};

float binarySearch(float fhat, float delta, float sens, float tol)
{
  float maxw = std::min(fhat / sens, FLT_MAX);

  if (maxw * fhat * fhat <= delta)
    return maxw;

  float l = 0, u = maxw, w, v;

  for (int iter = 0; iter < B_SEARCH_MAX_ITER; iter++)
  {
    w = (u + l) / 2.f;
    v = w * (fhat * fhat - (fhat - sens * w) * (fhat - sens * w)) - delta;
    if (v > 0)
      u = w;
    else
      l = w;
    if (fabs(v) <= tol || u - l <= tol)
      break;
  }

  return l;
}
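
// Note on binarySearch (explanatory comment, not in the original source): it approximates the largest
// importance weight w in [0, fhat / sens] satisfying
//   w * (fhat^2 - (fhat - sens * w)^2) <= delta,
// i.e. the largest weight whose induced change in squared loss stays within the "good set" threshold delta.
// Assuming fhat >= 0 and sens > 0, the left-hand side is nondecreasing on this interval, so bisection works;
// the lower bound l is returned as a conservative answer after at most B_SEARCH_MAX_ITER iterations or once
// the tolerance is met.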

template <bool is_learn, bool is_simulation>
inline void inner_loop(cs_active& cs_a, single_learner& base, example& ec, uint32_t i, float cost, uint32_t& prediction,
    float& score, float& partial_prediction, bool query_this_label, bool& query_needed)
{
  base.predict(ec, i - 1);
  // cerr << "base.predict ==> partial_prediction=" << ec.partial_prediction << endl;
  if (is_learn)
  {
    vw& all = *cs_a.all;
    ec.l.simple.weight = 1.;
    ec.weight = 1.;
    if (is_simulation)
    {
      // In simulation mode
      if (query_this_label)
      {
        ec.l.simple.label = cost;
        all.sd->queries += 1;
      }
      else
        ec.l.simple.label = FLT_MAX;
    }
    else
    {
      // In reduction mode.
      // If the cost of this label was previously queried, then it should be available for learning now.
      // If the cost of this label was not queried, then skip it.
      if (query_needed)
      {
        ec.l.simple.label = cost;
        if ((cost < cs_a.cost_min) || (cost > cs_a.cost_max))
          cerr << "warning: cost " << cost << " outside of cost range [" << cs_a.cost_min << ", " << cs_a.cost_max
               << "]!" << endl;
      }
      else
        ec.l.simple.label = FLT_MAX;
    }

    if (ec.l.simple.label != FLT_MAX)
      base.learn(ec, i - 1);
  }
  else if (!is_simulation)
    // Prediction in reduction mode could be used by the upper layer to ask whether this label needs to be queried.
    // So we return that.
    query_needed = query_this_label;

  partial_prediction = ec.partial_prediction;
  if (ec.partial_prediction < score || (ec.partial_prediction == score && i < prediction))
  {
    score = ec.partial_prediction;
    prediction = i;
  }
  add_passthrough_feature(ec, i, ec.partial_prediction);
}
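
// Note on inner_loop (explanatory comment, not in the original source): label i is scored by the (i-1)-th
// underlying regressor. When learning, the cost is revealed to the base learner only if this label was
// actually queried (simulation mode) or its cost is available from an earlier query (reduction mode);
// otherwise the simple label stays at FLT_MAX and no update happens. The running (score, prediction) pair
// tracks the argmin cost over the labels seen so far, breaking ties toward the smaller label index.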

inline void find_cost_range(cs_active& cs_a, single_learner& base, example& ec, uint32_t i, float delta, float eta,
    float& min_pred, float& max_pred, bool& is_range_large)
{
  float tol = 1e-6f;

  base.predict(ec, i - 1);
  float sens = base.sensitivity(ec, i - 1);

  if (cs_a.t <= 1 || std::isnan(sens) || std::isinf(sens))
  {
    min_pred = cs_a.cost_min;
    max_pred = cs_a.cost_max;
    is_range_large = true;
    if (cs_a.print_debug_stuff)
      cerr << " find_cost_rangeA: i=" << i << " pp=" << ec.partial_prediction << " sens=" << sens << " eta=" << eta
           << " [" << min_pred << ", " << max_pred << "] = " << (max_pred - min_pred) << endl;
  }
  else
  {
    // finding max_pred and min_pred by binary search
    max_pred =
        std::min(ec.pred.scalar + sens * binarySearch(cs_a.cost_max - ec.pred.scalar, delta, sens, tol), cs_a.cost_max);
    min_pred =
        std::max(ec.pred.scalar - sens * binarySearch(ec.pred.scalar - cs_a.cost_min, delta, sens, tol), cs_a.cost_min);
    is_range_large = (max_pred - min_pred > eta);
    if (cs_a.print_debug_stuff)
      cerr << " find_cost_rangeB: i=" << i << " pp=" << ec.partial_prediction << " sens=" << sens << " eta=" << eta
           << " [" << min_pred << ", " << max_pred << "] = " << (max_pred - min_pred) << endl;
  }
}
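
// Note on find_cost_range (explanatory comment, not in the original source): [min_pred, max_pred] approximates
// the range of costs the current set of "good" regressors could still assign to label i. It is built from the
// base learner's sensitivity (how much the prediction moves per unit of importance weight) combined with
// binarySearch, and clipped to [cost_min, cost_max]. A range wider than eta is flagged as large, which feeds
// the query rule in predict_or_learn below.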

template <bool is_learn, bool is_simulation>
void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec)
{
  // cerr << "------------- passthrough" << endl;
  COST_SENSITIVE::label ld = ec.l.cs;

  // cerr << "is_learn=" << is_learn << " ld.costs.size()=" << ld.costs.size() << endl;
  if (cs_a.all->sd->queries >= cs_a.min_labels * cs_a.num_classes)
  {
    // save regressor
    std::stringstream filename;
    filename << cs_a.all->final_regressor_name << "." << ec.example_counter << "." << cs_a.all->sd->queries << "."
             << cs_a.num_any_queries;
    VW::save_predictor(*(cs_a.all), filename.str());
    cerr << endl << "Number of examples with at least one query = " << cs_a.num_any_queries;
    // Double label query budget
    cs_a.min_labels *= 2;
    for (size_t i = 0; i < cs_a.examples_by_queries.size(); i++)
    {
      cerr << endl << "examples with " << i << " labels queried = " << cs_a.examples_by_queries[i];
    }

    cerr << endl << "labels outside of cost range = " << cs_a.labels_outside_range;
    cerr << endl << "average distance to range = " << cs_a.distance_to_range / ((float)cs_a.labels_outside_range);
    cerr << endl << "average range = " << cs_a.range / ((float)cs_a.labels_outside_range);

    /*
    for (size_t i=0; i<cs_a.all->sd->distance_to_range.size(); i++)
    { cerr << endl << "label " << i << ", average distance to range = " <<
      cs_a.all->sd->distance_to_range[i]/((float)(cs_a.t-1));
    }*/

    cerr << endl << endl;
  }

  if (cs_a.all->sd->queries >= cs_a.max_labels * cs_a.num_classes)
    return;

  uint32_t prediction = 1;
  float score = FLT_MAX;
  ec.l.simple = {0., 0., 0.};

  float min_max_cost = FLT_MAX;
  float t = (float)cs_a.t;  // ec.example_t;  // current round
  float t_prev = t - 1.f;   // ec.weight;  // last round

  float eta = cs_a.c1 * (cs_a.cost_max - cs_a.cost_min) / std::sqrt(t);  // threshold on cost range
  float delta = cs_a.c0 * log((float)(cs_a.num_classes * std::max(t_prev, 1.f))) *
      pow(cs_a.cost_max - cs_a.cost_min, 2);  // threshold on empirical loss difference
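  // Illustrative arithmetic for the thresholds above (not in the original source): with the default
  // mellowness c0 = 0.1, range_c c1 = 0.5, cost range [0, 1], num_classes = 10, and t = 100,
  //   eta   = 0.5 * (1 - 0) / sqrt(100)        = 0.05
  //   delta = 0.1 * log(10 * 99) * (1 - 0)^2  ~= 0.69
  // so at that round a label's cost range counts as "large" once it is wider than 0.05.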

  if (ld.costs.size() > 0)
  {
    // Create metadata structure
    for (COST_SENSITIVE::wclass& cl : ld.costs)
    {
      lq_data f = {0.0, 0.0, 0, 0, 0, cl};
      cs_a.query_data.push_back(f);
    }
    uint32_t n_overlapped = 0;
    for (lq_data& lqd : cs_a.query_data)
    {
      find_cost_range(cs_a, base, ec, lqd.cl.class_index, delta, eta, lqd.min_pred, lqd.max_pred, lqd.is_range_large);
      min_max_cost = std::min(min_max_cost, lqd.max_pred);
    }
    for (lq_data& lqd : cs_a.query_data)
    {
      lqd.is_range_overlapped = (lqd.min_pred <= min_max_cost);
      n_overlapped += (uint32_t)(lqd.is_range_overlapped);
      // large_range = large_range || (cl.is_range_overlapped && cl.is_range_large);
      // if(cl.is_range_overlapped && is_learn)
      //{ std::cout << "label " << cl.class_index << ", min_pred = " << cl.min_pred << ", max_pred = " << cl.max_pred
      //            << ", is_range_large = " << cl.is_range_large << ", eta = " << eta << ", min_max_cost = "
      //            << min_max_cost << endl;
      //}
      cs_a.overlapped_and_range_small += (size_t)(lqd.is_range_overlapped && !lqd.is_range_large);
      if (lqd.cl.x > lqd.max_pred || lqd.cl.x < lqd.min_pred)
      {
        cs_a.labels_outside_range++;
        // cs_a.all->sd->distance_to_range[cl.class_index-1] += std::max(cl.x - cl.max_pred, cl.min_pred - cl.x);
        cs_a.distance_to_range += std::max(lqd.cl.x - lqd.max_pred, lqd.min_pred - lqd.cl.x);
        cs_a.range += lqd.max_pred - lqd.min_pred;
      }
    }

    bool query = (n_overlapped > 1);
    size_t queries = cs_a.all->sd->queries;
    // bool any_query = false;
    for (lq_data& lqd : cs_a.query_data)
    {
      bool query_label = ((query && cs_a.is_baseline) || (!cs_a.use_domination && lqd.is_range_large) ||
          (query && lqd.is_range_overlapped && lqd.is_range_large));
      inner_loop<is_learn, is_simulation>(cs_a, base, ec, lqd.cl.class_index, lqd.cl.x, prediction, score,
          lqd.cl.partial_prediction, query_label, lqd.query_needed);
      if (lqd.query_needed)
        ec.pred.multilabels.label_v.push_back(lqd.cl.class_index);
      if (cs_a.print_debug_stuff)
        cerr << "label=" << lqd.cl.class_index << " x=" << lqd.cl.x << " prediction=" << prediction
             << " score=" << score << " pp=" << lqd.cl.partial_prediction << " ql=" << query_label
             << " qn=" << lqd.query_needed << " ro=" << lqd.is_range_overlapped << " rl=" << lqd.is_range_large << " ["
             << lqd.min_pred << ", " << lqd.max_pred << "] vs delta=" << delta << " n_overlapped=" << n_overlapped
             << " is_baseline=" << cs_a.is_baseline << endl;
    }
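
    // Note on the query rule above (explanatory comment, not in the original source): with domination enabled
    // (the default), a label is queried only when more than one cost range overlaps the smallest max_pred AND
    // this label's own range both overlaps and is large. --baseline instead queries every label whenever that
    // overlap ambiguity exists, and disabling --domination queries any label whose range is large regardless
    // of overlap.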

    // Need to pop metadata
    cs_a.query_data.delete_v();

    if (cs_a.all->sd->queries - queries > 0)
      cs_a.num_any_queries++;

    cs_a.examples_by_queries[cs_a.all->sd->queries - queries] += 1;
    // if(any_query)
    //   cs_a.num_any_queries++;

    ec.partial_prediction = score;
    if (is_learn)
    {
      cs_a.t++;
    }
  }
  else
  {
    float temp = 0.f;
    bool temp2 = false, temp3 = false;
    for (uint32_t i = 1; i <= cs_a.num_classes; i++)
    {
      inner_loop<false, is_simulation>(cs_a, base, ec, i, FLT_MAX, prediction, score, temp, temp2, temp3);
    }
  }

  ec.pred.multiclass = prediction;
  ec.l.cs = ld;
}

void finish_example(vw& all, cs_active& cs_a, example& ec) { CSOAA::finish_example(all, *(CSOAA::csoaa*)&cs_a, ec); }

base_learner* cs_active_setup(options_i& options, vw& all)
{
  auto data = scoped_calloc_or_throw<cs_active>();

  bool simulation = false;
  int domination;
  option_group_definition new_options("Cost-sensitive Active Learning");
  new_options
      .add(make_option("cs_active", data->num_classes).keep().help("Cost-sensitive active learning with <k> costs"))
      .add(make_option("simulation", simulation).help("cost-sensitive active learning simulation mode"))
      .add(make_option("baseline", data->is_baseline).help("cost-sensitive active learning baseline"))
      .add(make_option("domination", domination)
               .default_value(1)
               .help("cost-sensitive active learning use domination. Default 1"))
      .add(make_option("mellowness", data->c0).default_value(0.1f).help("mellowness parameter c_0. Default 0.1."))
      .add(make_option("range_c", data->c1)
               .default_value(0.5f)
               .help("parameter controlling the threshold for per-label cost uncertainty. Default 0.5."))
      .add(make_option("max_labels", data->max_labels).default_value(-1).help("maximum number of label queries."))
      .add(make_option("min_labels", data->min_labels).default_value(-1).help("minimum number of label queries."))
      .add(make_option("cost_max", data->cost_max).default_value(1.f).help("cost upper bound. Default 1."))
      .add(make_option("cost_min", data->cost_min).default_value(0.f).help("cost lower bound. Default 0."))
      // TODO replace with trace and quiet
      .add(make_option("csa_debug", data->print_debug_stuff).help("print debug stuff for cs_active"));
  options.add_and_parse(new_options);

  data->use_domination = true;
  if (options.was_supplied("domination") && !domination)
    data->use_domination = false;

  if (!options.was_supplied("cs_active"))
    return nullptr;

  data->all = &all;
  data->t = 1;

  auto loss_function_type = all.loss->getType();
  if (loss_function_type != "squared")
    THROW("error: you can't use non-squared loss with cs_active");

  if (options.was_supplied("lda"))
    THROW("error: you can't combine lda and active learning");

  if (options.was_supplied("active"))
    THROW("error: you can't use --cs_active and --active at the same time");

  if (options.was_supplied("active_cover"))
    THROW("error: you can't use --cs_active and --active_cover at the same time");

  if (options.was_supplied("csoaa"))
    THROW("error: you can't use --cs_active and --csoaa at the same time");

  if (!options.was_supplied("adax"))
    all.trace_message << "WARNING: --cs_active should be used with --adax" << endl;

  all.p->lp = cs_label;  // assigning the label parser
  all.set_minmax(all.sd, data->cost_max);
  all.set_minmax(all.sd, data->cost_min);
  for (uint32_t i = 0; i < data->num_classes + 1; i++) data->examples_by_queries.push_back(0);

  learner<cs_active, example>& l = simulation
      ? init_learner(data, as_singleline(setup_base(options, all)), predict_or_learn<true, true>,
            predict_or_learn<false, true>, data->num_classes, prediction_type::multilabels)
      : init_learner(data, as_singleline(setup_base(options, all)), predict_or_learn<true, false>,
            predict_or_learn<false, false>, data->num_classes, prediction_type::multilabels);

  l.set_finish_example(finish_example);
  base_learner* b = make_base(l);
  all.cost_sensitive = b;
  return b;
}
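
// Usage sketch (explanatory comment, not in the original source; the data file name is hypothetical): the
// reduction is enabled with --cs_active <k> on cost-sensitive input, e.g.
//   vw --cs_active 3 --adax --simulation --mellowness 0.1 -d train.csdata
// where each example carries cost-sensitive labels such as "1:0.2 2:1.0 3:0.5 | features". Squared loss is
// required (it is VW's default), and the setup code above warns if --adax is not supplied.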