Vowpal Wabbit
loss_functions.cc
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5  */
6 #include <math.h>
7 #include "correctedMath.h"
8 #include <iostream>
9 #include <stdlib.h>
10 #include <float.h>
11 
12 #include "global_data.h"
13 #include "vw_exception.h"
14 
15 class squaredloss : public loss_function
16 {
17  public:
18  std::string getType() { return "squared"; }
19 
20  float getLoss(shared_data* sd, float prediction, float label)
21  {
22  if (prediction <= sd->max_label && prediction >= sd->min_label)
23  {
24  float example_loss = (prediction - label) * (prediction - label);
25  return example_loss;
26  }
27  else if (prediction < sd->min_label)
28  if (label == sd->min_label)
29  return 0.;
30  else
31  return (float)((label - sd->min_label) * (label - sd->min_label) +
32  2. * (label - sd->min_label) * (sd->min_label - prediction));
33  else if (label == sd->max_label)
34  return 0.;
35  else
36  return float((sd->max_label - label) * (sd->max_label - label) +
37  2. * (sd->max_label - label) * (prediction - sd->max_label));
38  }
39 
40  float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
41  {
42  if (update_scale * pred_per_update < 1e-6)
43  {
44  /* When exp(-eta_t)~= 1 we replace 1-exp(-eta_t)
45  * with its first order Taylor expansion around 0
46  * to avoid catastrophic cancellation.
47  */
48  return 2.f * (label - prediction) * update_scale;
49  }
50  return (label - prediction) * (1.f - correctedExp(-2.f * update_scale * pred_per_update)) / pred_per_update;
51  }
52 
53  float getUnsafeUpdate(float prediction, float label, float update_scale)
54  {
55  return 2.f * (label - prediction) * update_scale;
56  }
57 
58  float getRevertingWeight(shared_data* sd, float prediction, float eta_t)
59  {
60  float t = 0.5f * (sd->min_label + sd->max_label);
61  float alternative = (prediction > t) ? sd->min_label : sd->max_label;
62  return log((alternative - prediction) / (alternative - t)) / eta_t;
63  }
64 
65  float getSquareGrad(float prediction, float label) { return 4.f * (prediction - label) * (prediction - label); }
66  float first_derivative(shared_data* sd, float prediction, float label)
67  {
68  if (prediction < sd->min_label)
69  prediction = sd->min_label;
70  else if (prediction > sd->max_label)
71  prediction = sd->max_label;
72  return 2.f * (prediction - label);
73  }
74  float second_derivative(shared_data* sd, float prediction, float)
75  {
76  if (prediction <= sd->max_label && prediction >= sd->min_label)
77  return 2.;
78  else
79  return 0.;
80  }
81 };
82 
84 {
85  public:
86  std::string getType() { return "classic"; }
87 
88  float getLoss(shared_data*, float prediction, float label)
89  {
90  float example_loss = (prediction - label) * (prediction - label);
91  return example_loss;
92  }
93 
94  float getUpdate(float prediction, float label, float update_scale, float /* pred_per_update */)
95  {
96  return 2.f * (label - prediction) * update_scale;
97  }
98 
99  float getUnsafeUpdate(float prediction, float label, float update_scale)
100  {
101  return 2.f * (label - prediction) * update_scale;
102  }
103 
104  float getRevertingWeight(shared_data* sd, float prediction, float eta_t)
105  {
106  float t = 0.5f * (sd->min_label + sd->max_label);
107  float alternative = (prediction > t) ? sd->min_label : sd->max_label;
108  return (t - prediction) / ((alternative - prediction) * eta_t);
109  }
110 
111  float getSquareGrad(float prediction, float label) { return 4.f * (prediction - label) * (prediction - label); }
112  float first_derivative(shared_data*, float prediction, float label) { return 2.f * (prediction - label); }
113  float second_derivative(shared_data*, float, float) { return 2.; }
114 };
115 
116 class hingeloss : public loss_function
117 {
118  public:
119  std::string getType() { return "hinge"; }
120 
121  float getLoss(shared_data*, float prediction, float label)
122  {
123  if (label != -1.f && label != 1.f)
124  std::cout << "You are using label " << label << " not -1 or 1 as loss function expects!" << std::endl;
125  float e = 1 - label * prediction;
126  return (e > 0) ? e : 0;
127  }
128 
129  float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
130  {
131  if (label * prediction >= 1)
132  return 0;
133  float err = 1 - label * prediction;
134  return label * (update_scale * pred_per_update < err ? update_scale : err / pred_per_update);
135  }
136 
137  float getUnsafeUpdate(float prediction, float label, float update_scale)
138  {
139  if (label * prediction >= 1)
140  return 0;
141  return label * update_scale;
142  }
143 
144  float getRevertingWeight(shared_data*, float prediction, float eta_t) { return fabs(prediction) / eta_t; }
145 
146  float getSquareGrad(float prediction, float label)
147  {
148  float d = first_derivative(nullptr, prediction, label);
149  return d * d;
150  }
151 
152  float first_derivative(shared_data*, float prediction, float label) { return (label * prediction >= 1) ? 0 : -label; }
153 
154  float second_derivative(shared_data*, float, float) { return 0.; }
155 };
156 
157 class logloss : public loss_function
158 {
159  public:
160  std::string getType() { return "logistic"; }
161 
162  float getLoss(shared_data*, float prediction, float label)
163  {
164  if (label != -1.f && label != 1.f)
165  std::cout << "You are using label " << label << " not -1 or 1 as loss function expects!" << std::endl;
166  return log(1 + correctedExp(-label * prediction));
167  }
168 
169  float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
170  {
171  float w, x;
172  float d = correctedExp(label * prediction);
173  if (update_scale * pred_per_update < 1e-6)
174  {
175  /* As with squared loss, for small eta_t we replace the update
176  * with its first order Taylor expansion to avoid numerical problems
177  */
178  return label * update_scale / (1 + d);
179  }
180  x = update_scale * pred_per_update + label * prediction + d;
181  w = wexpmx(x);
182  return -(label * w + prediction) / pred_per_update;
183  }
184 
185  float getUnsafeUpdate(float prediction, float label, float update_scale)
186  {
187  float d = correctedExp(label * prediction);
188  return label * update_scale / (1 + d);
189  }
190 
191  inline float wexpmx(float x)
192  {
193  /* This piece of code is approximating W(exp(x))-x.
194  * W is the Lambert W function: W(z)*exp(W(z))=z.
195  * The absolute error of this approximation is less than 9e-5.
196  * Faster/better approximations can be substituted here.
197  */
198  double w = x >= 1. ? 0.86 * x + 0.01 : correctedExp(0.8 * x - 0.65); // initial guess
199  double r = x >= 1. ? x - log(w) - w : 0.2 * x + 0.65 - w; // residual
200  double t = 1. + w;
201  double u = 2. * t * (t + 2. * r / 3.); // magic
202  return (float)(w * (1. + r / t * (u - r) / (u - 2. * r)) - x); // more magic
203  }
204 
205  float getRevertingWeight(shared_data*, float prediction, float eta_t)
206  {
207  float z = -fabs(prediction);
208  return (1 - z - correctedExp(z)) / eta_t;
209  }
210 
211  float first_derivative(shared_data*, float prediction, float label)
212  {
213  float v = -label / (1 + correctedExp(label * prediction));
214  return v;
215  }
216 
217  float getSquareGrad(float prediction, float label)
218  {
219  float d = first_derivative(nullptr, prediction, label);
220  return d * d;
221  }
222 
223  float second_derivative(shared_data*, float prediction, float label)
224  {
225  float p = 1 / (1 + correctedExp(label * prediction));
226 
227  return p * (1 - p);
228  }
229 };
230 
232 {
233  public:
234  quantileloss(float& tau_) : tau(tau_) {}
235 
236  std::string getType() { return "quantile"; }
237 
238  float getLoss(shared_data*, float prediction, float label)
239  {
240  float e = label - prediction;
241  if (e > 0)
242  return tau * e;
243  else
244  return -(1 - tau) * e;
245  }
246 
247  float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
248  {
249  float err = label - prediction;
250  if (err == 0)
251  return 0;
252  float normal = update_scale * pred_per_update; // base update size
253  if (err > 0)
254  {
255  normal = tau * normal;
256  return (normal < err ? tau * update_scale : err / pred_per_update);
257  }
258  else
259  {
260  normal = -(1 - tau) * normal;
261  return (normal > err ? (tau - 1) * update_scale : err / pred_per_update);
262  }
263  }
264 
265  float getUnsafeUpdate(float prediction, float label, float update_scale)
266  {
267  float err = label - prediction;
268  if (err == 0)
269  return 0;
270  if (err > 0)
271  return tau * update_scale;
272  return -(1 - tau) * update_scale;
273  }
274 
275  float getRevertingWeight(shared_data* sd, float prediction, float eta_t)
276  {
277  float v, t;
278  t = 0.5f * (sd->min_label + sd->max_label);
279  if (prediction > t)
280  v = -(1 - tau);
281  else
282  v = tau;
283  return (t - prediction) / (eta_t * v);
284  }
285 
286  float first_derivative(shared_data*, float prediction, float label)
287  {
288  float e = label - prediction;
289  if (e == 0)
290  return 0;
291  return e > 0 ? -tau : (1 - tau);
292  }
293 
294  float getSquareGrad(float prediction, float label)
295  {
296  float fd = first_derivative(nullptr, prediction, label);
297  return fd * fd;
298  }
299 
300  float second_derivative(shared_data*, float, float) { return 0.; }
301 
302  float tau;
303 };
304 
306 {
307  public:
308  std::string getType() { return "poisson"; }
309 
310  float getLoss(shared_data*, float prediction, float label)
311  {
312  if (label < 0.f)
313  std::cout << "You are using label " << label << " but loss function expects label >= 0!" << std::endl;
314  float exp_prediction = expf(prediction);
315  // deviance is used instead of log-likelihood
316  return 2 * (label * (logf(label + 1e-6f) - prediction) - (label - exp_prediction));
317  }
318 
319  float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
320  {
321  float exp_prediction = expf(prediction);
322  if (label > 0)
323  {
324  return label * update_scale -
325  log1p(exp_prediction * expm1(label * update_scale * pred_per_update) / label) / pred_per_update;
326  }
327  else
328  {
329  return -log1p(exp_prediction * update_scale * pred_per_update) / pred_per_update;
330  }
331  }
332 
333  float getUnsafeUpdate(float prediction, float label, float update_scale)
334  {
335  float exp_prediction = expf(prediction);
336  return (label - exp_prediction) * update_scale;
337  }
338 
339  float getRevertingWeight(shared_data* /* sd */, float /* prediction */, float /* eta_t */)
340  {
341  THROW("Active learning not supported by poisson loss");
342  }
343 
344  float getSquareGrad(float prediction, float label)
345  {
346  float exp_prediction = expf(prediction);
347  return (exp_prediction - label) * (exp_prediction - label);
348  }
349 
350  float first_derivative(shared_data*, float prediction, float label)
351  {
352  float exp_prediction = expf(prediction);
353  return (exp_prediction - label);
354  }
355 
356  float second_derivative(shared_data*, float prediction, float /* label */)
357  {
358  float exp_prediction = expf(prediction);
359  return exp_prediction;
360  }
361 };
362 
363 loss_function* getLossFunction(vw& all, std::string funcName, float function_parameter)
364 {
365  if (funcName.compare("squared") == 0 || funcName.compare("Huber") == 0)
366  return new squaredloss();
367  else if (funcName.compare("classic") == 0)
368  return new classic_squaredloss();
369  else if (funcName.compare("hinge") == 0)
370  return new hingeloss();
371  else if (funcName.compare("logistic") == 0)
372  {
373  if (all.set_minmax != noop_mm)
374  {
375  all.sd->min_label = -50;
376  all.sd->max_label = 50;
377  }
378  return new logloss();
379  }
380  else if (funcName.compare("quantile") == 0 || funcName.compare("pinball") == 0 || funcName.compare("absolute") == 0)
381  {
382  return new quantileloss(function_parameter);
383  }
384  else if (funcName.compare("poisson") == 0)
385  {
386  if (all.set_minmax != noop_mm)
387  {
388  all.sd->min_label = -50;
389  all.sd->max_label = 50;
390  }
391  return new poisson_loss();
392  }
393  else
394  THROW("Invalid loss function name: \'" << funcName << "\' Bailing!");
395 }
float getUnsafeUpdate(float prediction, float label, float update_scale)
float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
float second_derivative(shared_data *sd, float prediction, float)
float second_derivative(shared_data *, float prediction, float)
float first_derivative(shared_data *, float prediction, float label)
#define correctedExp
Definition: correctedMath.h:27
std::string getType()
float getSquareGrad(float prediction, float label)
float second_derivative(shared_data *, float, float)
float getUnsafeUpdate(float prediction, float label, float update_scale)
float first_derivative(shared_data *sd, float prediction, float label)
float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
float getSquareGrad(float prediction, float label)
float getUnsafeUpdate(float prediction, float label, float update_scale)
std::string getType()
float first_derivative(shared_data *, float prediction, float label)
float getRevertingWeight(shared_data *, float prediction, float eta_t)
float second_derivative(shared_data *, float, float)
float second_derivative(shared_data *, float prediction, float label)
float getSquareGrad(float prediction, float label)
float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
std::string getType()
float getRevertingWeight(shared_data *, float prediction, float eta_t)
float getLoss(shared_data *, float prediction, float label)
void(* set_minmax)(shared_data *sd, float label)
Definition: global_data.h:394
float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
void noop_mm(shared_data *, float)
Definition: global_data.cc:135
float getUnsafeUpdate(float prediction, float label, float update_scale)
float getSquareGrad(float prediction, float label)
float wexpmx(float x)
std::string getType()
shared_data * sd
Definition: global_data.h:375
float getRevertingWeight(shared_data *sd, float prediction, float eta_t)
float getSquareGrad(float prediction, float label)
float getLoss(shared_data *sd, float prediction, float label)
float first_derivative(shared_data *, float prediction, float label)
quantileloss(float &tau_)
std::string getType()
float getUnsafeUpdate(float prediction, float label, float update_scale)
float getLoss(shared_data *, float prediction, float label)
float getSquareGrad(float prediction, float label)
float getRevertingWeight(shared_data *, float, float)
float second_derivative(shared_data *, float, float)
float getUpdate(float prediction, float label, float update_scale, float pred_per_update)
float getLoss(shared_data *, float prediction, float label)
float getRevertingWeight(shared_data *sd, float prediction, float eta_t)
float min_label
Definition: global_data.h:150
float first_derivative(shared_data *, float prediction, float label)
float getRevertingWeight(shared_data *sd, float prediction, float eta_t)
std::string getType()
float getLoss(shared_data *, float prediction, float label)
float max_label
Definition: global_data.h:151
float getLoss(shared_data *, float prediction, float label)
loss_function * getLossFunction(vw &all, std::string funcName, float function_parameter)
float getUpdate(float prediction, float label, float update_scale, float)
float getUnsafeUpdate(float prediction, float label, float update_scale)
#define THROW(args)
Definition: vw_exception.h:181
float f
Definition: cache.cc:40
float first_derivative(shared_data *, float prediction, float label)