Vowpal Wabbit
kernel_svm.cc
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
 */
#include <fstream>
#include <sstream>
#include <float.h>
#ifdef _WIN32
#define NOMINMAX
#include <WinSock2.h>
#else
#include <netdb.h>
#endif
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <memory>

#include "parse_example.h"
#include "constant.h"
#include "gd.h"
#include "cache.h"
#include "accumulate.h"
#include "learner.h"
#include "vw.h"
#include <map>
#include "memory.h"
#include "vw_allreduce.h"
#include "rand48.h"
#include "reductions.h"

#define SVM_KER_LIN 0
#define SVM_KER_RBF 1
#define SVM_KER_POLY 2

using namespace LEARNER;
using namespace VW::config;

using std::endl;

struct svm_params;

static size_t num_kernel_evals = 0;
static size_t num_cache_evals = 0;

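// An online kernel SVM in the style of LaSVM (see the --reprocess option
// below). Each support vector is stored as a flattened example together with a
// cached row of kernel evaluations against the current support set: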
struct svm_example
{
  v_array<float> krow;  // cached kernel row: krow[i] = K(this, support_vec[i])
  flat_example ex;

  ~svm_example();
  void init_svm_example(flat_example* fec);
  int compute_kernels(svm_params& params);
  int clear_kernels();
};

struct svm_model
{
  size_t num_support;
  v_array<svm_example*> support_vec;
  v_array<float> alpha;
  v_array<float> delta;
};

void free_svm_model(svm_model* model)
{
  for (size_t i = 0; i < model->num_support; i++)
  {
    model->support_vec[i]->~svm_example();
    // Warning C6001 is triggered by the following:
    // example is allocated using (a) '&calloc_or_throw<svm_example>()' and freed using (b)
    // 'free(model->support_vec[i])'
    //
    // When the call to allocation is replaced by (a) 'new svm_example()' and deallocated using (b) 'operator delete
    // (model->support_vec[i])', the warning goes away. Disable SDL warning.
    //    #pragma warning(disable:6001)
    free_it(model->support_vec[i]);
    //  #pragma warning(default:6001)

    model->support_vec[i] = 0;
  }

  model->support_vec.delete_v();
  model->alpha.delete_v();
  model->delta.delete_v();
  free(model);
}

struct svm_params
{
  size_t current_pass;
  bool active;
  bool active_pool_greedy;
  bool para_active;
  double active_c;

  size_t pool_size;
  size_t pool_pos;
  size_t subsample;  // NOTE: Eliminating subsample to only support 1/pool_size
  size_t reprocess;

  svm_model* model;
  size_t maxcache;
  // size_t curcache;

  svm_example** pool;
  float lambda;

  void* kernel_params;
  size_t kernel_type;

  size_t local_begin, local_end;
  size_t current_t;

  float loss_sum;

  vw* all;  // flatten, parallel
  std::shared_ptr<rand_state> _random_state;

  ~svm_params()
  {
    free(pool);
    if (all)
    {
      all->trace_message << "Num support = " << model->num_support << endl;
      all->trace_message << "Number of kernel evaluations = " << num_kernel_evals << " "
                         << "Number of cache queries = " << num_cache_evals << endl;
      all->trace_message << "Total loss = " << loss_sum << endl;
    }
    if (model)
    {
      free_svm_model(model);
    }
    if (all)
    {
      all->trace_message << "Done freeing model" << endl;
    }

    free(kernel_params);
    if (all)
    {
      all->trace_message << "Done freeing kernel params" << endl;
      all->trace_message << "Done with finish " << endl;
    }
  }
};

void svm_example::init_svm_example(flat_example* fec)
{
  ex = *fec;
  free(fec);
}

svm_example::~svm_example()
{
  krow.delete_v();
  // free flatten example contents
  flat_example* fec = &calloc_or_throw<flat_example>();
  *fec = ex;
  free_flatten_example(fec);  // free contents of flat example and frees fec.
}

float kernel_function(const flat_example* fec1, const flat_example* fec2, void* params, size_t kernel_type);

int svm_example::compute_kernels(svm_params& params)
{
  int alloc = 0;
  svm_model* model = params.model;
  size_t n = model->num_support;

  if (krow.size() < n)
  {
    // computing new kernel values and caching them
    // if(params->curcache + n > params->maxcache)
    //   trim_cache(params);
    num_kernel_evals += krow.size();
    for (size_t i = krow.size(); i < n; i++)
    {
      svm_example* sec = model->support_vec[i];
      float kv = kernel_function(&ex, &(sec->ex), params.kernel_params, params.kernel_type);
      krow.push_back(kv);
      alloc += 1;
    }
  }
  else
    num_cache_evals += n;
  return alloc;
}

int svm_example::clear_kernels()
{
  int rowsize = (int)krow.size();
  krow.end() = krow.begin();
  krow.resize(0);
  return -rowsize;
}

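// Move support vector svi to the front of the support set ("hot" position 0)
// and rotate every cached kernel row to match the new ordering, so krow[j]
// keeps lining up with support_vec[j] in each cached row.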
static int make_hot_sv(svm_params& params, size_t svi)
{
  svm_model* model = params.model;
  size_t n = model->num_support;
  if (svi >= model->num_support)
    params.all->trace_message << "Internal error at " << __FILE__ << ":" << __LINE__ << endl;
  // rotate params fields
  svm_example* svi_e = model->support_vec[svi];
  int alloc = svi_e->compute_kernels(params);
  float svi_alpha = model->alpha[svi];
  float svi_delta = model->delta[svi];
  for (size_t i = svi; i > 0; --i)
  {
    model->support_vec[i] = model->support_vec[i - 1];
    model->alpha[i] = model->alpha[i - 1];
    model->delta[i] = model->delta[i - 1];
  }
  model->support_vec[0] = svi_e;
  model->alpha[0] = svi_alpha;
  model->delta[0] = svi_delta;
  // rotate cache
  for (size_t j = 0; j < n; j++)
  {
    svm_example* e = model->support_vec[j];
    size_t rowsize = e->krow.size();
    if (svi < rowsize)
    {
      float kv = e->krow[svi];
      for (size_t i = svi; i > 0; --i) e->krow[i] = e->krow[i - 1];
      e->krow[0] = kv;
    }
    else
    {
      float kv = svi_e->krow[j];
      e->krow.push_back(0);
      alloc += 1;
      for (size_t i = e->krow.size() - 1; i > 0; --i) e->krow[i] = e->krow[i - 1];
      e->krow[0] = kv;
    }
  }
  return alloc;
}

static int trim_cache(svm_params& params)
{
  int sz = (int)params.maxcache;
  svm_model* model = params.model;
  size_t n = model->num_support;
  int alloc = 0;
  for (size_t i = 0; i < n; i++)
  {
    svm_example* e = model->support_vec[i];
    sz -= (int)e->krow.size();
    if (sz < 0)
      alloc += e->clear_kernels();
  }
  return alloc;
}

int save_load_flat_example(io_buf& model_file, bool read, flat_example*& fec)
{
  size_t brw = 1;
  if (read)
  {
    fec = &calloc_or_throw<flat_example>();
    brw = model_file.bin_read_fixed((char*)fec, sizeof(flat_example), "");

    if (brw > 0)
    {
      if (fec->tag_len > 0)
      {
        fec->tag = calloc_or_throw<char>(fec->tag_len);
        brw = model_file.bin_read_fixed((char*)fec->tag, fec->tag_len * sizeof(char), "");
        if (!brw)
          return 2;
      }
      if (fec->fs.size() > 0)
      {
        features& fs = fec->fs;
        size_t len = fs.size();
        fs.values = v_init<feature_value>();
        fs.values.resize(len);
        brw = model_file.bin_read_fixed((char*)fs.values.begin(), len * sizeof(feature_value), "");
        if (!brw)
          return 3;
        fs.values.end() = fs.values.begin() + len;

        len = fs.indicies.size();
        fs.indicies = v_init<feature_index>();
        fs.indicies.resize(len);
        brw = model_file.bin_read_fixed((char*)fs.indicies.begin(), len * sizeof(feature_index), "");
        if (!brw)
          return 3;
        fs.indicies.end() = fs.indicies.begin() + len;
      }
    }
    else
      return 1;
  }
  else
  {
    brw = model_file.bin_write_fixed((char*)fec, sizeof(flat_example));

    if (brw > 0)
    {
      if (fec->tag_len > 0)
      {
        brw = model_file.bin_write_fixed((char*)fec->tag, (uint32_t)fec->tag_len * sizeof(char));
        if (!brw)
        {
          std::cerr << fec->tag_len << " " << fec->tag << endl;
          return 2;
        }
      }
      if (fec->fs.size() > 0)
      {
        brw = model_file.bin_write_fixed(
            (char*)fec->fs.values.begin(), (uint32_t)fec->fs.size() * sizeof(feature_value));
        if (!brw)
          return 3;
        brw = model_file.bin_write_fixed(
            (char*)fec->fs.indicies.begin(), (uint32_t)fec->fs.indicies.size() * sizeof(feature_index));
        if (!brw)
          return 3;
      }
    }
    else
      return 1;
  }
  return 0;
}

void save_load_svm_model(svm_params& params, io_buf& model_file, bool read, bool text)
{
  svm_model* model = params.model;
  // TODO: check about initialization

  if (model_file.files.size() == 0)
    return;
  std::stringstream msg;
  bin_text_read_write_fixed(model_file, (char*)&(model->num_support), sizeof(model->num_support), "", read, msg, text);

  flat_example* fec = nullptr;
  if (read)
    model->support_vec.resize(model->num_support);

  for (uint32_t i = 0; i < model->num_support; i++)
  {
    if (read)
    {
      save_load_flat_example(model_file, read, fec);
      svm_example* tmp = &calloc_or_throw<svm_example>();
      tmp->init_svm_example(fec);
      model->support_vec.push_back(tmp);
    }
    else
    {
      fec = &(model->support_vec[i]->ex);
      save_load_flat_example(model_file, read, fec);
    }
  }

  if (read)
    model->alpha.resize(model->num_support);
  bin_text_read_write_fixed(
      model_file, (char*)model->alpha.begin(), (uint32_t)model->num_support * sizeof(float), "", read, msg, text);
  if (read)
    model->delta.resize(model->num_support);
  bin_text_read_write_fixed(
      model_file, (char*)model->delta.begin(), (uint32_t)model->num_support * sizeof(float), "", read, msg, text);
}

void save_load(svm_params& params, io_buf& model_file, bool read, bool text)
{
  if (text)
  {
    params.all->trace_message << "Not supporting readable model for kernel svm currently" << endl;
    return;
  }

  save_load_svm_model(params, model_file, read, text);
}

float linear_kernel(const flat_example* fec1, const flat_example* fec2)
{
  float dotprod = 0;

  features& fs_1 = (features&)fec1->fs;
  features& fs_2 = (features&)fec2->fs;
  if (fs_2.indicies.size() == 0)
    return 0.f;

  int numint = 0;
  for (size_t idx1 = 0, idx2 = 0; idx1 < fs_1.size() && idx2 < fs_2.size(); idx1++)
  {
    uint64_t ec1pos = fs_1.indicies[idx1];
    uint64_t ec2pos = fs_2.indicies[idx2];
    if (ec1pos < ec2pos)
      continue;

    while (ec1pos > ec2pos && ++idx2 < fs_2.size()) ec2pos = fs_2.indicies[idx2];

    if (ec1pos == ec2pos)
    {
      numint++;
      dotprod += fs_1.values[idx1] * fs_2.values[idx2];
      ++idx2;
    }
  }
  return dotprod;
}

float poly_kernel(const flat_example* fec1, const flat_example* fec2, int power)
{
  float dotprod = linear_kernel(fec1, fec2);
  return pow(1 + dotprod, power);
}

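// Gaussian RBF kernel. Uses the identity ||x - y||^2 = ||x||^2 + ||y||^2 - 2<x, y>,
// taking the squared norms from each flat_example's precomputed
// total_sum_feat_sq so only the sparse dot product has to be evaluated; the
// bandwidth acts as the coefficient gamma in exp(-gamma * ||x - y||^2).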
float rbf_kernel(const flat_example* fec1, const flat_example* fec2, float bandwidth)
{
  float dotprod = linear_kernel(fec1, fec2);
  return expf(-(fec1->total_sum_feat_sq + fec2->total_sum_feat_sq - 2 * dotprod) * bandwidth);
}

float kernel_function(const flat_example* fec1, const flat_example* fec2, void* params, size_t kernel_type)
{
  switch (kernel_type)
  {
    case SVM_KER_RBF:
      return rbf_kernel(fec1, fec2, *((float*)params));
    case SVM_KER_POLY:
      return poly_kernel(fec1, fec2, *((int*)params));
    case SVM_KER_LIN:
      return linear_kernel(fec1, fec2);
  }
  return 0;
}

float dense_dot(float* v1, v_array<float> v2, size_t n)
{
  float dot_prod = 0.;
  for (size_t i = 0; i < n; i++) dot_prod += v1[i] * v2[i];
  return dot_prod;
}

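// Batch prediction: refresh each example's cached kernel row against the
// current support set and score it as f(x) = (1 / lambda) * sum_i alpha_i * K(x_i, x).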
void predict(svm_params& params, svm_example** ec_arr, float* scores, size_t n)
{
  svm_model* model = params.model;
  for (size_t i = 0; i < n; i++)
  {
    ec_arr[i]->compute_kernels(params);
    if (ec_arr[i]->krow.size() > 0)
      scores[i] = dense_dot(ec_arr[i]->krow.begin(), model->alpha, model->num_support) / params.lambda;
    else
      scores[i] = 0;
  }
}

void predict(svm_params& params, single_learner&, example& ec)
{
  flat_example* fec = flatten_sort_example(*(params.all), &ec);
  if (fec)
  {
    svm_example* sec = &calloc_or_throw<svm_example>();
    sec->init_svm_example(fec);
    float score;
    predict(params, &sec, &score, 1);
    ec.pred.scalar = score;
    sec->~svm_example();
    free(sec);
  }
}

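// Measure how far each support vector is from optimality: |delta[i]| for
// coordinates whose box constraint still allows movement in the gradient
// direction, 0 otherwise. Returns the index of the worst violator, which
// reprocessing will update next.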
size_t suboptimality(svm_model* model, double* subopt)
{
  size_t max_pos = 0;
  double max_val = 0;
  for (size_t i = 0; i < model->num_support; i++)
  {
    float tmp = model->alpha[i] * model->support_vec[i]->ex.l.simple.label;

    if ((tmp < model->support_vec[i]->ex.l.simple.weight && model->delta[i] < 0) || (tmp > 0 && model->delta[i] > 0))
      subopt[i] = fabs(model->delta[i]);
    else
      subopt[i] = 0;

    if (subopt[i] > max_val)
    {
      max_val = subopt[i];
      max_pos = i;
    }
  }
  return max_pos;
}

int remove(svm_params& params, size_t svi)
{
  svm_model* model = params.model;
  if (svi >= model->num_support)
    params.all->trace_message << "Internal error at " << __FILE__ << ":" << __LINE__ << endl;
  // shift params fields
  svm_example* svi_e = model->support_vec[svi];
  for (size_t i = svi; i < model->num_support - 1; ++i)
  {
    model->support_vec[i] = model->support_vec[i + 1];
    model->alpha[i] = model->alpha[i + 1];
    model->delta[i] = model->delta[i + 1];
  }
  svi_e->~svm_example();
  free(svi_e);
  model->support_vec.pop();
  model->alpha.pop();
  model->delta.pop();
  model->num_support--;
  // shift cache
  int alloc = 0;
  for (size_t j = 0; j < model->num_support; j++)
  {
    svm_example* e = model->support_vec[j];
    size_t rowsize = e->krow.size();
    if (svi < rowsize)
    {
      for (size_t i = svi; i < rowsize - 1; i++) e->krow[i] = e->krow[i + 1];
      e->krow.pop();
      alloc -= 1;
    }
  }
  return alloc;
}

int add(svm_params& params, svm_example* fec)
{
  svm_model* model = params.model;
  model->num_support++;
  model->support_vec.push_back(fec);
  model->alpha.push_back(0.);
  model->delta.push_back(0.);
  return (int)(model->support_vec.size() - 1);
}

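// Coordinate update on alpha[pos]: zero the coordinate out, solve for the
// value that makes the regularized margin constraint tight, clip it to the box
// [0, weight] (signed by the label), and cap the step magnitude at 1. The
// deltas of all support vectors are then adjusted by the resulting change, and
// coordinates driven (near) zero are removed from the support set.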
bool update(svm_params& params, size_t pos)
{
  svm_model* model = params.model;
  bool overshoot = false;
  svm_example* fec = model->support_vec[pos];
  label_data& ld = fec->ex.l.simple;
  fec->compute_kernels(params);
  float* inprods = fec->krow.begin();
  float alphaKi = dense_dot(inprods, model->alpha, model->num_support);
  model->delta[pos] = alphaKi * ld.label / params.lambda - 1;
  float alpha_old = model->alpha[pos];
  alphaKi -= model->alpha[pos] * inprods[pos];
  model->alpha[pos] = 0.;

  float proj = alphaKi * ld.label;
  float ai = (params.lambda - proj) / inprods[pos];

  if (ai > fec->ex.l.simple.weight)
    ai = fec->ex.l.simple.weight;
  else if (ai < 0)
    ai = 0;

  ai *= ld.label;
  float diff = ai - alpha_old;

  if (fabs(diff) > 1.0e-06)
    overshoot = true;

  if (fabs(diff) > 1.)
  {
    diff = (float)(diff > 0) - (diff < 0);
    ai = alpha_old + diff;
  }

  for (size_t i = 0; i < model->num_support; i++)
  {
    label_data& ldi = model->support_vec[i]->ex.l.simple;
    model->delta[i] += diff * inprods[i] * ldi.label / params.lambda;
  }

  if (fabs(ai) <= 1.0e-10)
    remove(params, pos);
  else
    model->alpha[pos] = ai;

  return overshoot;
}

void copy_char(char& c1, const char& c2) noexcept
{
  if (c2 != '\0')
    c1 = c2;
}

void add_size_t(size_t& t1, const size_t& t2) noexcept { t1 += t2; }

void add_double(double& t1, const double& t2) noexcept { t1 += t2; }

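// Parallel active learning: serialize the locally queried examples, allreduce
// the per-node buffer sizes and contents so every node sees every query, then
// deserialize the merged buffer back into the local pool, remembering which
// slice of it ([local_begin, local_end]) originated on this node.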
void sync_queries(vw& all, svm_params& params, bool* train_pool)
{
  io_buf* b = new io_buf();

  char* queries;
  flat_example* fec = nullptr;

  for (size_t i = 0; i < params.pool_pos; i++)
  {
    if (!train_pool[i])
      continue;

    fec = &(params.pool[i]->ex);
    save_load_flat_example(*b, false, fec);
    delete params.pool[i];
  }

  size_t* sizes = calloc_or_throw<size_t>(all.all_reduce->total);
  sizes[all.all_reduce->node] = b->head - b->space.begin();
  all_reduce<size_t, add_size_t>(all, sizes, all.all_reduce->total);

  size_t prev_sum = 0, total_sum = 0;
  for (size_t i = 0; i < all.all_reduce->total; i++)
  {
    if (i < all.all_reduce->node)  // avoid unsigned wraparound of node - 1 on node 0
      prev_sum += sizes[i];
    total_sum += sizes[i];
  }

  if (total_sum > 0)
  {
    queries = calloc_or_throw<char>(total_sum);
    memcpy(queries + prev_sum, b->space.begin(), b->head - b->space.begin());
    b->space.delete_v();
    all_reduce<char, copy_char>(all, queries, total_sum);

    b->space.begin() = queries;
    b->head = b->space.begin();
    b->space.end() = &queries[total_sum * sizeof(char)];

    size_t num_read = 0;
    params.pool_pos = 0;

    for (size_t i = 0; i < params.pool_size; i++)
    {
      if (!save_load_flat_example(*b, true, fec))
      {
        params.pool[i] = &calloc_or_throw<svm_example>();
        params.pool[i]->init_svm_example(fec);
        train_pool[i] = true;
        params.pool_pos++;
      }
      else
        break;

      num_read += b->head - b->space.begin();
      if (num_read == prev_sum)
        params.local_begin = i + 1;
      if (num_read == prev_sum + sizes[all.all_reduce->node])
        params.local_end = i;
    }
  }
  if (fec)
    free(fec);
  free(sizes);
  delete b;
}

void train(svm_params& params)
{
  bool* train_pool = calloc_or_throw<bool>(params.pool_size);
  for (size_t i = 0; i < params.pool_size; i++) train_pool[i] = false;

  float* scores = calloc_or_throw<float>(params.pool_pos);
  predict(params, params.pool, scores, params.pool_pos);

  if (params.active)
  {
    if (params.active_pool_greedy)
    {
      std::multimap<double, size_t> scoremap;
      for (size_t i = 0; i < params.pool_pos; i++)
        scoremap.insert(std::pair<const double, const size_t>(fabs(scores[i]), i));

      std::multimap<double, size_t>::iterator iter = scoremap.begin();

      for (size_t train_size = 1; iter != scoremap.end() && train_size <= params.subsample; train_size++)
      {
        train_pool[iter->second] = 1;
        iter++;
      }
    }
    else
    {
      for (size_t i = 0; i < params.pool_pos; i++)
      {
        float queryp = 2.0f /
            (1.0f +
                expf((float)(params.active_c * fabs(scores[i])) *
                    (float)pow(params.pool[i]->ex.example_counter, 0.5f)));
        if (params._random_state->get_and_update_random() < queryp)
        {
          svm_example* fec = params.pool[i];
          fec->ex.l.simple.weight *= 1 / queryp;
          train_pool[i] = 1;
        }
      }
    }
  }

  if (params.para_active)
  {
    for (size_t i = 0; i < params.pool_pos; i++)
      if (!train_pool[i])
        delete params.pool[i];
    sync_queries(*(params.all), params, train_pool);
  }

  if (params.all->training)
  {
    svm_model* model = params.model;

    for (size_t i = 0; i < params.pool_pos; i++)
    {
      int model_pos = -1;
      if (params.active)
      {
        if (train_pool[i])
        {
          model_pos = add(params, params.pool[i]);
        }
      }
      else
        model_pos = add(params, params.pool[i]);

      if (model_pos >= 0)
      {
        bool overshoot = update(params, model_pos);

        double* subopt = calloc_or_throw<double>(model->num_support);
        for (size_t j = 0; j < params.reprocess; j++)
        {
          if (model->num_support == 0)
            break;
          int randi = 1;
          if (params._random_state->get_and_update_random() < 0.5)
            randi = 0;
          if (randi)
          {
            size_t max_pos = suboptimality(model, subopt);
            if (subopt[max_pos] > 0)
            {
              if (!overshoot && max_pos == (size_t)model_pos && max_pos > 0 && j == 0)
                params.all->trace_message << "Shouldn't reprocess right after process!!!" << endl;
              if (max_pos * model->num_support <= params.maxcache)
                make_hot_sv(params, max_pos);
              update(params, max_pos);
            }
          }
          else
          {
            size_t rand_pos = (size_t)floorf(params._random_state->get_and_update_random() * model->num_support);
            update(params, rand_pos);
          }
        }
        free(subopt);
      }
    }
  }
  else
    for (size_t i = 0; i < params.pool_pos; i++) delete params.pool[i];

  free(scores);
  free(train_pool);
}

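// One online step: score the incoming example against the current model,
// accumulate hinge loss, buffer the example into the pool, and run a
// train/reprocess round whenever the pool fills up.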
void learn(svm_params& params, single_learner&, example& ec)
{
  flat_example* fec = flatten_sort_example(*(params.all), &ec);
  if (fec)
  {
    svm_example* sec = &calloc_or_throw<svm_example>();
    sec->init_svm_example(fec);
    float score = 0;
    predict(params, &sec, &score, 1);
    ec.pred.scalar = score;
    ec.loss = std::max(0.f, 1.f - score * ec.l.simple.label);
    params.loss_sum += ec.loss;
    if (params.all->training && ec.example_counter % 100 == 0)
      trim_cache(params);
    if (params.all->training && ec.example_counter % 1000 == 0 && ec.example_counter >= 2)
    {
      params.all->trace_message << "Number of support vectors = " << params.model->num_support << endl;
      params.all->trace_message << "Number of kernel evaluations = " << num_kernel_evals << " "
                                << "Number of cache queries = " << num_cache_evals << " loss sum = " << params.loss_sum
                                << " " << params.model->alpha[params.model->num_support - 1] << " "
                                << params.model->alpha[params.model->num_support - 2] << endl;
    }
    params.pool[params.pool_pos] = sec;
    params.pool_pos++;

    if (params.pool_pos == params.pool_size)
    {
      train(params);
      params.pool_pos = 0;
    }
  }
}

LEARNER::base_learner* kernel_svm_setup(options_i& options, vw& all)
{
  auto params = scoped_calloc_or_throw<svm_params>();
  std::string kernel_type;
  float bandwidth = 1.f;
  int degree = 2;

  bool ksvm = false;

  option_group_definition new_options("Kernel SVM");
  new_options.add(make_option("ksvm", ksvm).keep().help("kernel svm"))
      .add(make_option("reprocess", params->reprocess).default_value(1).help("number of reprocess steps for LASVM"))
      .add(make_option("pool_greedy", params->active_pool_greedy).help("use greedy selection on mini pools"))
      .add(make_option("para_active", params->para_active).help("do parallel active learning"))
      .add(make_option("pool_size", params->pool_size).default_value(1).help("size of pools for active learning"))
      .add(make_option("subsample", params->subsample)
               .default_value(1)
               .help("number of items to subsample from the pool"))
      .add(make_option("kernel", kernel_type)
               .keep()
               .default_value("linear")
               .help("type of kernel (rbf or linear (default))"))
      .add(make_option("bandwidth", bandwidth).keep().default_value(1.f).help("bandwidth of rbf kernel"))
      .add(make_option("degree", degree).keep().default_value(2).help("degree of poly kernel"));
  options.add_and_parse(new_options);

  if (!ksvm)
  {
    return nullptr;
  }

  std::string loss_function = "hinge";
  float loss_parameter = 0.0;
  delete all.loss;
  all.loss = getLossFunction(all, loss_function, (float)loss_parameter);

  params->model = &calloc_or_throw<svm_model>();
  params->model->num_support = 0;
  params->maxcache = 1024 * 1024 * 1024;
  params->loss_sum = 0.;
  params->all = &all;
  params->_random_state = all.get_random_state();

  // This param comes from the active reduction.
  // During the options refactor this changed the semantics a bit - now this will only be true if --active was
  // supplied and NOT --simulation.
  if (all.active)
    params->active = true;
  if (params->active)
    params->active_c = 1.;

  params->pool = calloc_or_throw<svm_example*>(params->pool_size);
  params->pool_pos = 0;

  if (!options.was_supplied("subsample") && params->para_active)
    params->subsample = (size_t)ceil(params->pool_size / all.all_reduce->total);

  params->lambda = all.l2_lambda;
  if (params->lambda == 0.)
    params->lambda = 1.;
  params->all->trace_message << "Lambda = " << params->lambda << endl;
  params->all->trace_message << "Kernel = " << kernel_type << endl;

  if (kernel_type.compare("rbf") == 0)
  {
    params->kernel_type = SVM_KER_RBF;
    params->all->trace_message << "bandwidth = " << bandwidth << endl;
    params->kernel_params = &calloc_or_throw<float>();  // read back as a float in kernel_function
    *((float*)params->kernel_params) = bandwidth;
  }
  else if (kernel_type.compare("poly") == 0)
  {
    params->kernel_type = SVM_KER_POLY;
    params->all->trace_message << "degree = " << degree << endl;
    params->kernel_params = &calloc_or_throw<int>();
    *((int*)params->kernel_params) = degree;
  }
  else
    params->kernel_type = SVM_KER_LIN;

  params->all->weights.stride_shift(0);

  learner<svm_params, example>& l = init_learner(params, learn, predict, 1);
  l.set_save_load(save_load);
  return make_base(l);
}
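
// Example invocations (data file names are hypothetical; the flags are the
// ones registered above, and lambda is taken from VW's --l2 option):
//
//   vw --ksvm --kernel rbf --bandwidth 0.5 --reprocess 5 train.dat
//   vw --ksvm --kernel poly --degree 3 train.dat
//
// Prediction returns the raw margin in ec.pred.scalar; hinge loss is reported
// during training.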
Definition: kernel_svm.cc:194