cpp/8.8.1/svrg_8cc_source.html

 #include <cassert>
 #include <iostream>

 #include "gd.h"
 #include "vw.h"
 #include "reductions.h"

 using namespace LEARNER;
 using namespace VW::config;

 namespace SVRG
 {
 // Offsets of the per-feature weight slots, counted in floats from the
 // feature's base weight reference (the code below indexes them as (&w)[slot]).
 // svrg_setup() sets the weight stride shift to 2, i.e. 4 floats per feature.
 #define W_INNER 0       // working "inner-loop" weights, updated per example
 #define W_STABLE 1      // stable weights, updated per stage
 #define W_STABLEGRAD 2  // gradient corresponding to stable weights

 // State carried by the SVRG learner between calls to learn()/predict().
 // Instances are created zero-filled via scoped_calloc_or_throw in svrg_setup(),
 // which then assigns all, prev_pass and stable_grad_count explicitly.
 struct svrg
 {
   int stage_size;         // Number of data passes per stage.
                           // Bound to the "stage_size" option (default 1).
   int prev_pass;          // To detect that we're in a new pass.
                           // Starts at -1 and is set to the current pass at the
                           // end of every learn() call.
   int stable_grad_count;  // Number of data points that
   // contributed to the stable gradient
   // calculation.
   // Incremented once per example during an exact-gradient pass and used as the
   // normalizer (update::norm) when taking inner steps.

   // The VW process' global state.
   vw* all;
 };

 // Mimic GD::inline_predict, but with a weight-slot offset so that prediction
 // can use either the stable or the inner weights.

 // Per-feature accumulation step: adds x times the weight stored `offset`
 // floats past w into the running sum p.
 template <int offset>
 inline void vec_add(float& p, const float x, float& w)
 {
   const float* slots = &w;
   const float weight = slots[offset];
   p += x * weight;
 }

 // Raw (not yet finalized) prediction for ec: starts from the label's initial
 // value and lets GD::foreach_feature apply vec_add<offset> to every feature,
 // so `offset` chooses which weight slot is read.
 template <int offset>
 inline float inline_predict(vw& all, example& ec)
 {
   float prediction = ec.l.simple.initial;
   GD::foreach_feature<float, vec_add<offset> >(all, ec, prediction);
   return prediction;
 }

 // -- Prediction, using inner vs. stable weights --

 // Finalized prediction for ec computed from the stable (W_STABLE) weights.
 // Does not write anything back into ec.
 float predict_stable(const svrg& s, example& ec)
 {
   const float raw_prediction = inline_predict<W_STABLE>(*s.all, ec);
   return GD::finalize_prediction(s.all->sd, raw_prediction);
 }

 // Learner predict callback: evaluates ec with the inner (W_INNER) weights,
 // storing the raw sum in ec.partial_prediction and the finalized value in
 // ec.pred.scalar. The base learner argument is unused.
 void predict(svrg& s, single_learner&, example& ec)
 {
   vw& all = *s.all;
   const float raw_prediction = inline_predict<W_INNER>(all, ec);
   ec.partial_prediction = raw_prediction;
   ec.pred.scalar = GD::finalize_prediction(all.sd, raw_prediction);
 }

 float gradient_scalar(const svrg& s, const example& ec, float pred)
 {
   return s.all->loss->first_derivative(s.all->sd, pred, ec.l.simple.label) * ec.weight;
 }

 // -- Updates, taking inner steps vs. accumulating a full gradient --

 // Per-example data handed to update_inner_feature for every feature.
 // All fields are filled in by update_inner() before use.
 struct update
 {
   float g_scalar_stable;  // gradient_scalar at the stable-weight prediction
   float g_scalar_inner;   // gradient_scalar at the inner-weight prediction
   float eta;              // step size, copied from vw::eta
   float norm;             // stable_grad_count as float; divides the stable gradient
 };

 // Inner-loop step for one feature. Moves the inner weight w against the
 // variance-reduced gradient: the difference between the inner and stable
 // example gradients along x, plus the accumulated stable gradient for this
 // feature divided by u.norm.
 inline void update_inner_feature(update& u, float x, float& w)
 {
   const float* slots = &w;
   const float direction = (u.g_scalar_inner - u.g_scalar_stable) * x + slots[W_STABLEGRAD] / u.norm;
   w -= u.eta * direction;
 }

 // Exact-gradient accumulation for one feature: adds this example's
 // contribution (g_scalar * x) to the feature's W_STABLEGRAD slot.
 inline void update_stable_feature(float& g_scalar, float x, float& w)
 {
   float& stable_gradient = (&w)[W_STABLEGRAD];
   stable_gradient += g_scalar * x;
 }

 void update_inner(const svrg& s, example& ec)
 {
   update u;
   // |ec| already has prediction according to inner weights.
   u.g_scalar_inner = gradient_scalar(s, ec, ec.pred.scalar);
   u.g_scalar_stable = gradient_scalar(s, ec, predict_stable(s, ec));
   u.eta = s.all->eta;
   u.norm = (float)s.stable_grad_count;
   GD::foreach_feature<update, update_inner_feature>(*s.all, ec, u);
 }

 // Adds ec's gradient, evaluated at the stable weights, to the W_STABLEGRAD
 // slot of each of its features.
 void update_stable(const svrg& s, example& ec)
 {
   const float stable_prediction = predict_stable(s, ec);
   float gradient = gradient_scalar(s, ec, stable_prediction);
   GD::foreach_feature<float, update_stable_feature>(*s.all, ec, gradient);
 }

 // Learner learn callback.
 //
 // Predicts ec with the inner weights, then does one of two things depending
 // on the current pass number (vw::passes_complete):
 //   * pass % (stage_size + 1) == 0: exact-gradient pass. On the first example
 //     of such a pass the current inner weights are committed as the new stable
 //     point and the stable gradient is cleared; every example then adds its
 //     gradient at the stable point to W_STABLEGRAD.
 //   * otherwise: inner-step pass; each example takes one variance-reduced
 //     step on the inner weights.
 //
 // Fix relative to the previous version: committing the stable point used to be
 // nested under the `!quiet` check, so running with --quiet silently skipped
 // the commit and the gradient reset. Only the progress messages depend on
 // `quiet` now.
 //
 // NOTE(review): stage_size == -1 makes the modulus below divide by zero; the
 // option is not validated anywhere in this file.
 void learn(svrg& s, single_learner& base, example& ec)
 {
   assert(ec.in_use);

   predict(s, base, ec);

   const int pass = static_cast<int>(s.all->passes_complete);
   const bool entering_new_pass = (s.prev_pass != pass);
   const bool verbose = !s.all->quiet;

   if (pass % (s.stage_size + 1) == 0)  // Compute exact gradient
   {
     if (entering_new_pass)
     {
       if (verbose)
       {
         std::cout << "svrg pass " << pass << ": committing stable point" << std::endl;
       }

       // Snapshot the inner weights as the stable point and start a fresh
       // gradient accumulation for this stage.
       const uint32_t weight_count = VW::num_weights(*s.all);
       for (uint32_t j = 0; j < weight_count; j++)
       {
         const float w = VW::get_weight(*s.all, j, W_INNER);
         VW::set_weight(*s.all, j, W_STABLE, w);
         VW::set_weight(*s.all, j, W_STABLEGRAD, 0.f);
       }
       s.stable_grad_count = 0;

       if (verbose)
       {
         std::cout << "svrg pass " << pass << ": computing exact gradient" << std::endl;
       }
     }
     update_stable(s, ec);
     s.stable_grad_count++;
   }
   else  // Perform updates
   {
     if (entering_new_pass && verbose)
     {
       std::cout << "svrg pass " << pass << ": taking steps" << std::endl;
     }
     update_inner(s, ec);
   }

   s.prev_pass = pass;
 }

 // Learner save/load callback.
 // On read, the regressor is (re)initialized first. If the model has no
 // backing files nothing more happens. Otherwise a one-byte "resume" flag is
 // read or written, and the weights are then transferred either with the full
 // online state (resume) or as a plain regressor.
 void save_load(svrg& s, io_buf& model_file, bool read, bool text)
 {
   vw& all = *s.all;

   if (read)
   {
     initialize_regressor(all);
   }

   if (model_file.files.empty())
   {
     return;
   }

   // When reading, this initial value is overwritten by the flag in the file.
   bool resume = all.save_resume;
   std::stringstream msg;
   msg << ":" << resume << "\n";
   bin_text_read_write_fixed(model_file, reinterpret_cast<char*>(&resume), sizeof(resume), "", read, msg, text);

   if (!resume)
   {
     GD::save_load_regressor(all, model_file, read, text);
     return;
   }

   double temp = 0.;
   GD::save_load_online_state(all, model_file, read, text, temp);
 }

 }  // namespace SVRG

 using namespace SVRG;

 // Registers the SVRG command-line options and, when "svrg" was requested,
 // builds the SVRG base learner. Returns nullptr when the option is absent.
 base_learner* svrg_setup(options_i& options, vw& all)
 {
   auto s = scoped_calloc_or_throw<svrg>();

   bool enabled = false;
   option_group_definition new_options("Stochastic Variance Reduced Gradient");
   new_options.add(make_option("svrg", enabled).keep().help("Streaming Stochastic Variance Reduced Gradient"));
   new_options.add(make_option("stage_size", s->stage_size).default_value(1).help("Number of passes per SVRG stage"));
   options.add_and_parse(new_options);

   if (!enabled)
   {
     return nullptr;
   }

   s->all = &all;
   s->prev_pass = -1;  // no pass seen yet, so the first example starts a new pass
   s->stable_grad_count = 0;

   // Request more parameter storage (4 floats per feature)
   all.weights.stride_shift(2);
   const auto params_per_weight = UINT64_ONE << all.weights.stride_shift();

   learner<svrg, example>& l = init_learner(s, learn, predict, params_per_weight);
   l.set_save_load(save_load);
   return make_base(l);
 }
GD::finalize_prediction
float finalize_prediction(shared_data *sd, float ret)
Definition: gd.cc:339

vw::weights
parameters weights
Definition: global_data.h:537

vw::loss
loss_function * loss
Definition: global_data.h:523

initialize_regressor
void initialize_regressor(vw &all, T &weights)
Definition: parse_regressor.cc:97

svrg_setup
base_learner * svrg_setup(options_i &options, vw &all)
Definition: svrg.cc:168

SVRG::update::norm
float norm
Definition: svrg.cc:73

polyprediction::scalar
float scalar
Definition: example.h:45

gd.h

SVRG::predict_stable
float predict_stable(const svrg &s, example &ec)
Definition: svrg.cc:50

SVRG::svrg::prev_pass
int prev_pass
Definition: svrg.cc:21

SVRG::update_stable
void update_stable(const svrg &s, example &ec)
Definition: svrg.cc:99

SVRG::update::eta
float eta
Definition: svrg.cc:72

W_STABLE
#define W_STABLE
Definition: svrg.cc:15

LEARNER::make_base
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462

VW::config::option_group_definition
Definition: options.h:85

example::partial_prediction
float partial_prediction
Definition: example.h:68

vw::quiet
bool quiet
Definition: global_data.h:487

VW::config::options_i::add_and_parse
virtual void add_and_parse(const option_group_definition &group)=0

LEARNER::learner::set_save_load
void set_save_load(void(*sl)(T &, io_buf &, bool, bool))
Definition: learner.h:257

SVRG::update_stable_feature
void update_stable_feature(float &g_scalar, float x, float &w)
Definition: svrg.cc:82

label_data::label
float label
Definition: simple_label.h:14

polylabel::simple
label_data simple
Definition: example.h:28

LEARNER::learner
Definition: cb_explore.h:11

loss_function::first_derivative
virtual float first_derivative(shared_data *, float prediction, float label)=0

vw
Definition: global_data.h:369

GD::save_load_online_state
void save_load_online_state(vw &all, io_buf &model_file, bool read, bool text, gd *g, std::stringstream &msg, uint32_t ftrl_size, T &weights)
Definition: gd.cc:776

SVRG::update::g_scalar_inner
float g_scalar_inner
Definition: svrg.cc:71

W_INNER
#define W_INNER
Definition: svrg.cc:14

SVRG
Definition: svrg.cc:12

SVRG::svrg::all
vw * all
Definition: svrg.cc:27

SVRG::inline_predict
float inline_predict(vw &all, example &ec)
Definition: svrg.cc:41

SVRG::svrg::stable_grad_count
int stable_grad_count
Definition: svrg.cc:22

LEARNER::init_learner
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369

vw::sd
shared_data * sd
Definition: global_data.h:375

io_buf::files
v_array< int > files
Definition: io_buf.h:64

VW::get_weight
float get_weight(vw &all, uint32_t index, uint32_t offset)
Definition: vw.h:177

SVRG::update_inner_feature
void update_inner_feature(update &u, float x, float &w)
Definition: svrg.cc:76

VW::config::options_i
Definition: options.h:107

VW::set_weight
void set_weight(vw &all, uint32_t index, uint32_t offset, float value)
Definition: vw.h:182

SVRG::update::g_scalar_stable
float g_scalar_stable
Definition: svrg.cc:70

example
Definition: example.h:54

label_data::initial
float initial
Definition: simple_label.h:16

io_buf
Definition: io_buf.h:54

reductions.h

SVRG::gradient_scalar
float gradient_scalar(const svrg &s, const example &ec, float pred)
Definition: svrg.cc:61

vw::eta
float eta
Definition: global_data.h:531

VW::config::option_group_definition::add
option_group_definition & add(T &&op)
Definition: options.h:90

SVRG::predict
void predict(svrg &s, single_learner &, example &ec)
Definition: svrg.cc:55

VW::config
Definition: options.h:11

example::l
polylabel l
Definition: example.h:57

vw::passes_complete
size_t passes_complete
Definition: global_data.h:452

example::in_use
bool in_use
Definition: example.h:79

VW::config::make_option
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80

SVRG::svrg
Definition: svrg.cc:18

SVRG::learn
void learn(svrg &s, single_learner &base, example &ec)
Definition: svrg.cc:105

UINT64_ONE
constexpr uint64_t UINT64_ONE
Definition: crossplat_compat.h:16

v_array::empty
bool empty() const
Definition: v_array.h:59

SVRG::update_inner
void update_inner(const svrg &s, example &ec)
Definition: svrg.cc:88

SVRG::update
Definition: svrg.cc:68

vw.h

vw::save_resume
bool save_resume
Definition: global_data.h:415

parameters::stride_shift
uint32_t stride_shift()
Definition: array_parameters.h:244

SVRG::save_load
void save_load(svrg &s, io_buf &model_file, bool read, bool text)
Definition: svrg.cc:142

SVRG::vec_add
void vec_add(float &p, const float x, float &w)
Definition: svrg.cc:34

example::pred
polyprediction pred
Definition: example.h:60

LEARNER
Definition: cb_explore.h:8

VW::num_weights
uint32_t num_weights(vw &all)
Definition: vw.h:187

W_STABLEGRAD
#define W_STABLEGRAD
Definition: svrg.cc:16

SVRG::svrg::stage_size
int stage_size
Definition: svrg.cc:20

GD::save_load_regressor
void save_load_regressor(vw &all, io_buf &model_file, bool read, bool text, T &weights)
Definition: gd.cc:707

example::weight
float weight
Definition: example.h:62

bin_text_read_write_fixed
size_t bin_text_read_write_fixed(io_buf &io, char *data, size_t len, const char *read_message, bool read, std::stringstream &msg, bool text)
Definition: io_buf.h:326