Vowpal Wabbit
Classes | Functions
audit_regressor.cc File Reference
#include "reductions.h"
#include "interactions.h"
#include "parse_args.h"
#include "vw.h"

Go to the source code of this file.

Classes

struct  audit_regressor_data
 

Functions

void audit_regressor_interaction (audit_regressor_data &dat, const audit_strings *f)
 
void audit_regressor_feature (audit_regressor_data &dat, const float, const uint64_t ft_idx)
 
void audit_regressor_lda (audit_regressor_data &rd, LEARNER::single_learner &, example &ec)
 
void audit_regressor (audit_regressor_data &rd, LEARNER::single_learner &base, example &ec)
 
void end_examples (audit_regressor_data &d)
 
void print_ex (vw &all, size_t ex_processed, size_t vals_found, size_t progress)
 
void finish_example (vw &all, audit_regressor_data &dd, example &ec)
 
void finish (audit_regressor_data &dat)
 
template<class T >
void regressor_values (audit_regressor_data &dat, T &w)
 
void init_driver (audit_regressor_data &dat)
 
LEARNER::base_learner * audit_regressor_setup (options_i &options, vw &all)
 

Function Documentation

◆ audit_regressor()

void audit_regressor ( audit_regressor_data & rd,
LEARNER::single_learner & base,
example & ec 
)

Definition at line 103 of file audit_regressor.cc.

References audit_regressor_data::all, audit_regressor_feature(), audit_regressor_interaction(), audit_regressor_lda(), v_array< T >::begin(), audit_regressor_data::cur_class, v_array< T >::end(), example_predict::feature_space, example_predict::ft_offset, INTERACTIONS::generate_interactions(), example_predict::indices, features::indicies, vw::interactions, vw::lda, vw::permutations, v_array< T >::size(), features::size(), features::space_names, parameters::sparse, parameters::sparse_weights, audit_regressor_data::total_class_cnt, features::values, and vw::weights.

Referenced by audit_regressor_setup().

104 {
105  vw& all = *rd.all;
106 
107  if (all.lda > 0)
108  audit_regressor_lda(rd, base, ec);
109  else
110  {
111  rd.cur_class = 0;
112  uint64_t old_offset = ec.ft_offset;
113 
114  while (rd.cur_class < rd.total_class_cnt)
115  {
116  for (unsigned char* i = ec.indices.begin(); i != ec.indices.end(); ++i)
117  {
118  features& fs = ec.feature_space[(size_t)*i];
119  if (fs.space_names.size() > 0)
120  for (size_t j = 0; j < fs.size(); ++j)
121  {
122  audit_regressor_interaction(rd, fs.space_names[j].get());
123  audit_regressor_feature(rd, fs.values[j], (uint32_t)fs.indicies[j] + ec.ft_offset);
124  audit_regressor_interaction(rd, NULL);
125  }
126  else
127  for (size_t j = 0; j < fs.size(); ++j)
128  audit_regressor_feature(rd, fs.values[j], (uint32_t)fs.indicies[j] + ec.ft_offset);
129  }
130 
131  if (rd.all->weights.sparse)
134  rd.all->interactions, rd.all->permutations, ec, rd, rd.all->weights.sparse_weights);
135  else
136  INTERACTIONS::generate_interactions<audit_regressor_data, const uint64_t, audit_regressor_feature, true,
138  rd.all->interactions, rd.all->permutations, ec, rd, rd.all->weights.dense_weights);
139 
140  ec.ft_offset += rd.increment;
141  ++rd.cur_class;
142  }
143 
144  ec.ft_offset = old_offset; // make sure example is not changed.
145  }
146 }
v_array< namespace_index > indices
parameters weights
Definition: global_data.h:537
v_array< feature_index > indicies
the core definition of a set of features.
v_array< feature_value > values
T *& begin()
Definition: v_array.h:42
void audit_regressor_lda(audit_regressor_data &rd, LEARNER::single_learner &, example &ec)
size_t size() const
Definition: v_array.h:68
uint32_t lda
Definition: global_data.h:508
std::array< features, NUM_NAMESPACES > feature_space
size_t size() const
void generate_interactions(vw &all, example_predict &ec, R &dat)
Definition: interactions.h:45
T *& end()
Definition: v_array.h:43
void audit_regressor_feature(audit_regressor_data &dat, const float, const uint64_t ft_idx)
v_array< audit_strings_ptr > space_names
void audit_regressor_interaction(audit_regressor_data &dat, const audit_strings *f)
sparse_parameters sparse_weights
std::vector< std::string > interactions
Definition: global_data.h:457
bool permutations
Definition: global_data.h:454

◆ audit_regressor_feature()

void audit_regressor_feature ( audit_regressor_data & dat,
const float  ,
const uint64_t  ft_idx 
)
inline

Definition at line 51 of file audit_regressor.cc.

References audit_regressor_data::all, io_buf::bin_write_fixed(), audit_regressor_data::cur_class, parameters::mask(), audit_regressor_data::ns_pre, audit_regressor_data::out_file, parameters::stride_shift(), prediction_type::to_string(), audit_regressor_data::total_class_cnt, audit_regressor_data::values_audited, and vw::weights.

Referenced by audit_regressor().

52 {
53  parameters& weights = dat.all->weights;
54  if (weights[ft_idx] != 0)
55  ++dat.values_audited;
56  else
57  return;
58 
59  std::string ns_pre;
60  for (std::vector<std::string>::const_iterator s = dat.ns_pre->begin(); s != dat.ns_pre->end(); ++s) ns_pre += *s;
61 
62  std::ostringstream tempstream;
63  tempstream << ':' << ((ft_idx & weights.mask()) >> weights.stride_shift()) << ':' << weights[ft_idx];
64 
65  std::string temp = ns_pre + tempstream.str() + '\n';
66  if (dat.total_class_cnt > 1) // add class prefix for multiclass problems
67  temp = std::to_string(dat.cur_class) + ':' + temp;
68 
69  dat.out_file->bin_write_fixed(temp.c_str(), (uint32_t)temp.size());
70 
71  weights[ft_idx] = 0.; // mark value audited
72 }
parameters weights
Definition: global_data.h:537
std::vector< std::string > * ns_pre
size_t bin_write_fixed(const char *data, size_t len)
Definition: io_buf.h:252
uint32_t stride_shift()
uint64_t mask()
const char * to_string(prediction_type_t prediction_type)
Definition: learner.cc:12

◆ audit_regressor_interaction()

void audit_regressor_interaction ( audit_regressor_data & dat,
const audit_strings * f 
)
inline

Definition at line 26 of file audit_regressor.cc.

References audit_regressor_data::ns_pre.

Referenced by audit_regressor().

27 {
28  // same as audit_interaction in gd.cc
29  if (f == nullptr)
30  {
31  dat.ns_pre->pop_back();
32  return;
33  }
34 
35  std::string ns_pre;
36  if (!dat.ns_pre->empty())
37  ns_pre += '*';
38 
39  if (f->first != "" && ((f->first) != " "))
40  {
41  ns_pre.append(f->first);
42  ns_pre += '^';
43  }
44  if (f->second != "")
45  {
46  ns_pre.append(f->second);
47  dat.ns_pre->push_back(ns_pre);
48  }
49 }
std::vector< std::string > * ns_pre
float f
Definition: cache.cc:40

◆ audit_regressor_lda()

void audit_regressor_lda ( audit_regressor_data & rd,
LEARNER::single_learner & ,
example & ec 
)

Definition at line 74 of file audit_regressor.cc.

References audit_regressor_data::all, v_array< T >::begin(), io_buf::bin_write_fixed(), v_array< T >::end(), example_predict::feature_space, example_predict::indices, features::indicies, vw::lda, audit_regressor_data::out_file, vw::parse_mask, features::size(), features::space_names, parameters::stride_shift(), and vw::weights.

Referenced by audit_regressor().

75 {
76  vw& all = *rd.all;
77 
78  std::ostringstream tempstream;
79  parameters& weights = rd.all->weights;
80  for (unsigned char* i = ec.indices.begin(); i != ec.indices.end(); i++)
81  {
82  features& fs = ec.feature_space[*i];
83  for (size_t j = 0; j < fs.size(); ++j)
84  {
85  tempstream << '\t' << fs.space_names[j].get()->first << '^' << fs.space_names[j].get()->second << ':'
86  << ((fs.indicies[j] >> weights.stride_shift()) & all.parse_mask);
87  for (size_t k = 0; k < all.lda; k++)
88  {
89  weight& w = weights[(fs.indicies[j] + k)];
90  tempstream << ':' << w;
91  w = 0.;
92  }
93  tempstream << std::endl;
94  }
95  }
96 
97  rd.out_file->bin_write_fixed(tempstream.str().c_str(), (uint32_t)tempstream.str().size());
98 }
v_array< namespace_index > indices
parameters weights
Definition: global_data.h:537
v_array< feature_index > indicies
the core definition of a set of features.
T *& begin()
Definition: v_array.h:42
uint32_t lda
Definition: global_data.h:508
std::array< features, NUM_NAMESPACES > feature_space
size_t size() const
T *& end()
Definition: v_array.h:43
float weight
v_array< audit_strings_ptr > space_names
uint64_t parse_mask
Definition: global_data.h:453
size_t bin_write_fixed(const char *data, size_t len)
Definition: io_buf.h:252
uint32_t stride_shift()

◆ audit_regressor_setup()

LEARNER::base_learner* audit_regressor_setup ( options_i & options,
vw & all 
)

Definition at line 246 of file audit_regressor.cc.

References VW::config::option_group_definition::add(), VW::config::options_i::add_and_parse(), LEARNER::as_singleline(), vw::audit, audit_regressor(), end_examples(), VW::finish(), VW::finish_example(), init_driver(), LEARNER::init_learner(), VW::config::make_option(), vw::numpasses, LEARNER::learner< T, E >::set_end_examples(), LEARNER::learner< T, E >::set_finish(), LEARNER::learner< T, E >::set_finish_example(), LEARNER::learner< T, E >::set_init_driver(), setup_base(), vw::stdin_off, THROW, VW::config::options_i::was_supplied(), and io_buf::WRITE.

Referenced by parse_reductions().

247 {
248  std::string out_file;
249 
250  option_group_definition new_options("Audit Regressor");
251  new_options.add(make_option("audit_regressor", out_file)
252  .keep()
253  .help("stores feature names and their regressor values. Same dataset must be used for both "
254  "regressor training and this mode."));
255  options.add_and_parse(new_options);
256 
257  if (!options.was_supplied("audit_regressor"))
258  return nullptr;
259 
260  if (out_file.empty())
261  THROW("audit_regressor argument (output filename) is missing.");
262 
263  if (all.numpasses > 1)
264  THROW("audit_regressor can't be used with --passes > 1.");
265 
266  all.audit = true;
267 
268  auto dat = scoped_calloc_or_throw<audit_regressor_data>();
269  dat->all = &all;
270  dat->ns_pre = new std::vector<std::string>(); // explicitly invoking std::vector's constructor
271  dat->out_file = new io_buf();
272  dat->out_file->open_file(out_file.c_str(), all.stdin_off, io_buf::WRITE);
273 
278  ret.set_finish(finish);
280 
281  return LEARNER::make_base<audit_regressor_data>(ret);
282 }
void set_init_driver(void(*f)(T &))
Definition: learner.h:299
void finish_example(vw &all, audit_regressor_data &dd, example &ec)
static constexpr int WRITE
Definition: io_buf.h:72
virtual void add_and_parse(const option_group_definition &group)=0
single_learner * as_singleline(learner< T, E > *l)
Definition: learner.h:476
void set_finish_example(void(*f)(vw &all, T &, E &))
Definition: learner.h:307
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
virtual bool was_supplied(const std::string &key)=0
Definition: io_buf.h:54
void init_driver(audit_regressor_data &dat)
size_t numpasses
Definition: global_data.h:451
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
void set_finish(void(*f)(T &))
Definition: learner.h:265
void audit_regressor(audit_regressor_data &rd, LEARNER::single_learner &base, example &ec)
void end_examples(audit_regressor_data &d)
void finish(audit_regressor_data &dat)
bool audit
Definition: global_data.h:486
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
bool stdin_off
Definition: global_data.h:527
#define THROW(args)
Definition: vw_exception.h:181
void set_end_examples(void(*f)(T &))
Definition: learner.h:295

◆ end_examples()

void end_examples ( audit_regressor_data & d)

Definition at line 147 of file audit_regressor.cc.

References io_buf::close_file(), io_buf::flush(), audit_regressor_data::ns_pre, and audit_regressor_data::out_file.

Referenced by audit_regressor_setup().

148 {
149  d.out_file->flush(); // close_file() should do this for me ...
150  d.out_file->close_file();
151  delete (d.out_file);
152  d.out_file = NULL;
153  delete d.ns_pre;
154  d.ns_pre = NULL;
155 }
virtual bool close_file()
Definition: io_buf.h:204
std::vector< std::string > * ns_pre
virtual void flush()
Definition: io_buf.h:194

◆ finish()

void finish ( audit_regressor_data & dat)

Definition at line 185 of file audit_regressor.cc.

References audit_regressor_data::all, audit_regressor_data::loaded_regressor_values, vw::trace_message, and audit_regressor_data::values_audited.

186 {
188  dat.all->trace_message << "Note: for some reason audit couldn't find all regressor values in dataset ("
189  << dat.values_audited << " of " << dat.loaded_regressor_values << " found)." << std::endl;
190 }
vw_ostream trace_message
Definition: global_data.h:424

◆ finish_example()

void finish_example ( vw & all,
audit_regressor_data & dd,
example & ec 
)

Definition at line 163 of file audit_regressor.cc.

References shared_data::dump_interval, example::example_counter, VW::finish_example(), audit_regressor_data::loaded_regressor_values, print_ex(), vw::progress_add, vw::progress_arg, vw::quiet, vw::sd, set_done(), shared_data::update_dump_interval(), audit_regressor_data::values_audited, and shared_data::weighted_unlabeled_examples.

164 {
165  bool printed = false;
166  if (ec.example_counter + 1 >= all.sd->dump_interval && !all.quiet)
167  {
169  all.sd->weighted_unlabeled_examples = (double)(ec.example_counter + 1); // used in update_dump_interval
171  printed = true;
172  }
173 
175  {
176  // all regressor values were audited
177  if (!printed)
178  print_ex(all, ec.example_counter + 1, dd.values_audited, 100);
179  set_done(all);
180  }
181 
182  VW::finish_example(all, ec);
183 }
void print_ex(vw &all, size_t ex_processed, size_t vals_found, size_t progress)
size_t example_counter
Definition: example.h:64
void set_done(vw &all)
Definition: parser.cc:578
double weighted_unlabeled_examples
Definition: global_data.h:143
bool quiet
Definition: global_data.h:487
bool progress_add
Definition: global_data.h:545
shared_data * sd
Definition: global_data.h:375
float progress_arg
Definition: global_data.h:546
void finish_example(vw &, example &)
Definition: parser.cc:881
float dump_interval
Definition: global_data.h:147
void update_dump_interval(bool progress_add, float progress_arg)
Definition: global_data.h:215

◆ init_driver()

void init_driver ( audit_regressor_data & dat)

Definition at line 200 of file audit_regressor.cc.

References audit_regressor_data::all, shared_data::col_current_label, shared_data::col_example_counter, shared_data::col_example_weight, parameters::dense_weights, shared_data::dump_interval, shared_data::example_number, VW::config::options_i::get_typed_option(), audit_regressor_data::increment, LEARNER::learner< T, E >::increment, vw::l, audit_regressor_data::loaded_regressor_values, vw::options, vw::quiet, regressor_values(), vw::sd, parameters::sparse, parameters::sparse_weights, THROW, audit_regressor_data::total_class_cnt, vw::trace_message, VW::config::options_i::was_supplied(), LEARNER::learner< T, E >::weights, and vw::weights.

Referenced by audit_regressor_setup().

201 {
202  // checks a few settings that might be applied after audit_regressor_setup() is called
203  if ((dat.all->options->was_supplied("cache_file") || dat.all->options->was_supplied("cache")) &&
204  !dat.all->options->was_supplied("kill_cache"))
205  {
206  THROW("audit_regressor is incompatible with a cache file. Use it in single pass mode only.");
207  }
208 
209  dat.all->sd->dump_interval = 1.; // regressor could initialize these if saved with --save_resume
210  dat.all->sd->example_number = 0;
211 
212  dat.increment = dat.all->l->increment / dat.all->l->weights;
213  dat.total_class_cnt = dat.all->l->weights;
214 
215  if (dat.all->options->was_supplied("csoaa"))
216  {
217  size_t n = dat.all->options->get_typed_option<uint32_t>("csoaa").value();
218  if (n != dat.total_class_cnt)
219  {
220  dat.total_class_cnt = n;
221  dat.increment = dat.all->l->increment / n;
222  }
223  }
224 
225  // count non-null feature values in regressor
226  if (dat.all->weights.sparse)
228  else
230 
231  if (dat.loaded_regressor_values == 0)
232  THROW("regressor has no non-zero weights. Nothing to audit.");
233 
234  if (!dat.all->quiet)
235  {
236  dat.all->trace_message << "Regressor contains " << dat.loaded_regressor_values << " values\n";
237  dat.all->trace_message << std::left << std::setw(shared_data::col_example_counter) << "example"
238  << " " << std::setw(shared_data::col_example_weight) << "values"
239  << " " << std::setw(shared_data::col_current_label) << "total" << std::endl;
240  dat.all->trace_message << std::left << std::setw(shared_data::col_example_counter) << "counter"
241  << " " << std::setw(shared_data::col_example_weight) << "audited"
242  << " " << std::setw(shared_data::col_current_label) << "progress" << std::endl;
243  }
244 }
parameters weights
Definition: global_data.h:537
VW::config::options_i * options
Definition: global_data.h:428
static constexpr int col_current_label
Definition: global_data.h:182
bool quiet
Definition: global_data.h:487
void regressor_values(audit_regressor_data &dat, T &w)
static constexpr int col_example_weight
Definition: global_data.h:180
shared_data * sd
Definition: global_data.h:375
typed_option< T > & get_typed_option(const std::string &key)
Definition: options.h:120
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
static constexpr int col_example_counter
Definition: global_data.h:179
dense_parameters dense_weights
uint64_t example_number
Definition: global_data.h:137
size_t increment
Definition: learner.h:153
sparse_parameters sparse_weights
LEARNER::base_learner * l
Definition: global_data.h:383
size_t weights
Definition: learner.h:152
float dump_interval
Definition: global_data.h:147
#define THROW(args)
Definition: vw_exception.h:181

◆ print_ex()

void print_ex ( vw & all,
size_t  ex_processed,
size_t  vals_found,
size_t  progress 
)
inline

Definition at line 157 of file audit_regressor.cc.

References shared_data::col_example_counter, and vw::trace_message.

Referenced by finish_example().

158 {
159  all.trace_message << std::left << std::setw(shared_data::col_example_counter) << ex_processed << " " << std::right
160  << std::setw(9) << vals_found << " " << std::right << std::setw(12) << progress << '%' << std::endl;
161 }
vw_ostream trace_message
Definition: global_data.h:424
static constexpr int col_example_counter
Definition: global_data.h:179

◆ regressor_values()

template<class T >
void regressor_values ( audit_regressor_data & dat,
T &  w 
)

Definition at line 193 of file audit_regressor.cc.

References audit_regressor_data::loaded_regressor_values.

Referenced by init_driver().

194 {
195  for (typename T::iterator iter = w.begin(); iter != w.end(); ++iter)
196  if (*iter != 0)
198 }