Vowpal Wabbit
Classes | Functions | Variables
COST_SENSITIVE Namespace Reference

Classes

struct  label
 
struct  wclass
 

Functions

void name_value (substring &s, v_array< substring > &name, float &v)
 
char * bufread_label (label *ld, char *c, io_buf &cache)
 
size_t read_cached_label (shared_data *, void *v, io_buf &cache)
 
float weight (void *)
 
char * bufcache_label (label *ld, char *c)
 
void cache_label (void *v, io_buf &cache)
 
void default_label (void *v)
 
bool test_label (void *v)
 
void delete_label (void *v)
 
void copy_label (void *dst, void *src)
 
void parse_label (parser *p, shared_data *sd, void *v, v_array< substring > &words)
 
void print_update (vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores, uint32_t prediction)
 
void output_example (vw &all, example &ec)
 
void finish_example (vw &all, example &ec)
 
bool example_is_test (example &ec)
 
bool ec_is_example_header (example const &ec)
 
template<class T >
void finish_example (vw &all, T &, example &ec)
 

Variables

label_parser cs_label
 

Function Documentation

◆ bufcache_label()

char* COST_SENSITIVE::bufcache_label ( label *  ld,
char *  c 
)

Definition at line 67 of file cost_sensitive.cc.

References c, and COST_SENSITIVE::label::costs.

Referenced by cache_label().

68 {
69  *(size_t*)c = ld->costs.size();
70  c += sizeof(size_t);
71  for (unsigned int i = 0; i < ld->costs.size(); i++)
72  {
73  *(wclass*)c = ld->costs[i];
74  c += sizeof(wclass);
75  }
76  return c;
77 }
constexpr uint64_t c
Definition: rand48.cc:12

◆ bufread_label()

char* COST_SENSITIVE::bufread_label ( label *  ld,
char *  c,
io_buf &  cache 
)

Definition at line 31 of file cost_sensitive.cc.

References io_buf::buf_read(), c, and COST_SENSITIVE::label::costs.

Referenced by read_cached_label().

32 {
33  size_t num = *(size_t*)c;
34  ld->costs.clear();
35  c += sizeof(size_t);
36  size_t total = sizeof(wclass) * num;
37  if (cache.buf_read(c, (int)total) < total)
38  {
39  std::cout << "error in demarshal of cost data" << std::endl;
40  return c;
41  }
42  for (size_t i = 0; i < num; i++)
43  {
44  wclass temp = *(wclass*)c;
45  c += sizeof(wclass);
46  ld->costs.push_back(temp);
47  }
48 
49  return c;
50 }
constexpr uint64_t c
Definition: rand48.cc:12
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ cache_label()

void COST_SENSITIVE::cache_label ( void *  v,
io_buf &  cache 
)

Definition at line 79 of file cost_sensitive.cc.

References io_buf::buf_write(), bufcache_label(), c, and COST_SENSITIVE::label::costs.

80 {
81  char* c;
82  label* ld = (label*)v;
83  cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size());
84  bufcache_label(ld, c);
85 }
char * bufcache_label(label *ld, char *c)
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
constexpr uint64_t c
Definition: rand48.cc:12

◆ copy_label()

void COST_SENSITIVE::copy_label ( void *  dst,
void *  src 
)

Definition at line 111 of file cost_sensitive.cc.

References copy_array(), and COST_SENSITIVE::label::costs.

112 {
113  if (dst && src)
114  {
115  label* ldD = (label*)dst;
116  label* ldS = (label*)src;
117  copy_array(ldD->costs, ldS->costs);
118  }
119 }
void copy_array(v_array< T > &dst, const v_array< T > &src)
Definition: v_array.h:185

◆ default_label()

void COST_SENSITIVE::default_label ( void *  v)

Definition at line 87 of file cost_sensitive.cc.

References COST_SENSITIVE::label::costs.

88 {
89  label* ld = (label*)v;
90  ld->costs.clear();
91 }

◆ delete_label()

void COST_SENSITIVE::delete_label ( void *  v)

Definition at line 104 of file cost_sensitive.cc.

References COST_SENSITIVE::label::costs.

105 {
106  label* ld = (label*)v;
107  if (ld)
108  ld->costs.delete_v();
109 }

◆ ec_is_example_header()

bool COST_SENSITIVE::ec_is_example_header ( example const &  ec)

Definition at line 308 of file cost_sensitive.cc.

References COST_SENSITIVE::label::costs, polylabel::cs, example::l, and v_array< T >::size().

Referenced by finish_example(), Search::generate_training_example(), output_example(), Search::search::predictLDF(), Search::search_predict(), and Search::single_prediction_LDF().

309 {
310  v_array<COST_SENSITIVE::wclass> costs = ec.l.cs.costs;
311  if (costs.size() != 1)
312  return false;
313  if (costs[0].class_index != 0)
314  return false;
315  if (costs[0].x != -FLT_MAX)
316  return false;
317  return true;
318 }
size_t size() const
Definition: v_array.h:68

◆ example_is_test()

bool COST_SENSITIVE::example_is_test ( example &  ec)

Definition at line 297 of file cost_sensitive.cc.

References COST_SENSITIVE::label::costs, polylabel::cs, example::l, and v_array< T >::size().

298 {
299  v_array<COST_SENSITIVE::wclass> costs = ec.l.cs.costs;
300  if (costs.size() == 0)
301  return true;
302  for (size_t j = 0; j < costs.size(); j++)
303  if (costs[j].x != FLT_MAX)
304  return false;
305  return true;
306 }
size_t size() const
Definition: v_array.h:68
COST_SENSITIVE::label cs
Definition: example.h:30
polylabel l
Definition: example.h:57
v_array< wclass > costs

◆ finish_example() [1/2]

template<class T >
void COST_SENSITIVE::finish_example ( vw &  all,
T &  ,
example &  ec 
)

Definition at line 37 of file cost_sensitive.h.

References cs_label, ec_is_example_header(), finish_example(), MULTILABEL::multilabel, and print_update().

38 {
39  finish_example(all, ec);
40 }
void finish_example(vw &all, T &, example &ec)

◆ finish_example() [2/2]

void COST_SENSITIVE::finish_example ( vw &  all,
example &  ec 
)

Definition at line 291 of file cost_sensitive.cc.

References VW::finish_example(), and output_example().

Referenced by finish_example(), CSOAA::finish_example(), and LEARNER::init_cost_sensitive_learner().

292 {
293  output_example(all, ec);
294  VW::finish_example(all, ec);
295 }
void finish_example(vw &, example &)
Definition: parser.cc:881
void output_example(vw &all, example &ec)

◆ name_value()

void COST_SENSITIVE::name_value ( substring &  s,
v_array< substring > &  name,
float &  v 
)

Definition at line 9 of file cost_sensitive.cc.

References substring::begin, substring::end, float_of_substring(), v_array< T >::size(), THROW, and tokenize().

Referenced by parse_label().

10 {
11  tokenize(':', s, name);
12 
13  switch (name.size())
14  {
15  case 0:
16  case 1:
17  v = 1.;
18  break;
19  case 2:
20  v = float_of_substring(name[1]);
21  if (std::isnan(v))
22  THROW("error NaN value for: " << name[0]);
23  break;
24  default:
25  std::cerr << "example with a wierd name. What is '";
26  std::cerr.write(s.begin, s.end - s.begin);
27  std::cerr << "'?\n";
28  }
29 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
size_t size() const
Definition: v_array.h:68
float float_of_substring(substring s)
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
#define THROW(args)
Definition: vw_exception.h:181

◆ output_example()

void COST_SENSITIVE::output_example ( vw &  all,
example &  ec 
)

Definition at line 236 of file cost_sensitive.cc.

References substring::begin, COST_SENSITIVE::wclass::class_index, COST_SENSITIVE::label::costs, polylabel::cs, substring::end, vw::final_prediction_sink, namedlabels::get(), example::l, shared_data::ldict, loss(), polyprediction::multiclass, example::num_features, COST_SENSITIVE::wclass::partial_prediction, example::pred, vw::print, vw::print_text, print_update(), vw::raw_prediction, vw::sd, example::tag, test_label(), example::test_only, shared_data::update(), and example::weight.

Referenced by finish_example().

237 {
238  label& ld = ec.l.cs;
239 
240  float loss = 0.;
241  if (!test_label(&ld))
242  {
243  // need to compute exact loss
244  size_t pred = (size_t)ec.pred.multiclass;
245 
246  float chosen_loss = FLT_MAX;
247  float min = FLT_MAX;
248  for (auto& cl : ld.costs)
249  {
250  if (cl.class_index == pred)
251  chosen_loss = cl.x;
252  if (cl.x < min)
253  min = cl.x;
254  }
255  if (chosen_loss == FLT_MAX)
256  std::cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?"
257  << std::endl;
258 
259  loss = (chosen_loss - min) * ec.weight;
260  // TODO(alberto): add option somewhere to allow using absolute loss instead?
261  // loss = chosen_loss;
262  }
263 
264  all.sd->update(ec.test_only, !test_label(&ld), loss, ec.weight, ec.num_features);
265 
266  for (int sink : all.final_prediction_sink)
267  if (!all.sd->ldict)
268  all.print(sink, (float)ec.pred.multiclass, 0, ec.tag);
269  else
270  {
271  substring ss_pred = all.sd->ldict->get(ec.pred.multiclass);
272  all.print_text(sink, std::string(ss_pred.begin, ss_pred.end - ss_pred.begin), ec.tag);
273  }
274 
275  if (all.raw_prediction > 0)
276  {
277  std::stringstream outputStringStream;
278  for (unsigned int i = 0; i < ld.costs.size(); i++)
279  {
280  wclass cl = ld.costs[i];
281  if (i > 0)
282  outputStringStream << ' ';
283  outputStringStream << cl.class_index << ':' << cl.partial_prediction;
284  }
285  all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
286  }
287 
288  print_update(all, test_label(&ec.l.cs), ec, nullptr, false, ec.pred.multiclass);
289 }
v_array< char > tag
Definition: example.h:63
int raw_prediction
Definition: global_data.h:519
uint32_t multiclass
Definition: example.h:49
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
v_array< int > final_prediction_sink
Definition: global_data.h:518
namedlabels * ldict
Definition: global_data.h:153
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
Definition: cbify.cc:60
COST_SENSITIVE::label cs
Definition: example.h:30
shared_data * sd
Definition: global_data.h:375
size_t num_features
Definition: example.h:67
void(* print_text)(int, std::string, v_array< char >)
Definition: global_data.h:522
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190
polylabel l
Definition: example.h:57
uint64_t get(substring &s)
Definition: global_data.h:108
bool test_label(void *v)
Definition: simple_label.cc:70
polyprediction pred
Definition: example.h:60
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores, uint32_t prediction)
float weight
Definition: example.h:62
void(* print)(int, float, float, v_array< char >)
Definition: global_data.h:521
bool test_only
Definition: example.h:76

◆ parse_label()

void COST_SENSITIVE::parse_label ( parser *  p,
shared_data *  sd,
void *  v,
v_array< substring > &  words 
)

Definition at line 121 of file cost_sensitive.cc.

References v_array< T >::begin(), COST_SENSITIVE::wclass::class_index, COST_SENSITIVE::label::costs, f, float_of_substring(), namedlabels::get(), hashstring(), shared_data::ldict, name_value(), parser::parse_name, v_array< T >::size(), substring_equal(), THROW, and COST_SENSITIVE::wclass::x.

122 {
123  label* ld = (label*)v;
124  ld->costs.clear();
125 
126  // handle shared and label first
127  if (words.size() == 1)
128  {
129  float fx;
130  name_value(words[0], p->parse_name, fx);
131  bool eq_shared = substring_equal(p->parse_name[0], "***shared***");
132  bool eq_label = substring_equal(p->parse_name[0], "***label***");
133  if (!sd->ldict)
134  {
135  eq_shared |= substring_equal(p->parse_name[0], "shared");
136  eq_label |= substring_equal(p->parse_name[0], "label");
137  }
138  if (eq_shared || eq_label)
139  {
140  if (eq_shared)
141  {
142  if (p->parse_name.size() != 1)
143  std::cerr << "shared feature vectors should not have costs on: " << words[0] << std::endl;
144  else
145  {
146  wclass f = {-FLT_MAX, 0, 0., 0.};
147  ld->costs.push_back(f);
148  }
149  }
150  if (eq_label)
151  {
152  if (p->parse_name.size() != 2)
153  std::cerr << "label feature vectors should have exactly one cost on: " << words[0] << std::endl;
154  else
155  {
156  wclass f = {float_of_substring(p->parse_name[1]), 0, 0., 0.};
157  ld->costs.push_back(f);
158  }
159  }
160  return;
161  }
162  }
163 
164  // otherwise this is a "real" example
165  for (unsigned int i = 0; i < words.size(); i++)
166  {
167  wclass f = {0., 0, 0., 0.};
168  name_value(words[i], p->parse_name, f.x);
169 
170  if (p->parse_name.size() == 0)
171  THROW(" invalid cost: specification -- no names on: " << words[i]);
172 
173  if (p->parse_name.size() == 1 || p->parse_name.size() == 2 || p->parse_name.size() == 3)
174  {
175  f.class_index =
176  sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0);
177  if (p->parse_name.size() == 1 && f.x >= 0) // test examples are specified just by un-valued class #s
178  f.x = FLT_MAX;
179  }
180  else
181  THROW("malformed cost specification on '" << (p->parse_name[0].begin) << "'");
182 
183  ld->costs.push_back(f);
184  }
185 }
namedlabels * ldict
Definition: global_data.h:153
v_array< substring > parse_name
Definition: parser.h:100
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
float float_of_substring(substring s)
void name_value(substring &s, v_array< substring > &name, float &v)
VW_STD14_CONSTEXPR uint64_t hashstring(substring s, uint64_t h)
Definition: hashstring.h:18
bool substring_equal(const substring &a, const substring &b)
uint64_t get(substring &s)
Definition: global_data.h:108
#define THROW(args)
Definition: vw_exception.h:181
float f
Definition: cache.cc:40

◆ print_update()

void COST_SENSITIVE::print_update ( vw &  all,
bool  is_test,
example &  ec,
multi_ex *  ec_seq,
bool  action_scores,
uint32_t  prediction 
)

Definition at line 190 of file cost_sensitive.cc.

References polyprediction::a_s, vw::bfgs, shared_data::col_current_predict, vw::current_pass, shared_data::dump_interval, namedlabels::get(), vw::holdout_set_off, shared_data::ldict, example::num_features, example::pred, shared_data::print_update(), vw::progress_add, vw::progress_arg, vw::quiet, vw::sd, and shared_data::weighted_examples().

Referenced by finish_example(), output_example(), output_example(), CSOAA::output_example(), and CSOAA::output_rank_example().

191 {
192  if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
193  {
194  size_t num_current_features = ec.num_features;
195  // for csoaa_ldf we want features from the whole (multiline example),
196  // not only from one line (the first one) represented by ec
197  if (ec_seq != nullptr)
198  {
199  num_current_features = 0;
200  // TODO: including quadratic and cubic.
201  for (auto& ecc : *ec_seq) num_current_features += ecc->num_features;
202  }
203 
204  std::string label_buf;
205  if (is_test)
206  label_buf = " unknown";
207  else
208  label_buf = " known";
209 
210  if (action_scores || all.sd->ldict)
211  {
212  std::ostringstream pred_buf;
213 
214  pred_buf << std::setw(all.sd->col_current_predict) << std::right << std::setfill(' ');
215  if (all.sd->ldict)
216  {
217  if (action_scores)
218  pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action);
219  else
220  pred_buf << all.sd->ldict->get(prediction);
221  }
222  else
223  pred_buf << ec.pred.a_s[0].action;
224  if (action_scores)
225  pred_buf << ".....";
226  all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), num_current_features,
227  all.progress_add, all.progress_arg);
228  ;
229  }
230  else
231  all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, num_current_features,
232  all.progress_add, all.progress_arg);
233  }
234 }
ACTION_SCORE::action_scores a_s
Definition: example.h:47
v_array< action_score > action_scores
Definition: action_score.h:10
namedlabels * ldict
Definition: global_data.h:153
bool quiet
Definition: global_data.h:487
bool holdout_set_off
Definition: global_data.h:499
bool progress_add
Definition: global_data.h:545
shared_data * sd
Definition: global_data.h:375
float progress_arg
Definition: global_data.h:546
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:225
bool bfgs
Definition: global_data.h:412
size_t num_features
Definition: example.h:67
uint64_t current_pass
Definition: global_data.h:396
uint64_t get(substring &s)
Definition: global_data.h:108
polyprediction pred
Definition: example.h:60
static constexpr int col_current_predict
Definition: global_data.h:184
double weighted_examples()
Definition: global_data.h:188
float dump_interval
Definition: global_data.h:147

◆ read_cached_label()

size_t COST_SENSITIVE::read_cached_label ( shared_data *  ,
void *  v,
io_buf &  cache 
)

Definition at line 52 of file cost_sensitive.cc.

References io_buf::buf_read(), bufread_label(), c, and COST_SENSITIVE::label::costs.

53 {
54  label* ld = (label*)v;
55  ld->costs.clear();
56  char* c;
57  size_t total = sizeof(size_t);
58  if (cache.buf_read(c, (int)total) < total)
59  return 0;
60  bufread_label(ld, c, cache);
61 
62  return total;
63 }
char * bufread_label(label *ld, char *c, io_buf &cache)
constexpr uint64_t c
Definition: rand48.cc:12
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ test_label()

bool COST_SENSITIVE::test_label ( void *  v)

Definition at line 93 of file cost_sensitive.cc.

References COST_SENSITIVE::label::costs.

Referenced by output_example().

94 {
95  label* ld = (label*)v;
96  if (ld->costs.size() == 0)
97  return true;
98  for (unsigned int i = 0; i < ld->costs.size(); i++)
99  if (FLT_MAX != ld->costs[i].x)
100  return false;
101  return true;
102 }

◆ weight()

float COST_SENSITIVE::weight ( void *  )

Definition at line 65 of file cost_sensitive.cc.

65 { return 1.; }

Variable Documentation

◆ cs_label

label_parser COST_SENSITIVE::cs_label
Initial value:
= {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label,
test_label, sizeof(label)}
void cache_label(void *v, io_buf &cache)
size_t read_cached_label(shared_data *, void *v, io_buf &cache)
void copy_label(void *dst, void *src)
void delete_label(void *v)
float weight(void *)
void default_label(void *v)
void parse_label(parser *p, shared_data *sd, void *v, v_array< substring > &words)
bool test_label(void *v)
Definition: simple_label.cc:70

Definition at line 187 of file cost_sensitive.cc.

Referenced by add_to_vali(), cbifyldf_setup(), cs_active_setup(), CSOAA::csldf_setup(), CSOAA::csoaa_setup(), EntityRelationTask::finish(), DepParserTask::finish(), SequenceTask_DemoLDF::finish(), finish_example(), Search::predictor::free_ec(), init_adf_data(), LEARNER::init_cost_sensitive_learner(), DepParserTask::initialize(), GraphTask::initialize(), SequenceTask_DemoLDF::initialize(), output_example(), CSOAA::output_example(), CSOAA::output_rank_example(), Search::search_initialize(), Search::search_predict(), Search::predictor::set_input_at(), Search::setup(), Search::single_prediction_LDF(), CSOAA::test_ldf_sequence(), Search::train_single_example(), CB_ALGS::cb::~cb(), CB_EXPLORE::cb_explore::~cb_explore(), Search::search::~search(), and warm_cb::~warm_cb().