// Fragment of the label-dictionary construction (see the THROW message below). The listing has
// dropped lines here, so the enclosing signature, braces and some declarations are not visible.
// Allocate a buffer one char longer than label_list and copy the string's characters into it.
// NOTE(review): presumably calloc_or_throw zero-fills, which would supply the terminator - confirm.
22 #pragma managed(push, off) 74 char* temp = calloc_or_throw<char>(1 + label_list.length());
// NOTE(review): the copy length comes from strlen() while the span end below uses length();
// the two differ only if label_list contains an embedded NUL.
75 memcpy(temp, label_list.c_str(), strlen(label_list.c_str()));
// The span ends label_list.length() chars past its begin.
77 ss.
end = ss.
begin + label_list.length();
// K is the number of entries in id2name.
80 K = (uint32_t)id2name.size();
// Visit every index k in [0, K).
83 for (
size_t k = 0; k < K; k++)
// Look the label up in name2id (the declarations of l and hash are not visible in this listing).
87 uint64_t
id = name2id.
get(l, hash);
// The condition guarding this THROW is not visible; the message reports a label seen more than once.
// NOTE(review): "occurances" is misspelled in the runtime message.
89 THROW(
"error: label dictionary initialized with multiple occurances of: " << l);
// Make a private copy of the label's len chars (l_copy's end member starts as nullptr here).
91 substring l_copy = {calloc_or_throw<char>(len),
nullptr};
92 memcpy(l_copy.
begin, l.
begin, len *
sizeof(
char));
// Store k + 1 under the copied key, so stored ids are 1-based.
94 name2id.
put(l_copy, hash, k + 1);
// When id2name is non-empty, free the buffer that its first entry's begin pointer refers to.
// NOTE(review): presumably this is the single buffer all entries point into - confirm against the allocation site.
100 if (id2name.size() > 0)
101 free(id2name[0].begin);
// Fragment of the name -> id lookup; the signature and the guard around the warning are not visible.
// Hash the bytes in [s.begin, s.end) with seed 378401 and look the span up in name2id.
110 uint64_t hash =
uniform_hash((
unsigned char*)s.begin, s.end - s.begin, 378401);
111 uint64_t v = name2id.
get(s, hash);
// Warning path: print the label to std::cerr one character at a time (the span is delimited by
// begin/end pointers), then close the quote and flush with std::endl.
114 std::cerr <<
"warning: missing named label '";
115 for (
char*
c = s.begin;
c != s.end;
c++) std::cerr << *
c;
116 std::cerr <<
'\'' << std::endl;
// Fragment of the id -> name lookup. Ids outside 1..K take the branch below (its body is not visible).
123 if ((v == 0) || (v > K))
// Ids are 1-based, so id v maps to index v - 1 of id2name.
129 return id2name[v - 1];
// Field widths (passed to std::setw) for the columns of the progress row.
static constexpr int col_avg_loss{8};
static constexpr int col_since_last{8};
static constexpr int col_example_counter{12};
// The weight column is two characters wider than the counter column.
static constexpr int col_example_weight{col_example_counter + 2};
static constexpr int col_current_label{8};
static constexpr int col_current_predict{8};
static constexpr int col_current_features{8};

// Decimal precisions (passed to std::setprecision) for the numeric columns.
static constexpr int prec_avg_loss{6};
static constexpr int prec_since_last{6};
static constexpr int prec_example_weight{1};
static constexpr int prec_current_label{4};
static constexpr int prec_current_predict{4};
// Fragment of the per-example statistics update; braces and else branches are missing from this listing.
// For an example that is both a test example and labeled, weight and loss are added to the three
// holdout accumulators: overall, since the last dump, and since the last pass.
193 if (test_example && labeled_example)
195 weighted_holdout_examples +=
weight;
196 weighted_holdout_examples_since_last_dump +=
weight;
197 weighted_holdout_examples_since_last_pass +=
weight;
198 holdout_sum_loss +=
loss;
199 holdout_sum_loss_since_last_dump +=
loss;
200 holdout_sum_loss_since_last_pass +=
loss;
// The conditions selecting between the labeled and unlabeled counters are not visible here.
205 weighted_labeled_examples +=
weight;
207 weighted_unlabeled_examples +=
weight;
// Accumulate the loss since the last dump and the running feature count.
209 sum_loss_since_last_dump +=
loss;
210 total_features += num_features;
// Reset the since-last-dump loss and remember the current labeled-example weight as the new baseline.
217 sum_loss_since_last_dump = 0.0;
218 old_weighted_labeled_examples = weighted_labeled_examples;
// Two alternative schedules for the next dump: additive (current + progress_arg) or
// multiplicative (current * progress_arg). The branch choosing between them is not visible here.
220 dump_interval = (float)weighted_examples() + progress_arg;
222 dump_interval = (float)weighted_examples() * progress_arg;
// print_update overload for a float label and float prediction: both are formatted into
// fixed-width strings and passed on together with the remaining arguments.
// The listing has dropped braces, the condition choosing between the two label formats, and the
// callee name on listing line 239.
225 void print_update(
bool holdout_set_off,
size_t current_pass,
float label,
float prediction,
size_t num_features,
226 bool progress_add,
float progress_arg)
228 std::ostringstream label_buf, pred_buf;
// Label column: col_current_label wide, padded with spaces.
230 label_buf << std::setw(col_current_label) << std::setfill(
' ');
// Either the numeric label (fixed, prec_current_label decimals, right-aligned) ...
232 label_buf << std::setprecision(prec_current_label) << std::fixed << std::right << label;
// ... or the literal " unknown", left-aligned.
234 label_buf << std::left <<
" unknown";
// Prediction column: fixed notation, prec_current_predict decimals, right-aligned, space-padded.
236 pred_buf << std::setw(col_current_predict) << std::setprecision(prec_current_predict) << std::fixed << std::right
237 << std::setfill(
' ') << prediction;
// Arguments of the forwarding call (presumably the string/string print_update overload - confirm).
240 holdout_set_off, current_pass, label_buf.str(), pred_buf.str(), num_features, progress_add, progress_arg);
// print_update overload for an integer label and integer prediction: both are formatted into
// fixed-width strings and passed on together with the remaining arguments.
// The listing has dropped braces, the condition choosing between the two label formats, and the
// callee name on listing line 256.
243 void print_update(
bool holdout_set_off,
size_t current_pass, uint32_t label, uint32_t prediction,
size_t num_features,
244 bool progress_add,
float progress_arg)
246 std::ostringstream label_buf, pred_buf;
// Label column: col_current_label wide, padded with spaces.
248 label_buf << std::setw(col_current_label) << std::setfill(
' ');
// Either the label value right-aligned ...
250 label_buf << std::right << label;
// ... or the literal " unknown", left-aligned.
252 label_buf << std::left <<
" unknown";
// Prediction column: col_current_predict wide, right-aligned, space-padded.
254 pred_buf << std::setw(col_current_predict) << std::right << std::setfill(
' ') << prediction;
// Arguments of the forwarding call (presumably the string/string print_update overload - confirm).
257 holdout_set_off, current_pass, label_buf.str(), pred_buf.str(), num_features, progress_add, progress_arg);
// print_update overload for an already-formatted label and an integer prediction: only the
// prediction is formatted here, then everything is forwarded to another print_update overload.
// Braces are missing from this listing.
260 void print_update(
bool holdout_set_off,
size_t current_pass,
const std::string& label, uint32_t prediction,
261 size_t num_features,
bool progress_add,
float progress_arg)
263 std::ostringstream pred_buf;
// Prediction column: col_current_predict wide, right-aligned, space-padded.
265 pred_buf << std::setw(col_current_predict) << std::right << std::setfill(
' ') << prediction;
// label is passed through unchanged; the prediction goes on as a string.
267 print_update(holdout_set_off, current_pass, label, pred_buf.str(), num_features, progress_add, progress_arg);
// Writes one progress row to std::cerr: average loss, loss since the last dump, example counter,
// weighted example count, current label, current prediction and feature count, then reschedules
// the next dump.
// NOTE: this listing has dropped lines (braces, else branches, and listing lines 296-300 and
// 317-320), so only the visible statements are described.
//   holdout_set_off, current_pass - holdout figures are printed when holdout is not off and current_pass >= 1
//   label, prediction             - already-formatted strings, right-aligned in their columns
//   num_features                  - printed in the last column
//   progress_add, progress_arg    - forwarded to update_dump_interval()
270 void print_update(
bool holdout_set_off,
size_t current_pass,
const std::string& label,
const std::string& prediction,
271 size_t num_features,
bool progress_add,
float progress_arg)
// Remember std::cerr's width, precision and format flags so they can be restored at the end.
273 std::streamsize saved_w = std::cerr.width();
274 std::streamsize saved_prec = std::cerr.precision();
275 std::ostream::fmtflags saved_f = std::cerr.flags();
// Starts false; the place where it is set or read is not visible in this listing.
276 bool holding_out =
false;
// Holdout reporting path.
278 if (!holdout_set_off && current_pass >= 1)
// Average holdout loss, or " unknown" when no holdout loss or weight has been accumulated
// (this also avoids dividing by a zero weight).
280 if (holdout_sum_loss == 0. && weighted_holdout_examples == 0.)
281 std::cerr << std::setw(col_avg_loss) << std::left <<
" unknown";
283 std::cerr << std::setw(col_avg_loss) << std::setprecision(prec_avg_loss) << std::fixed << std::right
284 << (holdout_sum_loss / weighted_holdout_examples);
// Same for the holdout loss accumulated since the last dump.
288 if (holdout_sum_loss_since_last_dump == 0. && weighted_holdout_examples_since_last_dump == 0.)
289 std::cerr << std::setw(col_since_last) << std::left <<
" unknown";
291 std::cerr << std::setw(col_since_last) << std::setprecision(prec_since_last) << std::fixed << std::right
292 << (holdout_sum_loss_since_last_dump / weighted_holdout_examples_since_last_dump);
// Reset the since-last-dump holdout accumulators once they have been reported.
294 weighted_holdout_examples_since_last_dump = 0;
295 holdout_sum_loss_since_last_dump = 0.0;
// Non-holdout reporting path: average loss over all labeled examples (only divided when the weight is positive).
301 std::cerr << std::setw(col_avg_loss) << std::setprecision(prec_avg_loss) << std::right << std::fixed;
302 if (weighted_labeled_examples > 0.)
303 std::cerr << (sum_loss / weighted_labeled_examples);
// Since-last column: use prec_since_last, matching the holdout branch above
// (it has the same value as prec_avg_loss, so the output is unchanged).
306 std::cerr <<
" " << std::setw(col_since_last) << std::setprecision(prec_since_last) << std::right << std::fixed;
// No labeled weight was added since the last dump: the division below would be by zero
// (what is printed in that case is not visible in this listing).
307 if (weighted_labeled_examples == old_weighted_labeled_examples)
310 std::cerr << (sum_loss_since_last_dump / (weighted_labeled_examples - old_weighted_labeled_examples));
// Remaining columns, each right-aligned in its own width and separated by a single space.
312 std::cerr <<
" " << std::setw(col_example_counter) << std::right << example_number <<
" " 313 << std::setw(col_example_weight) << std::setprecision(prec_example_weight) << std::right
314 << weighted_examples() <<
" " << std::setw(col_current_label) << std::right << label <<
" " 315 << std::setw(col_current_predict) << std::right << prediction <<
" " << std::setw(col_current_features)
316 << std::right << num_features;
321 std::cerr << std::endl;
// Restore the stream state captured at the top.
324 std::cerr.width(saved_w);
325 std::cerr.precision(saved_prec);
// flags() replaces the whole flag set. setf() would only OR the saved bits back in, leaving
// std::fixed and the adjust flags set above active on std::cerr for every later writer.
326 std::cerr.flags(saved_f);
// Schedule the next progress dump.
328 update_dump_interval(progress_add, progress_arg);
// Fragments of a large state struct (the deleted special members below name it vw); most of its
// members are missing from this listing.
// A rand_state instance created at member initialization and held through a shared_ptr.
372 std::shared_ptr<rand_state> _random_state_sp = std::make_shared<rand_state>();
// Both flags default to false.
421 bool vw_is_main =
false;
427 bool should_delete_options =
false;
// Fixed-size tables with NUM_NAMESPACES entries each.
468 std::array<unsigned char, NUM_NAMESPACES>
redefine;
471 std::array<uint32_t, NUM_NAMESPACES>
ngram;
472 std::array<uint32_t, NUM_NAMESPACES>
skips;
474 std::array<uint32_t, NUM_NAMESPACES>
limit;
// NOTE(review): the listing dropped lines between 475 and 482, so this array type and the
// namespace_dictionaries initializer below probably belong to two different declarations.
475 std::array<uint64_t, NUM_NAMESPACES>
482 namespace_dictionaries{};
// Function pointer taking a void* and returning nothing.
485 void (*delete_prediction)(
void*);
// Returns 2^num_bits as a size_t.
513 size_t length() {
return ((
size_t)1) << num_bits; };
// Stack of function pointers; each takes (options_i&, vw&) and returns a base_learner*.
515 std::stack<LEARNER::base_learner* (*)(VW::config::options_i&, vw&)>
reduction_stack;
// Copy construction and copy assignment are deleted.
555 vw(
const vw&) =
delete;
556 vw& operator=(
const vw&) =
delete;
// The overloads taking a const rvalue reference are deleted as well.
560 vw(
const vw&&) =
delete;
561 vw& operator=(
const vw&&) =
delete;
// Tail of a declaration whose name is cut off in this listing (presumably compile_gram - confirm).
569 std::vector<std::string> grams, std::array<uint32_t, NUM_NAMESPACES>& dest,
char* descriptor,
bool quiet);
// Declaration only; takes the limit strings by value and writes into dest.
570 void compile_limits(std::vector<std::string> limits, std::array<uint32_t, NUM_NAMESPACES>& dest,
bool quiet);
bool report_multiclass_log_loss
std::vector< std::string > skip_strings
void print_update(bool holdout_set_off, size_t current_pass, const std::string &label, uint32_t prediction, size_t num_features, bool progress_add, float progress_arg)
std::array< uint32_t, NUM_NAMESPACES > skips
std::array< bool, NUM_NAMESPACES > spelling_features
float merand48_noadvance(uint64_t v)
constexpr bool test_example(example &ec) noexcept
LEARNER::base_learner * cost_sensitive
std::vector< std::string > pairs
std::array< uint32_t, NUM_NAMESPACES > ngram
void finish_example(vw &all, audit_regressor_data &dd, example &ec)
double holdout_sum_loss_since_last_pass
VW::config::options_i * options
double weighted_unlabeled_examples
void get_prediction(int sock, float &res, float &weight)
std::vector< std::string > ngram_strings
bool is_more_than_two_labels_observed
bool random_positive_weights
label_type::label_type_t label_type
std::string inv_hash_regressor_name
v_array< int > final_prediction_sink
double holdout_multiclass_log_loss
v_hashmap< substring, uint64_t > name2id
std::vector< dictionary_info > loaded_dictionaries
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
std::vector< std::string > limit_strings
std::array< uint64_t, NUM_NAMESPACES > affix_features
float merand48(uint64_t &initial)
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
size_t check_holdout_every_n_passes
std::array< bool, NUM_NAMESPACES > ignore
double sum_loss_since_last_dump
std::shared_ptr< rand_state > get_random_state()
std::array< uint32_t, NUM_NAMESPACES > limit
double multiclass_log_loss
void print_update(vw &all, bool is_test, example &ec, multi_ex *ec_seq, bool action_scores)
int print_tag(std::stringstream &ss, v_array< char > tag)
float first_observed_label
namedlabels(std::string label_list)
constexpr uint64_t get_current_state() const noexcept
VW::version_struct model_file_ver
std::array< bool, NUM_NAMESPACES > ignore_linear
void deleter(substring ss, uint64_t)
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
double old_weighted_labeled_examples
constexpr size_t NUM_NAMESPACES
double weighted_holdout_examples
double weighted_holdout_examples_since_last_pass
std::string per_feature_regularizer_output
AllReduceType all_reduce_type
rand_state(uint64_t initial)
float get_and_update_random()
std::array< unsigned char, NUM_NAMESPACES > redefine
std::string per_feature_regularizer_text
std::vector< std::string > triples
LEARNER::single_learner * scorer
void print_update(bool holdout_set_off, size_t current_pass, uint32_t label, uint32_t prediction, size_t num_features, bool progress_add, float progress_arg)
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
std::vector< example * > multi_ex
std::vector< std::string > initial_regressors
std::vector< std::string > dictionary_path
void print_update(bool holdout_set_off, size_t current_pass, const std::string &label, const std::string &prediction, size_t num_features, bool progress_add, float progress_arg)
void init(size_t min_size, const V &def, bool(*eq)(const K &, const K &))
v_hashmap< substring, features * > feature_dict
void noop_mm(shared_data *, float label)
std::vector< std::string > interactions
bool preserve_performance_counters
LEARNER::base_learner * l
bool substring_equal(const substring &a, const substring &b)
void compile_gram(std::vector< std::string > grams, std::array< uint32_t, NUM_NAMESPACES > &dest, char *descriptor, bool quiet)
double weighted_labeled_examples
std::string per_feature_regularizer_input
double weighted_holdout_examples_since_last_dump
float second_observed_label
void predict(bfgs &b, base_learner &, example &ec)
void binary_print_result(int f, float res, float weight, v_array< char > tag)
double holdout_sum_loss_since_last_dump
void set_random_state(uint64_t initial) noexcept
std::string final_regressor_name
std::vector< substring > id2name
void print_result(int f, float res, float weight, v_array< char > tag)
void learn(bfgs &b, base_learner &base, example &ec)
std::map< std::string, size_t > name_index_map
double weighted_examples()
void put(const K &key, uint64_t hash, const V &val)
V & get(const K &key, uint64_t hash)
void iter(void(*func)(K, V))
std::string data_filename
std::stack< LEARNER::base_learner *(*)(VW::config::options_i &, vw &)> reduction_stack
std::string text_regressor_name
void update_dump_interval(bool progress_add, float progress_arg)
double normalized_sum_norm_x
void compile_limits(std::vector< std::string > limits, std::array< uint32_t, NUM_NAMESPACES > &dest, bool quiet)