11 #include <sys/types.h> 100 bool ends_with(std::string
const& fullString, std::string
const& ending)
102 if (fullString.length() > ending.length())
104 return (fullString.compare(fullString.length() - ending.length(), ending.length(), ending) == 0);
114 uint64_t v = 5289374183516789128;
115 unsigned char buf[1024];
121 for (ssize_t i = 0; i < n; i++)
133 if (stat(path.c_str(), &info) != 0)
136 return (info.st_mode & S_IFDIR) > 0;
141 std::string
find_in_path(std::vector<std::string> paths, std::string fname)
144 std::string delimiter =
"\\";
146 std::string delimiter =
"/";
148 for (std::string
path : paths)
151 std::ifstream
f(full.c_str());
160 if (str.length() == 0)
166 const char* s = str.c_str();
167 if ((str.length() > 2) && (str[1] ==
':'))
175 THROW(
"error: cannot find dictionary '" << s <<
"' in path; try adding --dictionary_path");
181 THROW(
"error: cannot read dictionary from file '" << fname <<
"'" 182 <<
", opening failed");
188 all.
trace_message <<
"scanned dictionary '" << s <<
"' from '" << fname <<
"', hash=" << std::hex << fd_hash
205 THROW(
"error: cannot re-read dictionary from file '" << fname <<
"'" 206 <<
", opening failed");
213 size_t def = (size_t)
' ';
215 ssize_t size = 2048, pos, nread;
217 char* buffer = calloc_or_throw<char>(size);
224 if ((rc != EOF) && (nread > 0))
229 const auto new_buffer = (
char*)(realloc(buffer, size));
230 if (new_buffer ==
nullptr)
238 THROW(
"error: memory allocation failed in reading dictionary");
243 }
while ((rc != EOF) && (rc !=
'\n') && (nread > 0));
248 while (*c ==
' ' || *c ==
'\t') ++
c;
250 while (*d !=
' ' && *d !=
'\t' && *d !=
'\n' && *d !=
'\0') ++d;
253 if (*d !=
' ' && *d !=
'\t')
255 char* word = calloc_or_throw<char>(d -
c);
256 memcpy(word, c, d - c);
259 if (map->
get(ss, hash) !=
nullptr)
275 map->
put(ss, hash, arr);
280 for (
size_t i = 0; i < 256; i++)
284 }
while ((rc != EOF) && (nread > 0));
292 all.
trace_message <<
"dictionary " << s <<
" contains " << map->
size() <<
" item" << (map->
size() == 1 ?
"" :
"s")
296 dictionary_info info = {calloc_or_throw<char>(strlen(s) + 1), fd_hash, map};
297 strcpy(info.
name, s);
303 if (str.length() == 0)
305 char* cstr = calloc_or_throw<char>(str.length() + 1);
306 strcpy(cstr, str.c_str());
308 char* p = strtok(cstr,
",");
320 else if (q[0] ==
'-')
325 if ((q[0] <
'1') || (q[0] >
'7'))
326 THROW(
"malformed affix argument (length must be 1..7): " << p);
328 uint16_t len = (uint16_t)(q[0] -
'0');
329 uint16_t ns = (uint16_t)
' ';
335 THROW(
"malformed affix argument (invalid namespace): " << p);
338 THROW(
"malformed affix argument (too long): " << p);
341 uint16_t afx = (len << 1) | (prefix & 0x1);
345 p = strtok(
nullptr,
",");
359 bool version_arg =
false;
361 std::string progress_arg;
363 diagnostic_group.
add(
make_option(
"version", version_arg).help(
"Version information"))
367 .help(
"Progress update frequency. int: additive, float: multiplicative"))
368 .
add(
make_option(
"quiet", all.
quiet).help(
"Don't output disgnostics and progress updates"))
369 .
add(
make_option(
"help", help).short_name(
"h").help(
"Look here: http://hunch.net/~vw/ and click on Tutorial."));
388 if (progress_arg.find_first_of(
".") == std::string::npos)
395 <<
" can't be < 1: forcing to 1" << endl;
407 all.
trace_message <<
"warning: multiplicative --progress <float>: " << progress_arg <<
" is <= 1.0: adding 1.0" 413 all.
trace_message <<
"warning: multiplicative --progress <float>" 414 <<
" is > 9.0: you probably meant to use an integer" << endl;
429 .help(
"in persistent daemon mode, do not run in the background"))
430 .
add(
make_option(
"port", parsed_options.
port).help(
"port to listen on; use 0 to pick unused port"))
434 .
add(
make_option(
"cache", parsed_options.
cache).short_name(
"c").help(
"Use a cache. The default is <data>.cache"))
437 .
add(
make_option(
"dsjson", parsed_options.
dsjson).help(
"Enable Decision Service JSON parsing."))
440 .help(
"do not reuse existing cache: create a new one always"))
444 "use gzip format whenever possible. If a cache file is being created, this option creates a " 445 "compressed cache file. A mixture of raw-text & compressed inputs are supported with autodetection."))
456 if (boost_options->try_get_positional_option_token(
"data", data, -1))
473 if (parsed_options.
cache)
485 THROW(
"invert_hash is incompatible with a cache file. Use it in single pass mode only.");
488 (options.
was_supplied(
"output_feature_regularizer_binary") ||
489 options.
was_supplied(
"output_feature_regularizer_text")))
492 all.
trace_message <<
"Making holdout_set_off=true since output regularizer specified" << endl;
495 return parsed_options;
507 return "spelling_features";
510 return "affix_features";
512 if (!std::equal(vw1.
ngram.begin(), vw1.
ngram.end(), vw2.
ngram.begin()))
515 if (!std::equal(vw1.
skips.begin(), vw1.
skips.end(), vw2.
skips.begin()))
518 if (!std::equal(vw1.
limit.begin(), vw1.
limit.end(), vw2.
limit.begin()))
525 return "permutations";
528 return "interactions size";
531 return "ignore_some";
537 return "ignore_some_linear";
541 return "ignore_linear";
544 return "redefine_some";
550 return "add_constant";
553 return "dictionary_path size";
556 return "dictionary_path";
561 return "interaction mismatch";
572 while (pos < (
int)arg.size() - 3)
574 if (arg[pos] ==
'\\' && arg[pos + 1] ==
'x')
576 std::string substr = arg.substr(pos + 2, 2);
578 unsigned char c = (
unsigned char)strtoul(substr.c_str(), &p, 16);
586 cerr <<
"Possibly malformed hex representation of a namespace: '\\x" << substr <<
"'\n";
587 res.push_back(arg[pos++]);
591 res.push_back(arg[pos++]);
594 while (pos < (
int)arg.size())
595 res.push_back(arg[pos++]);
602 std::string hash_function(
"strings");
604 std::vector<std::string> spelling_ns;
605 std::vector<std::string> quadratics;
606 std::vector<std::string> cubics;
607 std::vector<std::string> interactions;
608 std::vector<std::string> ignores;
609 std::vector<std::string> ignore_linears;
610 std::vector<std::string> keeps;
611 std::vector<std::string> redefines;
613 std::vector<std::string> dictionary_path;
616 bool leave_duplicate_interactions;
622 .
add(
make_option(
"hash", hash_function).keep().help(
"how to hash the features. Available options: strings, all"))
624 .
add(
make_option(
"ignore", ignores).keep().help(
"ignore namespaces beginning with character <arg>"))
627 .help(
"ignore namespaces beginning with character <arg> for linear terms only"))
628 .
add(
make_option(
"keep", keeps).keep().help(
"keep namespaces beginning with character <arg>"))
631 .help(
"redefine namespaces beginning with characters of std::string S as namespace N. <arg> shall be in " 633 "'N:=S' where := is operator. Empty N or S are treated as default namespace. Use ':' as a " 636 .
add(
make_option(
"bit_precision", new_bits).short_name(
"b").help(
"number of bits in the feature table"))
637 .
add(
make_option(
"noconstant", noconstant).help(
"Don't add a constant feature"))
640 .help(
"Generate N grams. To generate N grams for a single namespace 'foo', arg should be fN."))
642 .help(
"Generate skips in N grams. This in conjunction with the ngram tag can be used to generate " 643 "generalized n-skip-k-gram. To generate n-skips for a single namespace 'foo', arg should be fN."))
645 .help(
"limit to N features. To apply to a single namespace 'foo', arg should be fN"))
648 .help(
"generate prefixes/suffixes of features; argument '+2a,-3b,+1' means generate 2-char prefixes for " 649 "namespace a, 3-char suffixes for b and 1 char prefixes for default namespace"))
652 .help(
"compute spelling features for a give namespace (use '_' for default namespace)"))
655 .help(
"read a dictionary for additional features (arg either 'x:file' or just 'file')"))
657 .help(
"look in this directory for dictionaries; defaults to current directory or env{PATH}"))
660 .help(
"Create feature interactions of any level between namespaces."))
662 .help(
"Use permutations instead of combinations for feature interactions of same namespace."))
663 .
add(
make_option(
"leave_duplicate_interactions", leave_duplicate_interactions)
664 .help(
"Don't remove interactions with duplicate combinations of namespaces. For ex. this is a " 665 "duplicate: '-q ab -q ba' and a lot more in '-q ::'."))
666 .
add(
make_option(
"quadratic", quadratics).short_name(
"q").keep().help(
"Create and use quadratic features"))
668 .
add(
make_option(
"q:", q_colon).help(
": corresponds to a wildcard for all printable characters"))
669 .
add(
make_option(
"cubic", cubics).keep().help(
"Create and use cubic features"));
677 for (
size_t id = 0;
id < spelling_ns.size();
id++)
680 if (spelling_ns[
id][0] ==
'_')
693 THROW(
"ngram is incompatible with sort_features.");
703 THROW(
"You can not skip unless ngram is > 1");
716 THROW(
"Number of bits is set to " << new_bits <<
" and " << all.
num_bits 717 <<
" by argument and model. That does not work.");
726 std::vector<std::string> expanded_interactions;
733 all.
trace_message <<
"WARNING: model file has set of {-q, --cubic, --interactions} settings stored, but they'll be " 734 "OVERRIDEN by set of {-q, --cubic, --interactions} settings from command line." 738 if (!all.
pairs.empty())
749 all.
trace_message <<
"creating quadratic features for pairs: ";
751 for (std::vector<std::string>::iterator i = quadratics.begin(); i != quadratics.end(); ++i)
758 expanded_interactions =
768 all.
trace_message <<
"creating cubic features for triples: ";
769 for (std::vector<std::string>::iterator i = cubics.begin(); i != cubics.end(); ++i)
776 std::vector<std::string> exp_cubic =
778 expanded_interactions.insert(std::begin(expanded_interactions), std::begin(exp_cubic), std::end(exp_cubic));
787 all.
trace_message <<
"creating features for following interactions: ";
788 for (std::vector<std::string>::iterator i = interactions.begin(); i != interactions.end(); ++i)
796 expanded_interactions.insert(std::begin(expanded_interactions), std::begin(exp_inter), std::end(exp_inter));
802 if (expanded_interactions.size() > 0)
807 expanded_interactions, !leave_duplicate_interactions, removed_cnt, sorted_cnt);
810 all.
trace_message <<
"WARNING: duplicate namespace interactions were found. Removed: " << removed_cnt <<
'.' 812 <<
"You can use --leave_duplicate_interactions to disable this behaviour." << endl;
814 all.
trace_message <<
"WARNING: some interactions contain duplicate characters and their characters order has " 815 "been changed. Interactions affected: " 816 << sorted_cnt <<
'.' << endl;
827 for (
auto& i : expanded_interactions)
829 const size_t len = i.size();
831 all.
pairs.push_back(i);
837 for (
size_t i = 0; i < 256; i++)
849 for (std::vector<std::string>::iterator i = ignores.begin(); i != ignores.end(); i++)
852 for (std::string::const_iterator j = i->begin(); j != i->end(); j++) all.
ignore[(
size_t)(
unsigned char)*j] =
true;
858 for (
auto const& ignore : ignores)
859 for (
auto const character : ignore) all.
trace_message << character <<
" ";
869 for (std::vector<std::string>::iterator i = ignore_linears.begin(); i != ignore_linears.end(); i++)
872 for (std::string::const_iterator j = i->begin(); j != i->end(); j++)
878 all.
trace_message <<
"ignoring linear terms for namespaces beginning with: ";
879 for (
auto const& ignore : ignore_linears)
880 for (
auto const character : ignore) all.
trace_message << character <<
" ";
888 for (
size_t i = 0; i < 256; i++) all.
ignore[i] =
true;
892 for (std::vector<std::string>::iterator i = keeps.begin(); i != keeps.end(); i++)
895 for (std::string::const_iterator j = i->begin(); j != i->end(); j++)
896 all.
ignore[(
size_t)(
unsigned char)*j] =
false;
902 for (
auto const& keep : keeps)
903 for (
auto const character : keep) all.
trace_message << character <<
" ";
915 for (
size_t i = 0; i < 256; i++) all.
redefine[i] = (
unsigned char)i;
920 for (std::vector<std::string>::iterator arg_iter = redefines.begin(); arg_iter != redefines.end(); arg_iter++)
923 size_t arg_len = argument.length();
925 size_t operator_pos = 0;
926 bool operator_found =
false;
927 unsigned char new_namespace =
' ';
930 for (
size_t i = 0; i < arg_len; i++)
936 new_namespace = argument[0];
940 else if (argument[i] ==
':')
941 operator_pos = i + 1;
942 else if ((argument[i] ==
'=') && (operator_pos == i))
943 operator_found =
true;
947 THROW(
"argument of --redefine is malformed. Valid format is N:=S, :=S or N:=");
949 if (++operator_pos > 3)
951 <<
"WARNING: multiple namespaces are used in target part of --redefine argument. Only first one ('" 952 << new_namespace <<
"') will be used as target namespace." << endl;
958 if (operator_pos == arg_len)
959 all.
redefine[(int)
' '] = new_namespace;
961 for (
size_t i = operator_pos; i < arg_len; i++)
964 unsigned char c = argument[i];
970 for (
size_t i = 0; i < 256; i++) all.
redefine[i] = new_namespace;
980 for (std::string
path : dictionary_path)
986 const std::string PATH = getenv(
"PATH");
988 const char delimiter =
';';
990 const char delimiter =
':';
995 size_t index = PATH.find(delimiter);
996 while (index != std::string::npos)
998 all.
dictionary_path.push_back(PATH.substr(previous, index - previous));
999 previous = index + 1;
1000 index = PATH.find(delimiter, previous);
1012 std::string named_labels;
1014 float loss_parameter = 0.0;
1015 size_t early_terminate_passes;
1016 bool test_only =
false;
1019 example_options.
add(
make_option(
"testonly", test_only).short_name(
"t").help(
"Ignore label information and just test"))
1023 .help(
"holdout after n training examples, default off (disables holdout_period)"))
1025 make_option(
"early_terminate", early_terminate_passes)
1028 "Specify the number of passes tolerated when holdout loss doesn't decrease before early termination"))
1035 .help(
"turn this on to disregard order in which features have been defined. This will lead to smaller " 1038 .default_value(
"squared")
1039 .help(
"Specify the loss function to be used, uses squared by default. Currently available ones are " 1040 "squared, classic, hinge, logistic, quantile and poisson."))
1042 .default_value(0.5
f)
1043 .help(
"Parameter \\tau associated with Quantile loss. Defaults to 0.5"))
1049 .help(
"use names for labels (multiclass, etc.) rather than integers, argument specified all possible " 1050 "labels, comma-sep, eg \"--named_labels Noun,Verb,Adj,Punc\""));
1053 if (test_only || all.
eta == 0.)
1074 all.
sd->
ldict = &calloc_or_throw<namedlabels>();
1084 all.
trace_message <<
"l1_lambda should be nonnegative: resetting from " << all.
l1_lambda <<
" to 0" << endl;
1089 all.
trace_message <<
"l2_lambda should be nonnegative: resetting from " << all.
l2_lambda <<
" to 0" << endl;
1105 std::string predictions;
1106 std::string raw_predictions;
1109 output_options.
add(
make_option(
"predictions", predictions).short_name(
"p").help(
"File to output predictions to"))
1112 .help(
"File to output unnormalized predictions to"));
1118 all.
trace_message <<
"predictions = " << predictions << endl;
1120 if (predictions ==
"stdout")
1126 const char* fstr = predictions.c_str();
1130 _sopen_s(&f, fstr, _O_CREAT | _O_WRONLY | _O_BINARY | _O_TRUNC, _SH_DENYWR, _S_IREAD | _S_IWRITE);
1132 f = open(fstr, O_CREAT | O_WRONLY |
O_LARGEFILE | O_TRUNC, 0666);
1135 all.
trace_message <<
"Error opening the predictions file: " << fstr << endl;
1144 all.
trace_message <<
"raw predictions = " << raw_predictions << endl;
1146 all.
trace_message <<
"Warning: --raw_predictions has no defined value when --binary specified, expect no output" 1149 if (raw_predictions ==
"stdout")
1153 const char* t = raw_predictions.c_str();
1156 _sopen_s(&f, t, _O_CREAT | _O_WRONLY | _O_BINARY | _O_TRUNC, _SH_DENYWR, _S_IREAD | _S_IWRITE);
1158 f = open(t, O_CREAT | O_WRONLY |
O_LARGEFILE | O_TRUNC, 0666);
1168 output_model_options
1171 .help(
"Output human-readable final regressor with numeric features"))
1173 .help(
"Output human-readable final regressor with feature names. Computationally expensive."))
1175 .help(
"save extra state so learning can be resumed later with new data"))
1177 .help(
"reset performance counters when warmstarting"))
1180 .help(
"Per feature regularization output file"))
1182 .help(
"Per feature regularization output file, in text"))
1183 .
add(
make_option(
"id", all.
id).help(
"User supplied ID embedded into the final regressor"));
1226 auto base = setup_func(options, all);
1228 if (base ==
nullptr)
1252 all.
reduction_stack.push(ExpReplay::expreplay_setup<'b', simple_label>);
1268 all.
reduction_stack.push(ExpReplay::expreplay_setup<'m', MULTICLASS::mc_label>);
1301 all.
reduction_stack.push(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>);
1310 vw& all = *(
new vw());
1323 bool strict_parse =
false;
1326 vw_args.
add(
make_option(
"ring_size", ring_size).default_value(256).help(
"size of example ring"))
1327 .
add(
make_option(
"strict_parse", strict_parse).help(
"throw on malformed examples"));
1330 all.
p =
new parser{ring_size, strict_parse};
1333 update_args.
add(
make_option(
"learning_rate", all.
eta).help(
"Set learning rate").short_name(
"l"))
1336 .help(
"Set Decay factor for learning_rate between passes"))
1339 .help(
"Use existing regressor to determine which parameters may be updated. If no initial_regressor " 1340 "given, also used for initial weights."));
1352 .help(
"Per feature regularization input file"));
1355 std::string span_server_arg;
1356 int span_server_port_arg;
1358 size_t unique_id_arg;
1362 parallelization_args
1363 .
add(
make_option(
"span_server", span_server_arg).help(
"Location of server for setting up spanning tree"))
1365 .
add(
make_option(
"unique_id", unique_id_arg).default_value(0).help(
"unique id used for cluster parallel jobs"))
1367 make_option(
"total", total_arg).default_value(1).help(
"total number of nodes used in cluster parallel job"))
1368 .
add(
make_option(
"node", node_arg).default_value(0).help(
"node number in cluster parallel job"))
1370 .default_value(26543)
1371 .help(
"Port of the server for setting up spanning tree"));
1378 THROW(
"you must specificy unique_id, total, and node if you specify any");
1385 new AllReduceSockets(span_server_arg, span_server_port_arg, unique_id_arg, total_arg, node_arg, all.
quiet);
1405 if (!command_line_has_interaction)
1409 bool file_options_has_interaction = file_options.find(
"--quadratic") != std::string::npos;
1410 file_options_has_interaction = file_options_has_interaction || (file_options.find(
"--cubic") != std::string::npos);
1411 file_options_has_interaction =
1412 file_options_has_interaction || (file_options.find(
"--interactions") != std::string::npos);
1414 return file_options_has_interaction;
1419 std::string file_options;
1425 std::istringstream ss{file_options};
1426 std::vector<std::string> container{std::istream_iterator<std::string>{ss}, std::istream_iterator<std::string>{}};
1428 po::options_description desc(
"");
1431 po::parsed_options pos = po::command_line_parser(container).options(desc).allow_unregistered().run();
1433 bool skipping =
false;
1434 std::string saved_key =
"";
1435 unsigned int count = 0;
1436 bool first_seen =
false;
1437 for (
auto opt : pos.options)
1442 for (
auto token : opt.original_tokens)
1444 auto found = token.find(
"--");
1445 if (found != std::string::npos)
1458 bool treat_as_value =
false;
1464 if (opt.string_key.length() > 1 && opt.string_key[0] ==
'-' && opt.string_key[1] >=
'0' && opt.string_key[1] <=
'9')
1466 treat_as_value =
true;
1471 (opt.string_key ==
"quadratic" || opt.string_key ==
"cubic" || opt.string_key ==
"interactions"))
1481 if (!treat_as_value && opt.string_key !=
"")
1484 if (count == 0 && first_seen)
1486 options.
insert(saved_key,
"");
1489 saved_key = opt.string_key;
1493 if (opt.value.size() > 0)
1495 for (
auto value : opt.value)
1497 options.
insert(saved_key, value);
1506 auto source = treat_as_value ? opt.original_tokens : opt.value;
1507 for (
auto value : source)
1509 options.
insert(saved_key, value);
1515 if (count == 0 && saved_key !=
"")
1517 options.
insert(saved_key,
"");
1558 auto parsed_source_options =
parse_source(all, options);
1563 size_t params_per_problem = all.
l->
increment;
1564 while (params_per_problem > ((uint64_t)1 << i)) i++;
1572 flag_to_replace.append(
1574 std::string cmd = ss->str();
1575 size_t pos = cmd.find(flag_to_replace);
1576 if (pos == std::string::npos)
1578 *ss <<
" " << flag_to_replace << new_value;
1584 pos += flag_to_replace.size();
1588 size_t pos_after_value = cmd.find(
" ", pos);
1589 if (pos_after_value == std::string::npos)
1592 cmd.replace(pos, cmd.size() - pos, new_value);
1597 cmd.replace(pos, pos_after_value - pos, new_value);
1606 substring ss = {
const_cast<char*
>(s.c_str()), const_cast<char*>(s.c_str() + s.length())};
1608 char** argv = calloc_or_throw<char*>(tokens.size() + 1);
1609 argv[0] = calloc_or_throw<char>(2);
1613 for (
size_t i = 0; i < tokens.size(); i++)
1615 argv[i + 1] = calloc_or_throw<char>(tokens[i].end - tokens[i].begin + 1);
1616 sprintf(argv[i + 1],
"%s", tokens[i].begin);
1619 argc =
static_cast<int>(tokens.size() + 1);
1625 char*
c = calloc_or_throw<char>(s.length() + 3);
1628 strcpy(c + 2, s.c_str());
1630 std::vector<substring> foo;
1633 char** argv = calloc_or_throw<char*>(foo.size());
1634 for (
size_t i = 0; i < foo.size(); i++)
1636 *(foo[i].end) =
'\0';
1637 argv[i] = calloc_or_throw<char>(foo[i].end - foo[i].begin + 1);
1638 sprintf(argv[i],
"%s", foo[i].begin);
1641 argc = (int)foo.size();
1650 for (
int i = 0; i < argc; i++) free(argv[i]);
1657 vw& all =
parse_args(options, trace_listener, trace_context);
1671 model = &localModel;
1677 std::vector<std::string> dictionary_nses;
1690 cout << options.
help();
1698 catch (std::exception& e)
1714 char** argv =
to_argv(s, argc);
1719 ret =
initialize(argc, argv, model, skipModelLoad, trace_listener, trace_context);
1732 std::string
const& s,
io_buf* model,
bool skipModelLoad,
trace_message_t trace_listener,
void* trace_context)
1740 ret =
initialize(argc, argv, model, skipModelLoad, trace_listener, trace_context);
1753 int argc,
char* argv[],
io_buf* model,
bool skipModelLoad,
trace_message_t trace_listener,
void* trace_context)
1756 vw* all =
initialize(*options, model, skipModelLoad, trace_listener, trace_context);
1774 if (option->m_name ==
"no_stdin" || option->m_name ==
"initial_regressor")
1779 serializer.
add(*option);
1783 auto serialized_options = serializer.
str();
1784 serialized_options = serialized_options +
" " + extra_args;
1787 VW::initialize(serialized_options.c_str(),
nullptr,
true , trace_listener, trace_context);
1792 new_model->
sd = vw_model->
sd;
1854 <<
"average multiclass log loss = " 1858 <<
"average multiclass log loss = " 1862 float best_constant;
1863 float best_constant_loss;
1866 all.
trace_message << endl <<
"best constant = " << best_constant;
1867 if (best_constant_loss != FLT_MIN)
1868 all.
trace_message << endl <<
"best constant's loss = " << best_constant_loss;
1880 vw_exception finalize_regressor_exception(__FILE__, __LINE__,
"empty");
1881 bool finalize_regressor_exception_thrown =
false;
1888 finalize_regressor_exception = e;
1889 finalize_regressor_exception_thrown =
true;
1892 if (all.
l !=
nullptr)
1903 if (all.
p !=
nullptr)
1953 if (finalize_regressor_exception_thrown)
1954 throw finalize_regressor_exception;
bool report_multiclass_log_loss
vw * seed_vw_model(vw *vw_model, const std::string extra_args, trace_message_t trace_listener, void *trace_context)
void parse_diagnostics(options_i &options, vw &all)
std::vector< std::string > skip_strings
void save_load(io_buf &io, const bool read, const bool text)
void free_args(int argc, char *argv[])
LEARNER::base_learner * kernel_svm_setup(options_i &options, vw &all)
v_array< namespace_index > indices
std::array< uint32_t, NUM_NAMESPACES > skips
LEARNER::base_learner * audit_regressor_setup(options_i &options, vw &all)
void delete_dictionary_entry(substring ss, features *A)
std::array< bool, NUM_NAMESPACES > spelling_features
char ** to_argv(std::string const &s, int &argc)
base_learner * setup(options_i &options, vw &all)
void enable_sources(vw &all, bool quiet, size_t passes, input_options &input_options)
base_learner * memory_tree_setup(options_i &options, vw &all)
LEARNER::base_learner * classweight_setup(options_i &options, vw &all)
LEARNER::base_learner * binary_setup(options_i &options, vw &all)
std::array< std::vector< feature_dict * >, NUM_NAMESPACES > namespace_dictionaries
virtual std::string str() override
base_learner * csoaa_setup(options_i &options, vw &all)
LEARNER::base_learner * multilabel_oaa_setup(options_i &options, vw &all)
void deep_copy_from(const features &src)
base_learner * explore_eval_setup(options_i &options, vw &all)
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
LEARNER::base_learner * scorer_setup(options_i &options, vw &all)
std::vector< std::string > pairs
std::array< uint32_t, NUM_NAMESPACES > ngram
base_learner * bfgs_setup(options_i &options, vw &all)
base_learner * svrg_setup(options_i &options, vw &all)
void finalize_source(parser *p)
base_learner * OjaNewton_setup(options_i &options, vw &all)
void read_line(vw &all, example *ex, char *line)
VW::config::options_i * options
void(* delete_label)(void *)
virtual void check_unregistered()=0
double weighted_unlabeled_examples
void load_input_model(vw &all, io_buf &io_temp)
virtual bool close_file()
std::vector< std::string > ngram_strings
const std::string git_commit(COMMIT_VERSION)
constexpr int quadratic_constant
LEARNER::base_learner * oaa_setup(options_i &options, vw &all)
void parse_output_model(options_i &options, vw &all)
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
base_learner * cbify_setup(options_i &options, vw &all)
std::string inv_hash_regressor_name
LEARNER::base_learner * noop_setup(options_i &options, vw &)
LEARNER::base_learner * lrqfa_setup(options_i &options, vw &all)
base_learner * active_cover_setup(options_i &options, vw &all)
v_array< int > final_prediction_sink
base_learner * setup(options_i &options, vw &all)
the core definition of a set of features.
double holdout_multiclass_log_loss
std::vector< dictionary_info > loaded_dictionaries
LEARNER::base_learner * lda_setup(options_i &options, vw &all)
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
void compile_gram(std::vector< std::string > grams, std::array< uint32_t, NUM_NAMESPACES > &dest, char *descriptor, bool quiet)
LEARNER::base_learner * boosting_setup(options_i &options, vw &all)
char ** get_argv_from_string(std::string s, int &argc)
v_array< act_score > path
std::vector< std::string > limit_strings
base_learner * bs_setup(options_i &options, vw &all)
const char * are_features_compatible(vw &vw1, vw &vw2)
LEARNER::base_learner * print_setup(options_i &options, vw &all)
void finalize_regressor(vw &all, std::string reg_name)
std::array< uint64_t, NUM_NAMESPACES > affix_features
virtual void add_and_parse(const option_group_definition &group)=0
std::string spoof_hex_encoded_namespaces(const std::string &arg)
void finish(vw &all, bool delete_all)
base_learner * lrq_setup(options_i &options, vw &all)
LEARNER::base_learner * shared_feature_merger_setup(config::options_i &options, vw &all)
float loss(cbify &data, uint32_t label, uint32_t final_prediction)
hash_func_t getHasher(const std::string &s)
base_learner * nn_setup(options_i &options, vw &all)
void set_compressed(parser *par)
void parse_dictionary_argument(vw &all, std::string str)
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
v_array< substring > parse_name
std::array< bool, NUM_NAMESPACES > ignore
const version_struct version(PACKAGE_VERSION)
virtual ssize_t read_file(int f, void *buf, size_t nbytes)
static void close_file_or_socket(int f)
static constexpr int READ
example * alloc_examples(size_t, size_t count=1)
virtual std::vector< std::shared_ptr< base_option > > get_all_options()=0
vw & parse_args(options_i &options, trace_message_t trace_listener, void *trace_context)
std::shared_ptr< rand_state > get_random_state()
void parse_output_preds(options_i &options, vw &all)
std::array< features, NUM_NAMESPACES > feature_space
std::array< uint32_t, NUM_NAMESPACES > limit
virtual int open_file(const char *name, bool stdin_off)
input_options parse_source(vw &all, options_i &options)
double multiclass_log_loss
void(* set_minmax)(shared_data *sd, float label)
std::string find_in_path(std::vector< std::string > paths, std::string fname)
base_learner * cb_explore_setup(options_i &options, vw &all)
void noop_mm(shared_data *, float)
void parse_modules(options_i &options, vw &all, std::vector< std::string > &dictionary_nses)
void parse_reductions(options_i &options, vw &all)
base_learner * active_setup(options_i &options, vw &all)
base_learner * cb_adf_setup(options_i &options, vw &all)
bool get_best_constant(vw &all, float &best_constant, float &best_constant_loss)
base_learner * cbifyldf_setup(options_i &options, vw &all)
base_learner * cb_algs_setup(options_i &options, vw &all)
void push_back(const T &new_ele)
typed_option< T > & get_typed_option(const std::string &key)
base_learner * stagewise_poly_setup(options_i &options, vw &all)
base_learner * mwt_setup(options_i &options, vw &all)
std::array< bool, NUM_NAMESPACES > ignore_linear
void compile_limits(std::vector< std::string > limits, std::array< uint32_t, NUM_NAMESPACES > &dest, bool quiet)
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
virtual bool was_supplied(const std::string &key)=0
LEARNER::base_learner * setup(config::options_i &options, vw &all)
base_learner * log_multi_setup(options_i &options, vw &all)
base_learner * csldf_setup(options_i &options, vw &all)
base_learner * gd_mf_setup(options_i &options, vw &all)
base_learner * recall_tree_setup(options_i &options, vw &all)
std::string per_feature_regularizer_output
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
AllReduceType all_reduce_type
void save_load_header(vw &all, io_buf &model_file, bool read, bool text, std::string &file_options, VW::config::options_i &options)
base_learner * ftrl_setup(options_i &options, vw &all)
char ** to_argv_escaped(std::string const &s, int &argc)
std::string to_string() const
LEARNER::base_learner * marginal_setup(options_i &options, vw &all)
std::vector< std::string > expand_interactions(const std::vector< std::string > &vec, const size_t required_length, const std::string &err_msg)
base_learner * baseline_setup(options_i &options, vw &all)
base_learner * cb_sample_setup(options_i &options, vw &all)
vw * initialize(options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
std::vector< substring > escaped_tokenize(char delim, substring s, bool allow_empty)
base_learner * cs_active_setup(options_i &options, vw &all)
bool check_interaction_settings_collision(options_i &options, std::string file_options)
LEARNER::base_learner * interact_setup(options_i &options, vw &all)
virtual void add(base_option &option) override
void parse_mask_regressor_args(vw &all, std::string feature_mask, std::vector< std::string > initial_regressors)
std::array< unsigned char, NUM_NAMESPACES > redefine
void cmd_string_replace_value(std::stringstream *&ss, std::string flag_to_replace, std::string new_value)
std::string per_feature_regularizer_text
std::vector< std::string > triples
virtual void insert(const std::string &key, const std::string &value)=0
base_learner * warm_cb_setup(options_i &options, vw &all)
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
option_group_definition & add(T &&op)
base_learner * mf_setup(options_i &options, vw &all)
int add(svm_params ¶ms, svm_example *fec)
std::vector< std::string > initial_regressors
void parse_sources(options_i &options, vw &all, io_buf &model, bool skipModelLoad)
LEARNER::base_learner * topk_setup(options_i &options, vw &all)
std::vector< std::string > dictionary_path
options_i & load_header_merge_options(options_i &options, vw &all, io_buf &model)
base_learner * ccb_explore_adf_setup(options_i &options, vw &all)
typed_option< T > make_option(std::string name, T &location)
LEARNER::base_learner * setup(config::options_i &options, vw &all)
vw * initialize_escaped(std::string const &s, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
bool directory_exists(std::string path)
void parse_example_tweaks(options_i &options, vw &all)
LEARNER::base_learner * sender_setup(options_i &options, vw &all)
void(* trace_message_t)(void *context, const std::string &)
void init(size_t min_size, const V &def, bool(*eq)(const K &, const K &))
void parse_feature_tweaks(options_i &options, vw &all, std::vector< std::string > &dictionary_nses)
float accumulate_scalar(vw &all, float local_sum)
void read_regressor_file(vw &all, std::vector< std::string > all_intial, io_buf &io_temp)
bool should_delete_options
std::vector< std::string > interactions
bool preserve_performance_counters
LEARNER::base_learner * l
bool substring_equal(const substring &a, const substring &b)
void free_parser(vw &all)
base_learner * confidence_setup(options_i &options, vw &all)
bool ends_with(std::string const &fullString, std::string const &ending)
virtual std::string help()=0
double weighted_labeled_examples
trace_message_t trace_listener
std::string per_feature_regularizer_input
LEARNER::base_learner * setup_base(options_i &options, vw &all)
void shallow_copy(const parameters &input)
void validate_num_bits(vw &all)
void sort_and_filter_duplicate_interactions(std::vector< std::string > &vec, bool filter_duplicates, size_t &removed_cnt, size_t &sorted_cnt)
std::string final_regressor_name
base_learner * ect_setup(options_i &options, vw &all)
double weighted_examples()
loss_function * getLossFunction(vw &all, std::string funcName, float function_parameter)
void put(const K &key, uint64_t hash, const V &val)
LEARNER::base_learner * autolink_setup(options_i &options, vw &all)
V & get(const K &key, uint64_t hash)
uint64_t hash_file_contents(io_buf *io, int f)
void parse_affix_argument(vw &all, std::string str)
bool interactions_settings_doubled
std::string data_filename
std::stack< LEARNER::base_learner *(*)(VW::config::options_i &, vw &)> reduction_stack
std::string text_regressor_name