Vowpal Wabbit
Namespaces | Functions | Variables
parse_args.cc File Reference
#include <stdio.h>
#include <float.h>
#include <sstream>
#include <fstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <algorithm>
#include "parse_regressor.h"
#include "parser.h"
#include "parse_primitives.h"
#include "vw.h"
#include "interactions.h"
#include "sender.h"
#include "nn.h"
#include "gd.h"
#include "cbify.h"
#include "oaa.h"
#include "boosting.h"
#include "multilabel_oaa.h"
#include "rand48.h"
#include "bs.h"
#include "topk.h"
#include "ect.h"
#include "csoaa.h"
#include "cb_algs.h"
#include "cb_adf.h"
#include "cb_explore.h"
#include "cb_explore_adf_bag.h"
#include "cb_explore_adf_cover.h"
#include "cb_explore_adf_first.h"
#include "cb_explore_adf_greedy.h"
#include "cb_explore_adf_regcb.h"
#include "cb_explore_adf_softmax.h"
#include "mwt.h"
#include "confidence.h"
#include "scorer.h"
#include "expreplay.h"
#include "search.h"
#include "bfgs.h"
#include "lda_core.h"
#include "noop.h"
#include "print.h"
#include "gd_mf.h"
#include "learner.h"
#include "mf.h"
#include "ftrl.h"
#include "svrg.h"
#include "binary.h"
#include "lrq.h"
#include "lrqfa.h"
#include "autolink.h"
#include "log_multi.h"
#include "recall_tree.h"
#include "memory_tree.h"
#include "stagewise_poly.h"
#include "active.h"
#include "active_cover.h"
#include "cs_active.h"
#include "kernel_svm.h"
#include "parse_example.h"
#include "best_constant.h"
#include "interact.h"
#include "vw_exception.h"
#include "accumulate.h"
#include "vw_validate.h"
#include "vw_allreduce.h"
#include "OjaNewton.h"
#include "audit_regressor.h"
#include "marginal.h"
#include "explore_eval.h"
#include "baseline.h"
#include "classweight.h"
#include "cb_sample.h"
#include "warm_cb.h"
#include "shared_feature_merger.h"
#include "options.h"
#include "options_boost_po.h"
#include "options_serializer_boost_po.h"

Go to the source code of this file.

Namespaces

 VW
 

Functions

bool ends_with (std::string const &fullString, std::string const &ending)
 
uint64_t hash_file_contents (io_buf *io, int f)
 
bool directory_exists (std::string path)
 
std::string find_in_path (std::vector< std::string > paths, std::string fname)
 
void parse_dictionary_argument (vw &all, std::string str)
 
void parse_affix_argument (vw &all, std::string str)
 
void parse_diagnostics (options_i &options, vw &all)
 
input_options parse_source (vw &all, options_i &options)
 
const char * VW::are_features_compatible (vw &vw1, vw &vw2)
 
std::string spoof_hex_encoded_namespaces (const std::string &arg)
 
void parse_feature_tweaks (options_i &options, vw &all, std::vector< std::string > &dictionary_nses)
 
void parse_example_tweaks (options_i &options, vw &all)
 
void parse_output_preds (options_i &options, vw &all)
 
void parse_output_model (options_i &options, vw &all)
 
void load_input_model (vw &all, io_buf &io_temp)
 
LEARNER::base_learner * setup_base (options_i &options, vw &all)
 
void parse_reductions (options_i &options, vw &all)
 
vw & parse_args (options_i &options, trace_message_t trace_listener, void *trace_context)
 
bool check_interaction_settings_collision (options_i &options, std::string file_options)
 
options_i & load_header_merge_options (options_i &options, vw &all, io_buf &model)
 
void parse_modules (options_i &options, vw &all, std::vector< std::string > &dictionary_nses)
 
void parse_sources (options_i &options, vw &all, io_buf &model, bool skipModelLoad)
 
void VW::cmd_string_replace_value (std::stringstream *&ss, std::string flag_to_replace, std::string new_value)
 
char ** VW::to_argv_escaped (std::string const &s, int &argc)
 
char ** VW::to_argv (std::string const &s, int &argc)
 
char ** VW::get_argv_from_string (std::string s, int &argc)
 
void VW::free_args (int argc, char *argv[])
 
vw * VW::initialize (options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
 
vw * VW::initialize (std::string s, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
 
vw * VW::initialize_escaped (std::string const &s, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
 
vw * VW::initialize (int argc, char *argv[], io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
 
vw * VW::seed_vw_model (vw *vw_model, const std::string extra_args, trace_message_t trace_listener, void *trace_context)
 
void VW::delete_dictionary_entry (substring ss, features *A)
 
void VW::sync_stats (vw &all)
 
void VW::finish (vw &all, bool delete_all)
 

Variables

bool interactions_settings_doubled = false
 

Function Documentation

◆ check_interaction_settings_collision()

bool check_interaction_settings_collision ( options_i & options,
std::string  file_options 
)

Definition at line 1400 of file parse_args.cc.

References VW::config::options_i::was_supplied().

Referenced by load_header_merge_options().

1401 {
1402  bool command_line_has_interaction = options.was_supplied("q") || options.was_supplied("quadratic") ||
1403  options.was_supplied("cubic") || options.was_supplied("interactions");
1404 
1405  if (!command_line_has_interaction)
1406  return false;
1407 
1408  // we don't use -q to save pairs in all.file_options, so only 3 options checked
1409  bool file_options_has_interaction = file_options.find("--quadratic") != std::string::npos;
1410  file_options_has_interaction = file_options_has_interaction || (file_options.find("--cubic") != std::string::npos);
1411  file_options_has_interaction =
1412  file_options_has_interaction || (file_options.find("--interactions") != std::string::npos);
1413 
1414  return file_options_has_interaction;
1415 }
virtual bool was_supplied(const std::string &key)=0

◆ directory_exists()

bool directory_exists ( std::string  path)

Definition at line 130 of file parse_args.cc.

Referenced by parse_feature_tweaks().

131 {
132  struct stat info;
133  if (stat(path.c_str(), &info) != 0)
134  return false;
135  else
136  return (info.st_mode & S_IFDIR) > 0;
137  // boost::filesystem::path p(path);
138  // return boost::filesystem::exists(p) && boost::filesystem::is_directory(p);
139 }
v_array< act_score > path
Definition: search_meta.cc:53

◆ ends_with()

bool ends_with ( std::string const &  fullString,
std::string const &  ending 
)

Definition at line 100 of file parse_args.cc.

Referenced by find_in_path(), parse_dictionary_argument(), and parse_source().

101 {
102  if (fullString.length() > ending.length())
103  {
104  return (fullString.compare(fullString.length() - ending.length(), ending.length(), ending) == 0);
105  }
106  else
107  {
108  return false;
109  }
110 }

◆ find_in_path()

std::string find_in_path ( std::vector< std::string >  paths,
std::string  fname 
)

Definition at line 141 of file parse_args.cc.

References ends_with(), and f.

Referenced by parse_dictionary_argument().

142 {
143 #ifdef _WIN32
144  std::string delimiter = "\\";
145 #else
146  std::string delimiter = "/";
147 #endif
148  for (std::string path : paths)
149  {
150  std::string full = ends_with(path, delimiter) ? (path + fname) : (path + delimiter + fname);
151  std::ifstream f(full.c_str());
152  if (f.good())
153  return full;
154  }
155  return "";
156 }
v_array< act_score > path
Definition: search_meta.cc:53
bool ends_with(std::string const &fullString, std::string const &ending)
Definition: parse_args.cc:100
float f
Definition: cache.cc:40

◆ hash_file_contents()

uint64_t hash_file_contents ( io_buf * io,
int  f 
)

Definition at line 112 of file parse_args.cc.

References io_buf::read_file().

Referenced by parse_dictionary_argument().

113 {
114  uint64_t v = 5289374183516789128;
115  unsigned char buf[1024];
116  while (true)
117  {
118  ssize_t n = io->read_file(f, buf, 1024);
119  if (n <= 0)
120  break;
121  for (ssize_t i = 0; i < n; i++)
122  {
123  v *= 341789041;
124  v += buf[i];
125  }
126  }
127  return v;
128 }
virtual ssize_t read_file(int f, void *buf, size_t nbytes)
Definition: io_buf.h:167
float f
Definition: cache.cc:40

◆ load_header_merge_options()

options_i& load_header_merge_options ( options_i & options,
vw & all,
io_buf & model 
)

Definition at line 1417 of file parse_args.cc.

References check_interaction_settings_collision(), VW::config::options_i::insert(), interactions_settings_doubled, and save_load_header().

Referenced by VW::initialize().

1418 {
1419  std::string file_options;
1420  save_load_header(all, model, true, false, file_options, options);
1421 
1422  interactions_settings_doubled = check_interaction_settings_collision(options, file_options);
1423 
1424  // Convert file_options into vector.
1425  std::istringstream ss{file_options};
1426  std::vector<std::string> container{std::istream_iterator<std::string>{ss}, std::istream_iterator<std::string>{}};
1427 
1428  po::options_description desc("");
1429 
1430  // Get list of options in file options std::string
1431  po::parsed_options pos = po::command_line_parser(container).options(desc).allow_unregistered().run();
1432 
1433  bool skipping = false;
1434  std::string saved_key = "";
1435  unsigned int count = 0;
1436  bool first_seen = false;
1437  for (auto opt : pos.options)
1438  {
1439  // If we previously encountered an option we want to skip, ignore tokens without --.
1440  if (skipping)
1441  {
1442  for (auto token : opt.original_tokens)
1443  {
1444  auto found = token.find("--");
1445  if (found != std::string::npos)
1446  {
1447  skipping = false;
1448  }
1449  }
1450 
1451  if (skipping)
1452  {
1453  saved_key = "";
1454  continue;
1455  }
1456  }
1457 
1458  bool treat_as_value = false;
1459  // If the key starts with a digit, this is a mis-interpretation of a value as a key. Pull it into the previous
1460  // option. This was found in the case of --lambda -1, misinterpreting -1 as an option key. The easy way to fix this
1461  // requires introducing "identifier-like" semantics for options keys, e.g. "does not begin with a digit". That does
1462  // not seem like an unreasonable restriction. The logical check here is: is "string_key" of the form {'-', <digit>,
1463  // <etc.>}.
1464  if (opt.string_key.length() > 1 && opt.string_key[0] == '-' && opt.string_key[1] >= '0' && opt.string_key[1] <= '9')
1465  {
1466  treat_as_value = true;
1467  }
1468 
1469  // If the interaction settings are doubled, the copy in the model file is ignored.
1470  if (interactions_settings_doubled &&
1471  (opt.string_key == "quadratic" || opt.string_key == "cubic" || opt.string_key == "interactions"))
1472  {
1473  // skip this option.
1474  skipping = true;
1475  continue;
1476  }
1477 
1478  // File options should always use long form.
1479 
1480  // If the key is empty this must be a value, otherwise set the key.
1481  if (!treat_as_value && opt.string_key != "")
1482  {
1483  // If the new token is a new option and there were no values previously it was a bool option. Add it as a switch.
1484  if (count == 0 && first_seen)
1485  {
1486  options.insert(saved_key, "");
1487  }
1488 
1489  saved_key = opt.string_key;
1490  count = 0;
1491  first_seen = true;
1492 
1493  if (opt.value.size() > 0)
1494  {
1495  for (auto value : opt.value)
1496  {
1497  options.insert(saved_key, value);
1498  count++;
1499  }
1500  }
1501  }
1502  else
1503  {
1504  // If treat_as_value is set, boost incorrectly interpreted the token as containing an option key
1505  // In this case, what should have happened is all original_tokens items should be in value.
1506  auto source = treat_as_value ? opt.original_tokens : opt.value;
1507  for (auto value : source)
1508  {
1509  options.insert(saved_key, value);
1510  count++;
1511  }
1512  }
1513  }
1514 
1515  if (count == 0 && saved_key != "")
1516  {
1517  options.insert(saved_key, "");
1518  }
1519 
1520  return options;
1521 }
void save_load_header(vw &all, io_buf &model_file, bool read, bool text, std::string &file_options, VW::config::options_i &options)
bool check_interaction_settings_collision(options_i &options, std::string file_options)
Definition: parse_args.cc:1400
virtual void insert(const std::string &key, const std::string &value)=0
bool interactions_settings_doubled
Definition: parse_args.cc:498

◆ load_input_model()

void load_input_model ( vw & all,
io_buf & io_temp 
)

Definition at line 1200 of file parse_args.cc.

References io_buf::close_file(), vw::feature_mask, vw::initial_regressors, vw::l, parse_mask_regressor_args(), and LEARNER::learner< T, E >::save_load().

Referenced by parse_sources().

1201 {
1202  // Need to see if we have to load feature mask first or second.
1203  // -i and -mask are from same file, load -i file first so mask can use it
1204  if (!all.feature_mask.empty() && all.initial_regressors.size() > 0 && all.feature_mask == all.initial_regressors[0])
1205  {
1206  // load rest of regressor
1207  all.l->save_load(io_temp, true, false);
1208  io_temp.close_file();
1209 
1210  parse_mask_regressor_args(all, all.feature_mask, all.initial_regressors);
1211  }
1212  else
1213  { // load mask first
1214  parse_mask_regressor_args(all, all.feature_mask, all.initial_regressors);
1215 
1216  // load rest of regressor
1217  all.l->save_load(io_temp, true, false);
1218  io_temp.close_file();
1219  }
1220 }
void save_load(io_buf &io, const bool read, const bool text)
Definition: learner.h:251
virtual bool close_file()
Definition: io_buf.h:204
std::string feature_mask
Definition: global_data.h:438
void parse_mask_regressor_args(vw &all, std::string feature_mask, std::vector< std::string > initial_regressors)
std::vector< std::string > initial_regressors
Definition: global_data.h:436
LEARNER::base_learner * l
Definition: global_data.h:383

◆ parse_affix_argument()

void parse_affix_argument ( vw & all,
std::string  str 
)

Definition at line 301 of file parse_args.cc.

References vw::affix_features, THROW, and valid_ns().

Referenced by parse_feature_tweaks().

302 {
303  if (str.length() == 0)
304  return;
305  char* cstr = calloc_or_throw<char>(str.length() + 1);
306  strcpy(cstr, str.c_str());
307 
308  char* p = strtok(cstr, ",");
309 
310  try
311  {
312  while (p != 0)
313  {
314  char* q = p;
315  uint16_t prefix = 1;
316  if (q[0] == '+')
317  {
318  q++;
319  }
320  else if (q[0] == '-')
321  {
322  prefix = 0;
323  q++;
324  }
325  if ((q[0] < '1') || (q[0] > '7'))
326  THROW("malformed affix argument (length must be 1..7): " << p);
327 
328  uint16_t len = (uint16_t)(q[0] - '0');
329  uint16_t ns = (uint16_t)' '; // default namespace
330  if (q[1] != 0)
331  {
332  if (valid_ns(q[1]))
333  ns = (uint16_t)q[1];
334  else
335  THROW("malformed affix argument (invalid namespace): " << p);
336 
337  if (q[2] != 0)
338  THROW("malformed affix argument (too long): " << p);
339  }
340 
341  uint16_t afx = (len << 1) | (prefix & 0x1);
342  all.affix_features[ns] <<= 4;
343  all.affix_features[ns] |= afx;
344 
345  p = strtok(nullptr, ",");
346  }
347  }
348  catch (...)
349  {
350  free(cstr);
351  throw;
352  }
353 
354  free(cstr);
355 }
std::array< uint64_t, NUM_NAMESPACES > affix_features
Definition: global_data.h:476
bool valid_ns(char c)
Definition: example.h:111
#define THROW(args)
Definition: vw_exception.h:181

◆ parse_args()

vw& parse_args ( options_i & options,
trace_message_t  trace_listener,
void *  trace_context 
)

Definition at line 1308 of file parse_args.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), vw::all_reduce, vw::all_reduce_type, vw::eta, vw::eta_decay_rate, vw::feature_mask, VW::finish(), vw::init_time, vw::initial_regressors, vw::initial_t, vw::initial_weight, VW::config::make_option(), vw::normal_weights, vw::options, vw::p, parse_diagnostics(), vw::per_feature_regularizer_input, vw::power_t, vw::quiet, vw::random_weights, vw::sd, Socket, parameters::sparse, shared_data::t, THROW, vw::tnormal_weights, vw_ostream::trace_context, vw_ostream::trace_listener, vw::trace_message, VW::config::options_i::was_supplied(), and vw::weights.

Referenced by VW::initialize().

1309 {
1310  vw& all = *(new vw());
1311  all.options = &options;
1312 
1313  if (trace_listener)
1314  {
1315  all.trace_message.trace_listener = trace_listener;
1316  all.trace_message.trace_context = trace_context;
1317  }
1318 
1319  try
1320  {
1321  time(&all.init_time);
1322 
1323  bool strict_parse = false;
1324  size_t ring_size;
1325  option_group_definition vw_args("VW options");
1326  vw_args.add(make_option("ring_size", ring_size).default_value(256).help("size of example ring"))
1327  .add(make_option("strict_parse", strict_parse).help("throw on malformed examples"));
1328  options.add_and_parse(vw_args);
1329 
1330  all.p = new parser{ring_size, strict_parse};
1331 
1332  option_group_definition update_args("Update options");
1333  update_args.add(make_option("learning_rate", all.eta).help("Set learning rate").short_name("l"))
1334  .add(make_option("power_t", all.power_t).help("t power value"))
1335  .add(make_option("decay_learning_rate", all.eta_decay_rate)
1336  .help("Set Decay factor for learning_rate between passes"))
1337  .add(make_option("initial_t", all.sd->t).help("initial t value"))
1338  .add(make_option("feature_mask", all.feature_mask)
1339  .help("Use existing regressor to determine which parameters may be updated. If no initial_regressor "
1340  "given, also used for initial weights."));
1341  options.add_and_parse(update_args);
1342 
1343  option_group_definition weight_args("Weight options");
1344  weight_args
1345  .add(make_option("initial_regressor", all.initial_regressors).help("Initial regressor(s)").short_name("i"))
1346  .add(make_option("initial_weight", all.initial_weight).help("Set all weights to an initial value of arg."))
1347  .add(make_option("random_weights", all.random_weights).help("make initial weights random"))
1348  .add(make_option("normal_weights", all.normal_weights).help("make initial weights normal"))
1349  .add(make_option("truncated_normal_weights", all.tnormal_weights).help("make initial weights truncated normal"))
1350  .add(make_option("sparse_weights", all.weights.sparse).help("Use a sparse datastructure for weights"))
1351  .add(make_option("input_feature_regularizer", all.per_feature_regularizer_input)
1352  .help("Per feature regularization input file"));
1353  options.add_and_parse(weight_args);
1354 
1355  std::string span_server_arg;
1356  int span_server_port_arg;
1357  // bool threads_arg;
1358  size_t unique_id_arg;
1359  size_t total_arg;
1360  size_t node_arg;
1361  option_group_definition parallelization_args("Parallelization options");
1362  parallelization_args
1363  .add(make_option("span_server", span_server_arg).help("Location of server for setting up spanning tree"))
1364  //(make_option("threads", threads_arg).help("Enable multi-threading")) Unused option?
1365  .add(make_option("unique_id", unique_id_arg).default_value(0).help("unique id used for cluster parallel jobs"))
1366  .add(
1367  make_option("total", total_arg).default_value(1).help("total number of nodes used in cluster parallel job"))
1368  .add(make_option("node", node_arg).default_value(0).help("node number in cluster parallel job"))
1369  .add(make_option("span_server_port", span_server_port_arg)
1370  .default_value(26543)
1371  .help("Port of the server for setting up spanning tree"));
1372  options.add_and_parse(parallelization_args);
1373 
1374  // total, unique_id and node must be specified together.
1375  if ((options.was_supplied("total") || options.was_supplied("node") || options.was_supplied("unique_id")) &&
1376  !(options.was_supplied("total") && options.was_supplied("node") && options.was_supplied("unique_id")))
1377  {
1378  THROW("you must specificy unique_id, total, and node if you specify any");
1379  }
1380 
1381  if (options.was_supplied("span_server"))
1382  {
1383  all.all_reduce_type = AllReduceType::Socket;
1384  all.all_reduce =
1385  new AllReduceSockets(span_server_arg, span_server_port_arg, unique_id_arg, total_arg, node_arg, all.quiet);
1386  }
1387 
1388  parse_diagnostics(options, all);
1389 
1390  all.initial_t = (float)all.sd->t;
1391  return all;
1392  }
1393  catch (...)
1394  {
1395  VW::finish(all);
1396  throw;
1397  }
1398 }
void parse_diagnostics(options_i &options, vw &all)
Definition: parse_args.cc:357
parameters weights
Definition: global_data.h:537
bool tnormal_weights
Definition: global_data.h:495
float initial_t
Definition: global_data.h:530
VW::config::options_i * options
Definition: global_data.h:428
float initial_weight
Definition: global_data.h:409
time_t init_time
Definition: global_data.h:533
float power_t
Definition: global_data.h:447
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
void finish(vw &all, bool delete_all)
Definition: parse_args.cc:1823
parser * p
Definition: global_data.h:377
AllReduce * all_reduce
Definition: global_data.h:381
shared_data * sd
Definition: global_data.h:375
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
std::string feature_mask
Definition: global_data.h:438
AllReduceType all_reduce_type
Definition: global_data.h:380
bool random_weights
Definition: global_data.h:492
float eta
Definition: global_data.h:531
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
std::vector< std::string > initial_regressors
Definition: global_data.h:436
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
void * trace_context
trace_message_t trace_listener
Definition: parser.h:38
std::string per_feature_regularizer_input
Definition: global_data.h:440
#define THROW(args)
Definition: vw_exception.h:181
bool normal_weights
Definition: global_data.h:494
float eta_decay_rate
Definition: global_data.h:532

◆ parse_diagnostics()

void parse_diagnostics ( options_i & options,
vw & all 
)

Definition at line 357 of file parse_args.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), vw::all_reduce, vw::audit, shared_data::dump_interval, VW::git_commit(), VW::config::make_option(), vw::progress_add, vw::progress_arg, AllReduce::quiet, vw::quiet, vw::sd, VW::version_struct::to_string(), vw::trace_message, VW::version(), and VW::config::options_i::was_supplied().

Referenced by parse_args().

358 {
359  bool version_arg = false;
360  bool help = false;
361  std::string progress_arg;
362  option_group_definition diagnostic_group("Diagnostic options");
363  diagnostic_group.add(make_option("version", version_arg).help("Version information"))
364  .add(make_option("audit", all.audit).short_name("a").help("print weights of features"))
365  .add(make_option("progress", progress_arg)
366  .short_name("P")
367  .help("Progress update frequency. int: additive, float: multiplicative"))
368  .add(make_option("quiet", all.quiet).help("Don't output disgnostics and progress updates"))
369  .add(make_option("help", help).short_name("h").help("Look here: http://hunch.net/~vw/ and click on Tutorial."));
370 
371  options.add_and_parse(diagnostic_group);
372 
373  // pass all.quiet around
374  if (all.all_reduce)
375  all.all_reduce->quiet = all.quiet;
376 
377  // Upon direct query for version -- spit it out to stdout
378  if (version_arg)
379  {
380  cout << VW::version.to_string() << " (git commit: " << VW::git_commit << ")\n";
381  exit(0);
382  }
383 
384  if (options.was_supplied("progress") && !all.quiet)
385  {
386  all.progress_arg = (float)::atof(progress_arg.c_str());
387  // --progress interval is dual: either integer or floating-point
388  if (progress_arg.find_first_of(".") == std::string::npos)
389  {
390  // No "." in arg: assume integer -> additive
391  all.progress_add = true;
392  if (all.progress_arg < 1)
393  {
394  all.trace_message << "warning: additive --progress <int>"
395  << " can't be < 1: forcing to 1" << endl;
396  all.progress_arg = 1;
397  }
398  all.sd->dump_interval = all.progress_arg;
399  }
400  else
401  {
402  // A "." in arg: assume floating-point -> multiplicative
403  all.progress_add = false;
404 
405  if (all.progress_arg <= 1.0)
406  {
407  all.trace_message << "warning: multiplicative --progress <float>: " << progress_arg << " is <= 1.0: adding 1.0"
408  << endl;
409  all.progress_arg += 1.0;
410  }
411  else if (all.progress_arg > 9.0)
412  {
413  all.trace_message << "warning: multiplicative --progress <float>"
414  << " is > 9.0: you probably meant to use an integer" << endl;
415  }
416  all.sd->dump_interval = 1.0;
417  }
418  }
419 }
const std::string git_commit(COMMIT_VERSION)
bool quiet
Definition: allreduce.h:82
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
bool progress_add
Definition: global_data.h:545
const version_struct version(PACKAGE_VERSION)
AllReduce * all_reduce
Definition: global_data.h:381
shared_data * sd
Definition: global_data.h:375
float progress_arg
Definition: global_data.h:546
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
std::string to_string() const
Definition: version.cc:97
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
bool audit
Definition: global_data.h:486
float dump_interval
Definition: global_data.h:147

◆ parse_dictionary_argument()

void parse_dictionary_argument ( vw & all,
std::string  str 
)

Definition at line 158 of file parse_args.cc.

References VW::alloc_examples(), substring::begin, c, v_array< T >::clear(), io_buf::close_file(), VW::dealloc_example(), features::deep_copy_from(), label_parser::delete_label, vw::dictionary_path, substring::end, ends_with(), example_predict::feature_space, find_in_path(), v_hashmap< K, V >::get(), hash_file_contents(), example_predict::indices, v_hashmap< K, V >::init(), label_parser::label_size, vw::loaded_dictionaries, parser::lp, dictionary_info::name, vw::namespace_dictionaries, io_buf::open_file(), vw::p, v_hashmap< K, V >::put(), quadratic_constant, vw::quiet, io_buf::READ, io_buf::read_file(), VW::read_line(), v_hashmap< K, V >::size(), vw::stdin_off, substring_equal(), example::tag, THROW, vw::trace_message, and uniform_hash().

Referenced by VW::initialize().

159 {
160  if (str.length() == 0)
161  return;
162  // expecting 'namespace:file', for instance 'w:foo.txt'
163  // in the case of just 'foo.txt' it's applied to the default namespace
164 
165  char ns = ' ';
166  const char* s = str.c_str();
167  if ((str.length() > 2) && (str[1] == ':'))
168  {
169  ns = str[0];
170  s += 2;
171  }
172 
173  std::string fname = find_in_path(all.dictionary_path, std::string(s));
174  if (fname == "")
175  THROW("error: cannot find dictionary '" << s << "' in path; try adding --dictionary_path");
176 
177  bool is_gzip = ends_with(fname, ".gz");
178  io_buf* io = is_gzip ? new comp_io_buf : new io_buf;
179  int fd = io->open_file(fname.c_str(), all.stdin_off, io_buf::READ);
180  if (fd < 0)
181  THROW("error: cannot read dictionary from file '" << fname << "'"
182  << ", opening failed");
183 
184  uint64_t fd_hash = hash_file_contents(io, fd);
185  io->close_file();
186 
187  if (!all.quiet)
188  all.trace_message << "scanned dictionary '" << s << "' from '" << fname << "', hash=" << std::hex << fd_hash
189  << std::dec << endl;
190 
191  // see if we've already read this dictionary
192  for (size_t id = 0; id < all.loaded_dictionaries.size(); id++)
193  if (all.loaded_dictionaries[id].file_hash == fd_hash)
194  {
195  all.namespace_dictionaries[(size_t)ns].push_back(all.loaded_dictionaries[id].dict);
196  io->close_file();
197  delete io;
198  return;
199  }
200 
201  fd = io->open_file(fname.c_str(), all.stdin_off, io_buf::READ);
202  if (fd < 0)
203  {
204  delete io;
205  THROW("error: cannot re-read dictionary from file '" << fname << "'"
206  << ", opening failed");
207  }
208 
209  feature_dict* map = &calloc_or_throw<feature_dict>();
210  map->init(1023, nullptr, substring_equal);
211  example* ec = VW::alloc_examples(all.p->lp.label_size, 1);
212 
213  size_t def = (size_t)' ';
214 
215  ssize_t size = 2048, pos, nread;
216  char rc;
217  char* buffer = calloc_or_throw<char>(size);
218  do
219  {
220  pos = 0;
221  do
222  {
223  nread = io->read_file(fd, &rc, 1);
224  if ((rc != EOF) && (nread > 0))
225  buffer[pos++] = rc;
226  if (pos >= size - 1)
227  {
228  size *= 2;
229  const auto new_buffer = (char*)(realloc(buffer, size));
230  if (new_buffer == nullptr)
231  {
232  free(buffer);
233  free(ec);
234  VW::dealloc_example(all.p->lp.delete_label, *ec);
235  delete map;
236  io->close_file();
237  delete io;
238  THROW("error: memory allocation failed in reading dictionary");
239  }
240  else
241  buffer = new_buffer;
242  }
243  } while ((rc != EOF) && (rc != '\n') && (nread > 0));
244  buffer[pos] = 0;
245 
246  // we now have a line in buffer
247  char* c = buffer;
248  while (*c == ' ' || *c == '\t') ++c; // skip initial whitespace
249  char* d = c;
250  while (*d != ' ' && *d != '\t' && *d != '\n' && *d != '\0') ++d; // gobble up initial word
251  if (d == c)
252  continue; // no word
253  if (*d != ' ' && *d != '\t')
254  continue; // reached end of line
255  char* word = calloc_or_throw<char>(d - c);
256  memcpy(word, c, d - c);
257  substring ss = {word, word + (d - c)};
258  uint64_t hash = uniform_hash(ss.begin, ss.end - ss.begin, quadratic_constant);
259  if (map->get(ss, hash) != nullptr) // don't overwrite old values!
260  {
261  free(word);
262  continue;
263  }
264  d--;
265  *d = '|'; // set up for parser::read_line
266  VW::read_line(all, ec, d);
267  // now we just need to grab stuff from the default namespace of ec!
268  if (ec->feature_space[def].size() == 0)
269  {
270  free(word);
271  continue;
272  }
273  features* arr = new features;
274  arr->deep_copy_from(ec->feature_space[def]);
275  map->put(ss, hash, arr);
276 
277  // clear up ec
278  ec->tag.clear();
279  ec->indices.clear();
280  for (size_t i = 0; i < 256; i++)
281  {
282  ec->feature_space[i].clear();
283  }
284  } while ((rc != EOF) && (nread > 0));
285  free(buffer);
286  io->close_file();
287  delete io;
288  VW::dealloc_example(all.p->lp.delete_label, *ec);
289  free(ec);
290 
291  if (!all.quiet)
292  all.trace_message << "dictionary " << s << " contains " << map->size() << " item" << (map->size() == 1 ? "" : "s")
293  << endl;
294 
295  all.namespace_dictionaries[(size_t)ns].push_back(map);
296  dictionary_info info = {calloc_or_throw<char>(strlen(s) + 1), fd_hash, map};
297  strcpy(info.name, s);
298  all.loaded_dictionaries.push_back(info);
299 }
v_array< char > tag
Definition: example.h:63
v_array< namespace_index > indices
std::array< std::vector< feature_dict * >, NUM_NAMESPACES > namespace_dictionaries
Definition: global_data.h:482
void deep_copy_from(const features &src)
void read_line(vw &all, example *ex, char *line)
void(* delete_label)(void *)
Definition: label_parser.h:16
virtual bool close_file()
Definition: io_buf.h:204
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
constexpr int quadratic_constant
Definition: constant.h:7
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
Definition: example.cc:219
the core definition of a set of features.
std::vector< dictionary_info > loaded_dictionaries
Definition: global_data.h:483
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
bool quiet
Definition: global_data.h:487
virtual ssize_t read_file(int f, void *buf, size_t nbytes)
Definition: io_buf.h:167
static constexpr int READ
Definition: io_buf.h:71
example * alloc_examples(size_t, size_t count=1)
Definition: example.cc:204
parser * p
Definition: global_data.h:377
std::array< features, NUM_NAMESPACES > feature_space
virtual int open_file(const char *name, bool stdin_off)
Definition: io_buf.h:90
std::string find_in_path(std::vector< std::string > paths, std::string fname)
Definition: parse_args.cc:141
void clear()
Definition: v_array.h:88
vw_ostream trace_message
Definition: global_data.h:424
Definition: io_buf.h:54
std::vector< std::string > dictionary_path
Definition: global_data.h:478
size_t label_size
Definition: label_parser.h:23
void init(size_t min_size, const V &def, bool(*eq)(const K &, const K &))
Definition: v_hashmap.h:54
bool substring_equal(const substring &a, const substring &b)
bool ends_with(std::string const &fullString, std::string const &ending)
Definition: parse_args.cc:100
bool stdin_off
Definition: global_data.h:527
void put(const K &key, uint64_t hash, const V &val)
Definition: v_hashmap.h:275
#define THROW(args)
Definition: vw_exception.h:181
V & get(const K &key, uint64_t hash)
Definition: v_hashmap.h:203
constexpr uint64_t c
Definition: rand48.cc:12
uint64_t hash_file_contents(io_buf *io, int f)
Definition: parse_args.cc:112
size_t size()
Definition: v_hashmap.h:281
label_parser lp
Definition: parser.h:102

◆ parse_example_tweaks()

void parse_example_tweaks ( options_i & options,
vw & all
)

Definition at line 1010 of file parse_args.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), vw::eta, f, namedlabels::getK(), getLossFunction(), vw::holdout_after, vw::holdout_period, vw::holdout_set_off, vw::l1_lambda, vw::l2_lambda, vw::lda, shared_data::ldict, vw::loss, VW::config::make_option(), vw::max_examples, shared_data::max_label, shared_data::min_label, vw::no_bias, noop_mm(), vw::numpasses, vw::p, vw::pass_length, vw::quiet, vw::reg_mode, vw::sd, vw::set_minmax, parser::sort_features, vw::trace_message, vw::training, and VW::config::options_i::was_supplied().

Referenced by parse_modules().

1011 {
1012  std::string named_labels;
1013  std::string loss_function;
1014  float loss_parameter = 0.0;
1015  size_t early_terminate_passes;
1016  bool test_only = false;
1017 
1018  option_group_definition example_options("Example options");
1019  example_options.add(make_option("testonly", test_only).short_name("t").help("Ignore label information and just test"))
1020  .add(make_option("holdout_off", all.holdout_set_off).help("no holdout data in multiple passes"))
1021  .add(make_option("holdout_period", all.holdout_period).default_value(10).help("holdout period for test only"))
1022  .add(make_option("holdout_after", all.holdout_after)
1023  .help("holdout after n training examples, default off (disables holdout_period)"))
1024  .add(
1025  make_option("early_terminate", early_terminate_passes)
1026  .default_value(3)
1027  .help(
1028  "Specify the number of passes tolerated when holdout loss doesn't decrease before early termination"))
1029  .add(make_option("passes", all.numpasses).help("Number of Training Passes"))
1030  .add(make_option("initial_pass_length", all.pass_length).help("initial number of examples per pass"))
1031  .add(make_option("examples", all.max_examples).help("number of examples to parse"))
1032  .add(make_option("min_prediction", all.sd->min_label).help("Smallest prediction to output"))
1033  .add(make_option("max_prediction", all.sd->max_label).help("Largest prediction to output"))
1034  .add(make_option("sort_features", all.p->sort_features)
1035  .help("turn this on to disregard order in which features have been defined. This will lead to smaller "
1036  "cache sizes"))
1037  .add(make_option("loss_function", loss_function)
1038  .default_value("squared")
1039  .help("Specify the loss function to be used, uses squared by default. Currently available ones are "
1040  "squared, classic, hinge, logistic, quantile and poisson."))
1041  .add(make_option("quantile_tau", loss_parameter)
1042  .default_value(0.5f)
1043  .help("Parameter \\tau associated with Quantile loss. Defaults to 0.5"))
1044  .add(make_option("l1", all.l1_lambda).help("l_1 lambda"))
1045  .add(make_option("l2", all.l2_lambda).help("l_2 lambda"))
1046  .add(make_option("no_bias_regularization", all.no_bias).help("no bias in regularization"))
1047  .add(make_option("named_labels", named_labels)
1048  .keep()
1049  .help("use names for labels (multiclass, etc.) rather than integers, argument specified all possible "
1050  "labels, comma-sep, eg \"--named_labels Noun,Verb,Adj,Punc\""));
1051  options.add_and_parse(example_options);
1052 
1053  if (test_only || all.eta == 0.)
1054  {
1055  if (!all.quiet)
1056  all.trace_message << "only testing" << endl;
1057  all.training = false;
1058  if (all.lda > 0)
1059  all.eta = 0;
1060  }
1061  else
1062  all.training = true;
1063 
1064  if ((all.numpasses > 1 || all.holdout_after > 0) && !all.holdout_set_off)
1065  all.holdout_set_off = false; // holdout is on unless explicitly off
1066  else
1067  all.holdout_set_off = true;
1068 
1069  if (options.was_supplied("min_prediction") || options.was_supplied("max_prediction") || test_only)
1070  all.set_minmax = noop_mm;
1071 
1072  if (options.was_supplied("named_labels"))
1073  {
1074  all.sd->ldict = &calloc_or_throw<namedlabels>();
1075  new (all.sd->ldict) namedlabels(named_labels);
1076  if (!all.quiet)
1077  all.trace_message << "parsed " << all.sd->ldict->getK() << " named labels" << endl;
1078  }
1079 
1080  all.loss = getLossFunction(all, loss_function, loss_parameter);
1081 
1082  if (all.l1_lambda < 0.)
1083  {
1084  all.trace_message << "l1_lambda should be nonnegative: resetting from " << all.l1_lambda << " to 0" << endl;
1085  all.l1_lambda = 0.;
1086  }
1087  if (all.l2_lambda < 0.)
1088  {
1089  all.trace_message << "l2_lambda should be nonnegative: resetting from " << all.l2_lambda << " to 0" << endl;
1090  all.l2_lambda = 0.;
1091  }
1092  all.reg_mode += (all.l1_lambda > 0.) ? 1 : 0;
1093  all.reg_mode += (all.l2_lambda > 0.) ? 2 : 0;
1094  if (!all.quiet)
1095  {
1096  if (all.reg_mode % 2 && !options.was_supplied("bfgs"))
1097  all.trace_message << "using l1 regularization = " << all.l1_lambda << endl;
1098  if (all.reg_mode > 1)
1099  all.trace_message << "using l2 regularization = " << all.l2_lambda << endl;
1100  }
1101 }
uint32_t holdout_after
Definition: global_data.h:502
loss_function * loss
Definition: global_data.h:523
namedlabels * ldict
Definition: global_data.h:153
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
bool holdout_set_off
Definition: global_data.h:499
bool training
Definition: global_data.h:488
uint32_t lda
Definition: global_data.h:508
parser * p
Definition: global_data.h:377
void(* set_minmax)(shared_data *sd, float label)
Definition: global_data.h:394
void noop_mm(shared_data *, float)
Definition: global_data.cc:135
bool no_bias
Definition: global_data.h:446
shared_data * sd
Definition: global_data.h:375
float l2_lambda
Definition: global_data.h:445
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
size_t numpasses
Definition: global_data.h:451
float eta
Definition: global_data.h:531
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
float min_label
Definition: global_data.h:150
size_t pass_length
Definition: global_data.h:450
uint32_t getK()
Definition: global_data.h:106
float max_label
Definition: global_data.h:151
bool sort_features
Definition: parser.h:77
size_t max_examples
Definition: global_data.h:539
float l1_lambda
Definition: global_data.h:444
loss_function * getLossFunction(vw &all, std::string funcName, float function_parameter)
float f
Definition: cache.cc:40
uint32_t holdout_period
Definition: global_data.h:501
int reg_mode
Definition: global_data.h:448

◆ parse_feature_tweaks()

void parse_feature_tweaks ( options_i & options,
vw & all,
std::vector< std::string > &  dictionary_nses 
)

Definition at line 600 of file parse_args.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), vw::add_constant, c, compile_gram(), compile_limits(), vw::default_bits, vw::dictionary_path, directory_exists(), INTERACTIONS::expand_interactions(), getHasher(), vw::hash_seed, parser::hasher, id(), vw::ignore, vw::ignore_linear, vw::ignore_some, vw::ignore_some_linear, vw::initial_constant, vw::interactions, interactions_settings_doubled, vw::limit, vw::limit_strings, VW::config::make_option(), vw::ngram, vw::ngram_strings, vw::num_bits, vw::p, vw::pairs, parse_affix_argument(), vw::permutations, vw::quiet, vw::redefine, vw::redefine_some, vw::skip_strings, vw::skips, INTERACTIONS::sort_and_filter_duplicate_interactions(), vw::spelling_features, spoof_hex_encoded_namespaces(), THROW, vw::trace_message, vw::triples, VW::validate_num_bits(), and VW::config::options_i::was_supplied().

Referenced by parse_modules().

601 {
602  std::string hash_function("strings");
603  uint32_t new_bits;
604  std::vector<std::string> spelling_ns;
605  std::vector<std::string> quadratics;
606  std::vector<std::string> cubics;
607  std::vector<std::string> interactions;
608  std::vector<std::string> ignores;
609  std::vector<std::string> ignore_linears;
610  std::vector<std::string> keeps;
611  std::vector<std::string> redefines;
612 
613  std::vector<std::string> dictionary_path;
614 
615  bool noconstant;
616  bool leave_duplicate_interactions;
617  std::string affix;
618  std::string q_colon;
619 
620  option_group_definition feature_options("Feature options");
621  feature_options
622  .add(make_option("hash", hash_function).keep().help("how to hash the features. Available options: strings, all"))
623  .add(make_option("hash_seed", all.hash_seed).keep().default_value(0).help("seed for hash function"))
624  .add(make_option("ignore", ignores).keep().help("ignore namespaces beginning with character <arg>"))
625  .add(make_option("ignore_linear", ignore_linears)
626  .keep()
627  .help("ignore namespaces beginning with character <arg> for linear terms only"))
628  .add(make_option("keep", keeps).keep().help("keep namespaces beginning with character <arg>"))
629  .add(make_option("redefine", redefines)
630  .keep()
631  .help("redefine namespaces beginning with characters of std::string S as namespace N. <arg> shall be in "
632  "form "
633  "'N:=S' where := is operator. Empty N or S are treated as default namespace. Use ':' as a "
634  "wildcard in S.")
635  .keep())
636  .add(make_option("bit_precision", new_bits).short_name("b").help("number of bits in the feature table"))
637  .add(make_option("noconstant", noconstant).help("Don't add a constant feature"))
638  .add(make_option("constant", all.initial_constant).short_name("C").help("Set initial value of constant"))
639  .add(make_option("ngram", all.ngram_strings)
640  .help("Generate N grams. To generate N grams for a single namespace 'foo', arg should be fN."))
641  .add(make_option("skips", all.skip_strings)
642  .help("Generate skips in N grams. This in conjunction with the ngram tag can be used to generate "
643  "generalized n-skip-k-gram. To generate n-skips for a single namespace 'foo', arg should be fN."))
644  .add(make_option("feature_limit", all.limit_strings)
645  .help("limit to N features. To apply to a single namespace 'foo', arg should be fN"))
646  .add(make_option("affix", affix)
647  .keep()
648  .help("generate prefixes/suffixes of features; argument '+2a,-3b,+1' means generate 2-char prefixes for "
649  "namespace a, 3-char suffixes for b and 1 char prefixes for default namespace"))
650  .add(make_option("spelling", spelling_ns)
651  .keep()
652  .help("compute spelling features for a give namespace (use '_' for default namespace)"))
653  .add(make_option("dictionary", dictionary_nses)
654  .keep()
655  .help("read a dictionary for additional features (arg either 'x:file' or just 'file')"))
656  .add(make_option("dictionary_path", dictionary_path)
657  .help("look in this directory for dictionaries; defaults to current directory or env{PATH}"))
658  .add(make_option("interactions", interactions)
659  .keep()
660  .help("Create feature interactions of any level between namespaces."))
661  .add(make_option("permutations", all.permutations)
662  .help("Use permutations instead of combinations for feature interactions of same namespace."))
663  .add(make_option("leave_duplicate_interactions", leave_duplicate_interactions)
664  .help("Don't remove interactions with duplicate combinations of namespaces. For ex. this is a "
665  "duplicate: '-q ab -q ba' and a lot more in '-q ::'."))
666  .add(make_option("quadratic", quadratics).short_name("q").keep().help("Create and use quadratic features"))
667  // TODO this option is unused - remove?
668  .add(make_option("q:", q_colon).help(": corresponds to a wildcard for all printable characters"))
669  .add(make_option("cubic", cubics).keep().help("Create and use cubic features"));
670  options.add_and_parse(feature_options);
671 
672  // feature manipulation
673  all.p->hasher = getHasher(hash_function);
674 
675  if (options.was_supplied("spelling"))
676  {
677  for (size_t id = 0; id < spelling_ns.size(); id++)
678  {
679  spelling_ns[id] = spoof_hex_encoded_namespaces(spelling_ns[id]);
680  if (spelling_ns[id][0] == '_')
681  all.spelling_features[(unsigned char)' '] = true;
682  else
683  all.spelling_features[(size_t)spelling_ns[id][0]] = true;
684  }
685  }
686 
687  if (options.was_supplied("affix"))
689 
690  if (options.was_supplied("ngram"))
691  {
692  if (options.was_supplied("sort_features"))
693  THROW("ngram is incompatible with sort_features.");
694 
695  for (size_t i = 0; i < all.ngram_strings.size(); i++)
697  compile_gram(all.ngram_strings, all.ngram, (char*)"grams", all.quiet);
698  }
699 
700  if (options.was_supplied("skips"))
701  {
702  if (!options.was_supplied("ngram"))
703  THROW("You can not skip unless ngram is > 1");
704 
705  for (size_t i = 0; i < all.skip_strings.size(); i++)
707  compile_gram(all.skip_strings, all.skips, (char*)"skips", all.quiet);
708  }
709 
710  if (options.was_supplied("feature_limit"))
711  compile_limits(all.limit_strings, all.limit, all.quiet);
712 
713  if (options.was_supplied("bit_precision"))
714  {
715  if (all.default_bits == false && new_bits != all.num_bits)
716  THROW("Number of bits is set to " << new_bits << " and " << all.num_bits
717  << " by argument and model. That does not work.");
718 
719  all.default_bits = false;
720  all.num_bits = new_bits;
721 
723  }
724 
725  // prepare namespace interactions
726  std::vector<std::string> expanded_interactions;
727 
728  if ( ( ((!all.pairs.empty() || !all.triples.empty() || !all.interactions.empty()) && /*data was restored from old model file directly to v_array and will be overriden automatically*/
729  (options.was_supplied("quadratic") || options.was_supplied("cubic") || options.was_supplied("interactions")) ) )
730  ||
731  interactions_settings_doubled /*settings were restored from model file to file_options and overriden by params from command line*/)
732  {
733  all.trace_message << "WARNING: model file has set of {-q, --cubic, --interactions} settings stored, but they'll be "
734  "OVERRIDEN by set of {-q, --cubic, --interactions} settings from command line."
735  << endl;
736 
737  // in case arrays were already filled in with values from old model file - reset them
738  if (!all.pairs.empty())
739  all.pairs.clear();
740  if (!all.triples.empty())
741  all.triples.clear();
742  if (!all.interactions.empty())
743  all.interactions.clear();
744  }
745 
746  if (options.was_supplied("quadratic"))
747  {
748  if (!all.quiet)
749  all.trace_message << "creating quadratic features for pairs: ";
750 
751  for (std::vector<std::string>::iterator i = quadratics.begin(); i != quadratics.end(); ++i)
752  {
754  if (!all.quiet)
755  all.trace_message << *i << " ";
756  }
757 
758  expanded_interactions =
759  INTERACTIONS::expand_interactions(quadratics, 2, "error, quadratic features must involve two sets.");
760 
761  if (!all.quiet)
762  all.trace_message << endl;
763  }
764 
765  if (options.was_supplied("cubic"))
766  {
767  if (!all.quiet)
768  all.trace_message << "creating cubic features for triples: ";
769  for (std::vector<std::string>::iterator i = cubics.begin(); i != cubics.end(); ++i)
770  {
772  if (!all.quiet)
773  all.trace_message << *i << " ";
774  }
775 
776  std::vector<std::string> exp_cubic =
777  INTERACTIONS::expand_interactions(cubics, 3, "error, cubic features must involve three sets.");
778  expanded_interactions.insert(std::begin(expanded_interactions), std::begin(exp_cubic), std::end(exp_cubic));
779 
780  if (!all.quiet)
781  all.trace_message << endl;
782  }
783 
784  if (options.was_supplied("interactions"))
785  {
786  if (!all.quiet)
787  all.trace_message << "creating features for following interactions: ";
788  for (std::vector<std::string>::iterator i = interactions.begin(); i != interactions.end(); ++i)
789  {
791  if (!all.quiet)
792  all.trace_message << *i << " ";
793  }
794 
795  std::vector<std::string> exp_inter = INTERACTIONS::expand_interactions(interactions, 0, "");
796  expanded_interactions.insert(std::begin(expanded_interactions), std::begin(exp_inter), std::end(exp_inter));
797 
798  if (!all.quiet)
799  all.trace_message << endl;
800  }
801 
802  if (expanded_interactions.size() > 0)
803  {
804  size_t removed_cnt;
805  size_t sorted_cnt;
807  expanded_interactions, !leave_duplicate_interactions, removed_cnt, sorted_cnt);
808 
809  if (removed_cnt > 0)
810  all.trace_message << "WARNING: duplicate namespace interactions were found. Removed: " << removed_cnt << '.'
811  << endl
812  << "You can use --leave_duplicate_interactions to disable this behaviour." << endl;
813  if (sorted_cnt > 0)
814  all.trace_message << "WARNING: some interactions contain duplicate characters and their characters order has "
815  "been changed. Interactions affected: "
816  << sorted_cnt << '.' << endl;
817 
818  if (all.interactions.size() > 0)
819  {
820  // should be empty, but just in case...
821  all.interactions.clear();
822  }
823 
824  all.interactions = expanded_interactions;
825 
826  // copy interactions of size 2 and 3 to old vectors for backward compatibility
827  for (auto& i : expanded_interactions)
828  {
829  const size_t len = i.size();
830  if (len == 2)
831  all.pairs.push_back(i);
832  else if (len == 3)
833  all.triples.push_back(i);
834  }
835  }
836 
837  for (size_t i = 0; i < 256; i++)
838  {
839  all.ignore[i] = false;
840  all.ignore_linear[i] = false;
841  }
842  all.ignore_some = false;
843  all.ignore_some_linear = false;
844 
845  if (options.was_supplied("ignore"))
846  {
847  all.ignore_some = true;
848 
849  for (std::vector<std::string>::iterator i = ignores.begin(); i != ignores.end(); i++)
850  {
852  for (std::string::const_iterator j = i->begin(); j != i->end(); j++) all.ignore[(size_t)(unsigned char)*j] = true;
853  }
854 
855  if (!all.quiet)
856  {
857  all.trace_message << "ignoring namespaces beginning with: ";
858  for (auto const& ignore : ignores)
859  for (auto const character : ignore) all.trace_message << character << " ";
860 
861  all.trace_message << endl;
862  }
863  }
864 
865  if (options.was_supplied("ignore_linear"))
866  {
867  all.ignore_some_linear = true;
868 
869  for (std::vector<std::string>::iterator i = ignore_linears.begin(); i != ignore_linears.end(); i++)
870  {
872  for (std::string::const_iterator j = i->begin(); j != i->end(); j++)
873  all.ignore_linear[(size_t)(unsigned char)*j] = true;
874  }
875 
876  if (!all.quiet)
877  {
878  all.trace_message << "ignoring linear terms for namespaces beginning with: ";
879  for (auto const& ignore : ignore_linears)
880  for (auto const character : ignore) all.trace_message << character << " ";
881 
882  all.trace_message << endl;
883  }
884  }
885 
886  if (options.was_supplied("keep"))
887  {
888  for (size_t i = 0; i < 256; i++) all.ignore[i] = true;
889 
890  all.ignore_some = true;
891 
892  for (std::vector<std::string>::iterator i = keeps.begin(); i != keeps.end(); i++)
893  {
895  for (std::string::const_iterator j = i->begin(); j != i->end(); j++)
896  all.ignore[(size_t)(unsigned char)*j] = false;
897  }
898 
899  if (!all.quiet)
900  {
901  all.trace_message << "using namespaces beginning with: ";
902  for (auto const& keep : keeps)
903  for (auto const character : keep) all.trace_message << character << " ";
904 
905  all.trace_message << endl;
906  }
907  }
908 
909  // --redefine param code
910  all.redefine_some = false; // false by default
911 
912  if (options.was_supplied("redefine"))
913  {
914  // initial values: i-th namespace is redefined to i itself
915  for (size_t i = 0; i < 256; i++) all.redefine[i] = (unsigned char)i;
916 
917  // note: --redefine declaration order matters
918  // so --redefine :=L --redefine ab:=M --ignore L will ignore all except a and b under new M namespace
919 
920  for (std::vector<std::string>::iterator arg_iter = redefines.begin(); arg_iter != redefines.end(); arg_iter++)
921  {
922  std::string argument = spoof_hex_encoded_namespaces(*arg_iter);
923  size_t arg_len = argument.length();
924 
925  size_t operator_pos = 0; // keeps operator pos + 1 to stay unsigned type
926  bool operator_found = false;
927  unsigned char new_namespace = ' ';
928 
929  // let's find operator ':=' position in N:=S
930  for (size_t i = 0; i < arg_len; i++)
931  {
932  if (operator_found)
933  {
934  if (i > 2)
935  {
936  new_namespace = argument[0];
937  } // N is not empty
938  break;
939  }
940  else if (argument[i] == ':')
941  operator_pos = i + 1;
942  else if ((argument[i] == '=') && (operator_pos == i))
943  operator_found = true;
944  }
945 
946  if (!operator_found)
947  THROW("argument of --redefine is malformed. Valid format is N:=S, :=S or N:=");
948 
949  if (++operator_pos > 3) // seek operator end
950  all.trace_message
951  << "WARNING: multiple namespaces are used in target part of --redefine argument. Only first one ('"
952  << new_namespace << "') will be used as target namespace." << endl;
953 
954  all.redefine_some = true;
955 
956  // case ':=S' doesn't require any additional code as new_namespace = ' ' by default
957 
958  if (operator_pos == arg_len) // S is empty, default namespace shall be used
959  all.redefine[(int)' '] = new_namespace;
960  else
961  for (size_t i = operator_pos; i < arg_len; i++)
962  {
963  // all namespaces from S are redefined to N
964  unsigned char c = argument[i];
965  if (c != ':')
966  all.redefine[c] = new_namespace;
967  else
968  {
969  // wildcard found: redefine all except default and break
970  for (size_t i = 0; i < 256; i++) all.redefine[i] = new_namespace;
971  break; // break processing S
972  }
973  }
974  }
975  }
976 
977  if (options.was_supplied("dictionary"))
978  {
979  if (options.was_supplied("dictionary_path"))
980  for (std::string path : dictionary_path)
981  if (directory_exists(path))
982  all.dictionary_path.push_back(path);
983  if (directory_exists("."))
984  all.dictionary_path.push_back(".");
985 
986  const std::string PATH = getenv("PATH");
987 #if _WIN32
988  const char delimiter = ';';
989 #else
990  const char delimiter = ':';
991 #endif
992  if (!PATH.empty())
993  {
994  size_t previous = 0;
995  size_t index = PATH.find(delimiter);
996  while (index != std::string::npos)
997  {
998  all.dictionary_path.push_back(PATH.substr(previous, index - previous));
999  previous = index + 1;
1000  index = PATH.find(delimiter, previous);
1001  }
1002  all.dictionary_path.push_back(PATH.substr(previous));
1003  }
1004  }
1005 
1006  if (noconstant)
1007  all.add_constant = false;
1008 }
std::vector< std::string > skip_strings
Definition: global_data.h:470
std::array< uint32_t, NUM_NAMESPACES > skips
Definition: global_data.h:472
bool ignore_some_linear
Definition: global_data.h:464
std::array< bool, NUM_NAMESPACES > spelling_features
Definition: global_data.h:477
std::vector< std::string > pairs
Definition: global_data.h:459
std::array< uint32_t, NUM_NAMESPACES > ngram
Definition: global_data.h:471
std::vector< std::string > ngram_strings
Definition: global_data.h:469
bool add_constant
Definition: global_data.h:496
bool redefine_some
Definition: global_data.h:467
void compile_gram(std::vector< std::string > grams, std::array< uint32_t, NUM_NAMESPACES > &dest, char *descriptor, bool quiet)
Definition: global_data.cc:191
v_array< act_score > path
Definition: search_meta.cc:53
bool quiet
Definition: global_data.h:487
std::vector< std::string > limit_strings
Definition: global_data.h:473
hash_func_t hasher
Definition: parser.h:73
virtual void add_and_parse(const option_group_definition &group)=0
std::string spoof_hex_encoded_namespaces(const std::string &arg)
Definition: parse_args.cc:568
hash_func_t getHasher(const std::string &s)
uint32_t num_bits
Definition: global_data.h:398
std::array< bool, NUM_NAMESPACES > ignore
Definition: global_data.h:463
parser * p
Definition: global_data.h:377
std::array< uint32_t, NUM_NAMESPACES > limit
Definition: global_data.h:474
bool ignore_some
Definition: global_data.h:462
float initial_constant
Definition: global_data.h:410
float id(float in)
Definition: scorer.cc:51
std::array< bool, NUM_NAMESPACES > ignore_linear
Definition: global_data.h:465
void compile_limits(std::vector< std::string > limits, std::array< uint32_t, NUM_NAMESPACES > &dest, bool quiet)
Definition: global_data.cc:216
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
std::vector< std::string > expand_interactions(const std::vector< std::string > &vec, const size_t required_length, const std::string &err_msg)
Definition: interactions.cc:56
std::array< unsigned char, NUM_NAMESPACES > redefine
Definition: global_data.h:468
bool default_bits
Definition: global_data.h:399
std::vector< std::string > triples
Definition: global_data.h:461
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
std::vector< std::string > dictionary_path
Definition: global_data.h:478
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
bool directory_exists(std::string path)
Definition: parse_args.cc:130
std::vector< std::string > interactions
Definition: global_data.h:457
uint32_t hash_seed
Definition: global_data.h:401
bool permutations
Definition: global_data.h:454
void validate_num_bits(vw &all)
Definition: vw_validate.cc:32
void sort_and_filter_duplicate_interactions(std::vector< std::string > &vec, bool filter_duplicates, size_t &removed_cnt, size_t &sorted_cnt)
#define THROW(args)
Definition: vw_exception.h:181
constexpr uint64_t c
Definition: rand48.cc:12
void parse_affix_argument(vw &all, std::string str)
Definition: parse_args.cc:301
bool interactions_settings_doubled
Definition: parse_args.cc:498

◆ parse_modules()

void parse_modules ( options_i & options,
vw & all,
std::vector< std::string > &  dictionary_nses 
)

Definition at line 1523 of file parse_args.cc.

References VW::config::option_group_definition::add(), VW::config::options_i::add_and_parse(), vw::eta, vw::eta_decay_rate, vw::get_random_state(), VW::config::make_option(), vw::num_bits, vw::numpasses, parse_example_tweaks(), parse_feature_tweaks(), parse_output_model(), parse_output_preds(), parse_reductions(), vw::power_t, vw::quiet, vw::random_seed, vw::sd, shared_data::t, and vw::trace_message.

Referenced by VW::initialize().

1524 {
1525  option_group_definition rand_options("Randomization options");
1526  rand_options.add(make_option("random_seed", all.random_seed).help("seed random number generator"));
1527  options.add_and_parse(rand_options);
1528  all.get_random_state()->set_random_state(all.random_seed);
1529 
1530  parse_feature_tweaks(options, all, dictionary_nses); // feature tweaks
1531 
1532  parse_example_tweaks(options, all); // example manipulation
1533 
1534  parse_output_model(options, all);
1535 
1536  parse_output_preds(options, all);
1537 
1538  parse_reductions(options, all);
1539 
1540  if (!all.quiet)
1541  {
1542  all.trace_message << "Num weight bits = " << all.num_bits << endl;
1543  all.trace_message << "learning rate = " << all.eta << endl;
1544  all.trace_message << "initial_t = " << all.sd->t << endl;
1545  all.trace_message << "power_t = " << all.power_t << endl;
1546  if (all.numpasses > 1)
1547  all.trace_message << "decay_learning_rate = " << all.eta_decay_rate << endl;
1548  }
1549 }
void parse_output_model(options_i &options, vw &all)
Definition: parse_args.cc:1165
uint64_t random_seed
Definition: global_data.h:491
float power_t
Definition: global_data.h:447
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
uint32_t num_bits
Definition: global_data.h:398
std::shared_ptr< rand_state > get_random_state()
Definition: global_data.h:553
void parse_output_preds(options_i &options, vw &all)
Definition: parse_args.cc:1103
void parse_reductions(options_i &options, vw &all)
Definition: parse_args.cc:1234
shared_data * sd
Definition: global_data.h:375
vw_ostream trace_message
Definition: global_data.h:424
size_t numpasses
Definition: global_data.h:451
float eta
Definition: global_data.h:531
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
void parse_example_tweaks(options_i &options, vw &all)
Definition: parse_args.cc:1010
void parse_feature_tweaks(options_i &options, vw &all, std::vector< std::string > &dictionary_nses)
Definition: parse_args.cc:600
float eta_decay_rate
Definition: global_data.h:532

◆ parse_output_model()

void parse_output_model ( options_i & options,
vw & all
)

Definition at line 1165 of file parse_args.cc.

References VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), vw::final_regressor_name, vw::hash_inv, vw::id, vw::inv_hash_regressor_name, VW::config::make_option(), vw::per_feature_regularizer_output, vw::per_feature_regularizer_text, vw::preserve_performance_counters, vw::quiet, vw::save_per_pass, vw::save_resume, vw::text_regressor_name, vw::trace_message, and VW::config::options_i::was_supplied().

Referenced by parse_modules().

1166 {
1167  option_group_definition output_model_options("Output model");
1168  output_model_options
1169  .add(make_option("final_regressor", all.final_regressor_name).short_name("f").help("Final regressor"))
1170  .add(make_option("readable_model", all.text_regressor_name)
1171  .help("Output human-readable final regressor with numeric features"))
1172  .add(make_option("invert_hash", all.inv_hash_regressor_name)
1173  .help("Output human-readable final regressor with feature names. Computationally expensive."))
1174  .add(make_option("save_resume", all.save_resume)
1175  .help("save extra state so learning can be resumed later with new data"))
1176  .add(make_option("preserve_performance_counters", all.preserve_performance_counters)
1177  .help("reset performance counters when warmstarting"))
1178  .add(make_option("save_per_pass", all.save_per_pass).help("Save the model after every pass over data"))
1179  .add(make_option("output_feature_regularizer_binary", all.per_feature_regularizer_output)
1180  .help("Per feature regularization output file"))
1181  .add(make_option("output_feature_regularizer_text", all.per_feature_regularizer_text)
1182  .help("Per feature regularization output file, in text"))
1183  .add(make_option("id", all.id).help("User supplied ID embedded into the final regressor"));
1184  options.add_and_parse(output_model_options);
1185 
1186  if (all.final_regressor_name.compare("") && !all.quiet)
1187  all.trace_message << "final_regressor = " << all.final_regressor_name << endl;
1188 
1189  if (options.was_supplied("invert_hash"))
1190  all.hash_inv = true;
1191 
1192  // Question: This doesn't seem necessary
1193  // if (options.was_supplied("id") && find(arg.args.begin(), arg.args.end(), "--id") == arg.args.end())
1194  // {
1195  // arg.args.push_back("--id");
1196  // arg.args.push_back(arg.vm["id"].as<std::string>());
1197  // }
1198 }
bool hash_inv
Definition: global_data.h:541
std::string inv_hash_regressor_name
Definition: global_data.h:511
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
std::string id
Definition: global_data.h:417
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
std::string per_feature_regularizer_output
Definition: global_data.h:441
std::string per_feature_regularizer_text
Definition: global_data.h:442
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
bool save_per_pass
Definition: global_data.h:408
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
bool preserve_performance_counters
Definition: global_data.h:416
bool save_resume
Definition: global_data.h:415
std::string final_regressor_name
Definition: global_data.h:535
std::string text_regressor_name
Definition: global_data.h:510

◆ parse_output_preds()

void parse_output_preds ( options_i options,
vw all 
)

Definition at line 1103 of file parse_args.cc.

References VW::config::option_group_definition::add(), VW::config::options_i::add_and_parse(), f, vw::final_prediction_sink, VW::config::make_option(), O_LARGEFILE, v_array< T >::push_back(), vw::quiet, vw::raw_prediction, vw::trace_message, and VW::config::options_i::was_supplied().

Referenced by parse_modules().

1104 {
1105  std::string predictions;
1106  std::string raw_predictions;
1107 
1108  option_group_definition output_options("Output options");
1109  output_options.add(make_option("predictions", predictions).short_name("p").help("File to output predictions to"))
1110  .add(make_option("raw_predictions", raw_predictions)
1111  .short_name("r")
1112  .help("File to output unnormalized predictions to"));
1113  options.add_and_parse(output_options);
1114 
1115  if (options.was_supplied("predictions"))
1116  {
1117  if (!all.quiet)
1118  all.trace_message << "predictions = " << predictions << endl;
1119 
1120  if (predictions == "stdout")
1121  {
1122  all.final_prediction_sink.push_back((size_t)1); // stdout
1123  }
1124  else
1125  {
1126  const char* fstr = predictions.c_str();
1127  int f;
1128  // TODO can we migrate files to fstreams?
1129 #ifdef _WIN32
1130  _sopen_s(&f, fstr, _O_CREAT | _O_WRONLY | _O_BINARY | _O_TRUNC, _SH_DENYWR, _S_IREAD | _S_IWRITE);
1131 #else
1132  f = open(fstr, O_CREAT | O_WRONLY | O_LARGEFILE | O_TRUNC, 0666);
1133 #endif
1134  if (f < 0)
1135  all.trace_message << "Error opening the predictions file: " << fstr << endl;
1136  all.final_prediction_sink.push_back((size_t)f);
1137  }
1138  }
1139 
1140  if (options.was_supplied("raw_predictions"))
1141  {
1142  if (!all.quiet)
1143  {
1144  all.trace_message << "raw predictions = " << raw_predictions << endl;
1145  if (options.was_supplied("binary"))
1146  all.trace_message << "Warning: --raw_predictions has no defined value when --binary specified, expect no output"
1147  << endl;
1148  }
1149  if (raw_predictions == "stdout")
1150  all.raw_prediction = 1; // stdout
1151  else
1152  {
1153  const char* t = raw_predictions.c_str();
1154  int f;
1155 #ifdef _WIN32
1156  _sopen_s(&f, t, _O_CREAT | _O_WRONLY | _O_BINARY | _O_TRUNC, _SH_DENYWR, _S_IREAD | _S_IWRITE);
1157 #else
1158  f = open(t, O_CREAT | O_WRONLY | O_LARGEFILE | O_TRUNC, 0666);
1159 #endif
1160  all.raw_prediction = f;
1161  }
1162  }
1163 }
#define O_LARGEFILE
Definition: io_buf.h:24
int raw_prediction
Definition: global_data.h:519
v_array< int > final_prediction_sink
Definition: global_data.h:518
bool quiet
Definition: global_data.h:487
virtual void add_and_parse(const option_group_definition &group)=0
void push_back(const T &new_ele)
Definition: v_array.h:107
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
float f
Definition: cache.cc:40

◆ parse_reductions()

void parse_reductions ( options_i options,
vw all 
)

Definition at line 1234 of file parse_args.cc.

References active_cover_setup(), active_setup(), audit_regressor_setup(), autolink_setup(), baseline_setup(), bfgs_setup(), binary_setup(), boosting_setup(), bs_setup(), cb_adf_setup(), cb_algs_setup(), cb_explore_setup(), cb_sample_setup(), cbify_setup(), cbifyldf_setup(), CCB::ccb_explore_adf_setup(), classweight_setup(), confidence_setup(), cs_active_setup(), CSOAA::csldf_setup(), CSOAA::csoaa_setup(), ect_setup(), explore_eval_setup(), ftrl_setup(), gd_mf_setup(), interact_setup(), kernel_svm_setup(), vw::l, lda_setup(), log_multi_setup(), lrq_setup(), lrqfa_setup(), marginal_setup(), memory_tree_setup(), mf_setup(), multilabel_oaa_setup(), mwt_setup(), nn_setup(), noop_setup(), oaa_setup(), OjaNewton_setup(), print_setup(), recall_tree_setup(), vw::reduction_stack, scorer_setup(), sender_setup(), VW::cb_explore_adf::softmax::setup(), VW::cb_explore_adf::greedy::setup(), VW::cb_explore_adf::first::setup(), VW::cb_explore_adf::bag::setup(), VW::cb_explore_adf::cover::setup(), VW::cb_explore_adf::regcb::setup(), GD::setup(), Search::setup(), setup_base(), VW::shared_feature_merger::shared_feature_merger_setup(), stagewise_poly_setup(), svrg_setup(), topk_setup(), and warm_cb_setup().

Referenced by parse_modules().

// Registers the reduction setup functions on all.reduction_stack, then builds the learner
// chain with setup_base() and stores the result in all.l.
// reduction_stack is a std::stack and setup_base() takes entries from the top, so the
// functions pushed later in this body are tried first.
// NOTE(review): the embedded listing numbers skip values (e.g. 1237 -> 1239, 1273 -> 1278),
// so this listing appears to omit some lines of the original function - confirm against
// parse_args.cc before relying on the exact push order shown here.
1235 {
1236  // Base algorithms
1237  all.reduction_stack.push(GD::setup);
1239  all.reduction_stack.push(ftrl_setup);
1240  all.reduction_stack.push(svrg_setup);
1241  all.reduction_stack.push(sender_setup);
1242  all.reduction_stack.push(gd_mf_setup);
1243  all.reduction_stack.push(print_setup);
1244  all.reduction_stack.push(noop_setup);
1245  all.reduction_stack.push(lda_setup);
1246  all.reduction_stack.push(bfgs_setup);
1247  all.reduction_stack.push(OjaNewton_setup);
1248  // all.reduction_stack.push(VW_CNTK::setup);
1249 
1250  // Score Users
1251  all.reduction_stack.push(baseline_setup);
1252  all.reduction_stack.push(ExpReplay::expreplay_setup<'b', simple_label>);
1253  all.reduction_stack.push(active_setup);
1256  all.reduction_stack.push(nn_setup);
1257  all.reduction_stack.push(mf_setup);
1258  all.reduction_stack.push(marginal_setup);
1259  all.reduction_stack.push(autolink_setup);
1260  all.reduction_stack.push(lrq_setup);
1261  all.reduction_stack.push(lrqfa_setup);
1263  all.reduction_stack.push(scorer_setup);
1264  // Reductions
1265  all.reduction_stack.push(bs_setup);
1266  all.reduction_stack.push(binary_setup);
1267 
1268  all.reduction_stack.push(ExpReplay::expreplay_setup<'m', MULTICLASS::mc_label>);
1269  all.reduction_stack.push(topk_setup);
1270  all.reduction_stack.push(oaa_setup);
1271  all.reduction_stack.push(boosting_setup);
1272  all.reduction_stack.push(ect_setup);
1273  all.reduction_stack.push(log_multi_setup);
1278 
1279  all.reduction_stack.push(cs_active_setup);
1281  all.reduction_stack.push(interact_setup);
1283  all.reduction_stack.push(cb_algs_setup);
1284  all.reduction_stack.push(cb_adf_setup);
1285  all.reduction_stack.push(mwt_setup);
1293  all.reduction_stack.push(cb_sample_setup);
1296  // cbify/warm_cb can generate multi-examples. Merge shared features after them
1297  all.reduction_stack.push(warm_cb_setup);
1298  all.reduction_stack.push(cbify_setup);
1299  all.reduction_stack.push(cbifyldf_setup);
1301  all.reduction_stack.push(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>);
1302  all.reduction_stack.push(Search::setup);
1304 
// setup_base() pops setup functions until one returns a non-null learner; keep that learner in all.l.
1305  all.l = setup_base(options, all);
1306 }
LEARNER::base_learner * kernel_svm_setup(options_i &options, vw &all)
Definition: kernel_svm.cc:867
LEARNER::base_learner * audit_regressor_setup(options_i &options, vw &all)
base_learner * setup(options_i &options, vw &all)
Definition: gd.cc:1119
base_learner * memory_tree_setup(options_i &options, vw &all)
LEARNER::base_learner * classweight_setup(options_i &options, vw &all)
Definition: classweight.cc:71
LEARNER::base_learner * binary_setup(options_i &options, vw &all)
Definition: binary.cc:30
base_learner * csoaa_setup(options_i &options, vw &all)
Definition: csoaa.cc:117
LEARNER::base_learner * multilabel_oaa_setup(options_i &options, vw &all)
base_learner * explore_eval_setup(options_i &options, vw &all)
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
LEARNER::base_learner * scorer_setup(options_i &options, vw &all)
Definition: scorer.cc:53
base_learner * bfgs_setup(options_i &options, vw &all)
Definition: bfgs.cc:1093
base_learner * svrg_setup(options_i &options, vw &all)
Definition: svrg.cc:168
base_learner * OjaNewton_setup(options_i &options, vw &all)
Definition: OjaNewton.cc:535
LEARNER::base_learner * oaa_setup(options_i &options, vw &all)
Definition: oaa.cc:203
base_learner * cbify_setup(options_i &options, vw &all)
Definition: cbify.cc:383
LEARNER::base_learner * noop_setup(options_i &options, vw &)
Definition: noop.cc:14
LEARNER::base_learner * lrqfa_setup(options_i &options, vw &all)
Definition: lrqfa.cc:133
base_learner * active_cover_setup(options_i &options, vw &all)
base_learner * setup(options_i &options, vw &all)
Definition: search.cc:2671
LEARNER::base_learner * lda_setup(options_i &options, vw &all)
Definition: lda_core.cc:1299
LEARNER::base_learner * boosting_setup(options_i &options, vw &all)
Definition: boosting.cc:396
base_learner * bs_setup(options_i &options, vw &all)
Definition: bs.cc:231
base_learner * lrq_setup(options_i &options, vw &all)
Definition: lrq.cc:159
LEARNER::base_learner * shared_feature_merger_setup(config::options_i &options, vw &all)
base_learner * nn_setup(options_i &options, vw &all)
Definition: nn.cc:417
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
base_learner * cb_explore_setup(options_i &options, vw &all)
Definition: cb_explore.cc:274
base_learner * active_setup(options_i &options, vw &all)
Definition: active.cc:133
base_learner * cb_adf_setup(options_i &options, vw &all)
Definition: cb_adf.cc:481
base_learner * cbifyldf_setup(options_i &options, vw &all)
Definition: cbify.cc:456
base_learner * cb_algs_setup(options_i &options, vw &all)
Definition: cb_algs.cc:132
base_learner * stagewise_poly_setup(options_i &options, vw &all)
base_learner * mwt_setup(options_i &options, vw &all)
Definition: mwt.cc:236
LEARNER::base_learner * setup(config::options_i &options, vw &all)
base_learner * log_multi_setup(options_i &options, vw &all)
Definition: log_multi.cc:496
base_learner * csldf_setup(options_i &options, vw &all)
Definition: csoaa.cc:787
base_learner * gd_mf_setup(options_i &options, vw &all)
Definition: gd_mf.cc:327
base_learner * recall_tree_setup(options_i &options, vw &all)
Definition: recall_tree.cc:502
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
base_learner * ftrl_setup(options_i &options, vw &all)
Definition: ftrl.cc:335
LEARNER::base_learner * marginal_setup(options_i &options, vw &all)
Definition: marginal.cc:351
base_learner * baseline_setup(options_i &options, vw &all)
Definition: baseline.cc:193
base_learner * cb_sample_setup(options_i &options, vw &all)
Definition: cb_sample.cc:97
base_learner * cs_active_setup(options_i &options, vw &all)
Definition: cs_active.cc:310
LEARNER::base_learner * interact_setup(options_i &options, vw &all)
Definition: interact.cc:156
base_learner * warm_cb_setup(options_i &options, vw &all)
Definition: warm_cb.cc:552
LEARNER::base_learner * setup(VW::config::options_i &options, vw &all)
base_learner * mf_setup(options_i &options, vw &all)
Definition: mf.cc:195
LEARNER::base_learner * topk_setup(options_i &options, vw &all)
Definition: topk.cc:132
base_learner * ccb_explore_adf_setup(options_i &options, vw &all)
LEARNER::base_learner * setup(config::options_i &options, vw &all)
LEARNER::base_learner * sender_setup(options_i &options, vw &all)
Definition: sender.cc:100
LEARNER::base_learner * l
Definition: global_data.h:383
base_learner * confidence_setup(options_i &options, vw &all)
Definition: confidence.cc:86
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
base_learner * ect_setup(options_i &options, vw &all)
Definition: ect.cc:340
std::stack< LEARNER::base_learner *(*)(VW::config::options_i &, vw &)> reduction_stack
Definition: global_data.h:513

◆ parse_source()

input_options parse_source ( vw & all,
options_i & options 
)

Definition at line 421 of file parse_args.cc.

References vw::active, VW::config::option_group_definition::add(), add(), VW::config::options_i::add_and_parse(), input_options::cache, input_options::cache_files, input_options::compressed, input_options::daemon, vw::daemon, vw::data_filename, input_options::dsjson, ends_with(), input_options::foreground, vw::holdout_set_off, input_options::json, input_options::kill_cache, VW::config::make_option(), vw::num_children, vw::numpasses, vw::p, input_options::pid_file, input_options::port, input_options::port_file, set_compressed(), vw::stdin_off, THROW, vw::trace_message, and VW::config::options_i::was_supplied().

Referenced by parse_sources().

422 {
423  input_options parsed_options;
424 
425  option_group_definition input_options("Input options");
426  input_options.add(make_option("data", all.data_filename).short_name("d").help("Example set"))
427  .add(make_option("daemon", parsed_options.daemon).help("persistent daemon mode on port 26542"))
428  .add(make_option("foreground", parsed_options.foreground)
429  .help("in persistent daemon mode, do not run in the background"))
430  .add(make_option("port", parsed_options.port).help("port to listen on; use 0 to pick unused port"))
431  .add(make_option("num_children", all.num_children).help("number of children for persistent daemon mode"))
432  .add(make_option("pid_file", parsed_options.pid_file).help("Write pid file in persistent daemon mode"))
433  .add(make_option("port_file", parsed_options.port_file).help("Write port used in persistent daemon mode"))
434  .add(make_option("cache", parsed_options.cache).short_name("c").help("Use a cache. The default is <data>.cache"))
435  .add(make_option("cache_file", parsed_options.cache_files).help("The location(s) of cache_file."))
436  .add(make_option("json", parsed_options.json).help("Enable JSON parsing."))
437  .add(make_option("dsjson", parsed_options.dsjson).help("Enable Decision Service JSON parsing."))
438  .add(make_option("kill_cache", parsed_options.kill_cache)
439  .short_name("k")
440  .help("do not reuse existing cache: create a new one always"))
441  .add(
442  make_option("compressed", parsed_options.compressed)
443  .help(
444  "use gzip format whenever possible. If a cache file is being created, this option creates a "
445  "compressed cache file. A mixture of raw-text & compressed inputs are supported with autodetection."))
446  .add(make_option("no_stdin", all.stdin_off).help("do not default to reading from stdin"));
447 
448  options.add_and_parse(input_options);
449 
450  // If the option provider is program_options try and retrieve data as a positional parameter.
451  options_i* options_ptr = &options;
452  auto boost_options = dynamic_cast<options_boost_po*>(options_ptr);
453  if (boost_options)
454  {
455  std::string data;
456  if (boost_options->try_get_positional_option_token("data", data, -1))
457  {
458  if (all.data_filename != data)
459  {
460  all.data_filename = data;
461  }
462  }
463  }
464 
465  if (parsed_options.daemon || options.was_supplied("pid_file") || (options.was_supplied("port") && !all.active))
466  {
467  all.daemon = true;
468  // allow each child to process up to 1e5 connections
469  all.numpasses = (size_t)1e5;
470  }
471 
472  // Add an implicit cache file based on the data filename.
473  if (parsed_options.cache)
474  {
475  parsed_options.cache_files.push_back(all.data_filename + ".cache");
476  }
477 
478  if (parsed_options.compressed)
479  set_compressed(all.p);
480 
481  if (ends_with(all.data_filename, ".gz"))
482  set_compressed(all.p);
483 
484  if ((parsed_options.cache || options.was_supplied("cache_file")) && options.was_supplied("invert_hash"))
485  THROW("invert_hash is incompatible with a cache file. Use it in single pass mode only.");
486 
487  if (!all.holdout_set_off &&
488  (options.was_supplied("output_feature_regularizer_binary") ||
489  options.was_supplied("output_feature_regularizer_text")))
490  {
491  all.holdout_set_off = true;
492  all.trace_message << "Making holdout_set_off=true since output regularizer specified" << endl;
493  }
494 
495  return parsed_options;
496 }
std::string port_file
Definition: parse_args.h:17
std::vector< std::string > cache_files
Definition: parse_args.h:20
size_t port
Definition: parse_args.h:15
virtual void add_and_parse(const option_group_definition &group)=0
bool holdout_set_off
Definition: global_data.h:499
void set_compressed(parser *par)
Definition: parser.cc:82
parser * p
Definition: global_data.h:377
bool compressed
Definition: parse_args.h:24
bool active
Definition: global_data.h:489
vw_ostream trace_message
Definition: global_data.h:424
virtual bool was_supplied(const std::string &key)=0
bool kill_cache
Definition: parse_args.h:23
bool foreground
Definition: parse_args.h:14
size_t numpasses
Definition: global_data.h:451
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
bool ends_with(std::string const &fullString, std::string const &ending)
Definition: parse_args.cc:100
bool stdin_off
Definition: global_data.h:527
size_t num_children
Definition: global_data.h:406
std::string pid_file
Definition: parse_args.h:16
#define THROW(args)
Definition: vw_exception.h:181
std::string data_filename
Definition: global_data.h:403
bool daemon
Definition: global_data.h:405

◆ parse_sources()

void parse_sources ( options_i options,
vw all,
io_buf model,
bool  skipModelLoad 
)

Definition at line 1551 of file parse_args.cc.

References io_buf::close_file(), enable_sources(), LEARNER::learner< T, E >::increment, vw::l, load_input_model(), vw::numpasses, parse_source(), vw::quiet, parameters::stride_shift(), vw::weights, and vw::wpp.

Referenced by VW::initialize().

1552 {
1553  if (!skipModelLoad)
1554  load_input_model(all, model);
1555  else
1556  model.close_file();
1557 
1558  auto parsed_source_options = parse_source(all, options);
1559  enable_sources(all, all.quiet, all.numpasses, parsed_source_options);
1560 
1561  // force wpp to be a power of 2 to avoid 32-bit overflow
1562  uint32_t i = 0;
1563  size_t params_per_problem = all.l->increment;
1564  while (params_per_problem > ((uint64_t)1 << i)) i++;
1565  all.wpp = (1 << i) >> all.weights.stride_shift();
1566 }
parameters weights
Definition: global_data.h:537
void enable_sources(vw &all, bool quiet, size_t passes, input_options &input_options)
Definition: parser.cc:312
void load_input_model(vw &all, io_buf &io_temp)
Definition: parse_args.cc:1200
virtual bool close_file()
Definition: io_buf.h:204
bool quiet
Definition: global_data.h:487
input_options parse_source(vw &all, options_i &options)
Definition: parse_args.cc:421
uint32_t wpp
Definition: global_data.h:432
size_t numpasses
Definition: global_data.h:451
size_t increment
Definition: learner.h:153
LEARNER::base_learner * l
Definition: global_data.h:383
uint32_t stride_shift()

◆ setup_base()

LEARNER::base_learner* setup_base ( options_i & options,
vw & all 
)

Definition at line 1222 of file parse_args.cc.

References vw::reduction_stack, and setup_base().

Referenced by active_cover_setup(), active_setup(), audit_regressor_setup(), autolink_setup(), baseline_setup(), binary_setup(), boosting_setup(), bs_setup(), cb_adf_setup(), cb_algs_setup(), cb_explore_setup(), cb_sample_setup(), cbify_setup(), cbifyldf_setup(), CCB::ccb_explore_adf_setup(), classweight_setup(), confidence_setup(), cs_active_setup(), CSOAA::csldf_setup(), CSOAA::csoaa_setup(), ect_setup(), explore_eval_setup(), ExpReplay::expreplay_setup(), interact_setup(), log_multi_setup(), lrq_setup(), lrqfa_setup(), marginal_setup(), memory_tree_setup(), mf_setup(), multilabel_oaa_setup(), mwt_setup(), nn_setup(), oaa_setup(), parse_reductions(), recall_tree_setup(), scorer_setup(), VW::cb_explore_adf::softmax::setup(), VW::cb_explore_adf::greedy::setup(), VW::cb_explore_adf::first::setup(), VW::cb_explore_adf::bag::setup(), VW::cb_explore_adf::cover::setup(), VW::cb_explore_adf::regcb::setup(), Search::setup(), setup_base(), VW::shared_feature_merger::shared_feature_merger_setup(), stagewise_poly_setup(), topk_setup(), and warm_cb_setup().

1223 {
1224  auto setup_func = all.reduction_stack.top();
1225  all.reduction_stack.pop();
1226  auto base = setup_func(options, all);
1227 
1228  if (base == nullptr)
1229  return setup_base(options, all);
1230  else
1231  return base;
1232 }
LEARNER::base_learner * setup_base(options_i &options, vw &all)
Definition: parse_args.cc:1222
std::stack< LEARNER::base_learner *(*)(VW::config::options_i &, vw &)> reduction_stack
Definition: global_data.h:513

◆ spoof_hex_encoded_namespaces()

std::string spoof_hex_encoded_namespaces ( const std::string &  arg)

Definition at line 568 of file parse_args.cc.

References c.

Referenced by lrq_setup(), lrqfa_setup(), and parse_feature_tweaks().

// Replaces every "\xHH" escape in `arg` (backslash, 'x', two hexadecimal digits) with the
// single byte it encodes and returns the resulting string; all other characters are copied
// unchanged.
//
// A "\x" that is not followed by two hexadecimal digits is reported on stderr and left in
// the output as written. Validation is done per digit: the previous strtoul-based check
// also accepted forms such as "\x 4" or "\x-1" (leading whitespace / sign) and silently
// decoded them.
std::string spoof_hex_encoded_namespaces(const std::string& arg)
{
  // Numeric value of one hexadecimal digit, or -1 when `ch` is not a hex digit.
  auto hex_value = [](char ch) -> int {
    if (ch >= '0' && ch <= '9')
      return ch - '0';
    if (ch >= 'a' && ch <= 'f')
      return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F')
      return ch - 'A' + 10;
    return -1;
  };

  std::string res;
  res.reserve(arg.size());

  size_t pos = 0;
  // An escape needs four characters, so only scan while pos + 3 is still a valid index.
  while (pos + 3 < arg.size())
  {
    if (arg[pos] == '\\' && arg[pos + 1] == 'x')
    {
      const int hi = hex_value(arg[pos + 2]);
      const int lo = hex_value(arg[pos + 3]);
      if (hi >= 0 && lo >= 0)
      {
        res.push_back((char)(unsigned char)(hi * 16 + lo));
        pos += 4;
      }
      else
      {
        std::cerr << "Possibly malformed hex representation of a namespace: '\\x" << arg.substr(pos + 2, 2) << "'\n";
        res.push_back(arg[pos++]);
      }
    }
    else
      res.push_back(arg[pos++]);
  }

  // Fewer than four characters remain: they cannot hold an escape, copy them verbatim.
  res.append(arg, pos, std::string::npos);

  return res;
}
constexpr uint64_t c
Definition: rand48.cc:12

Variable Documentation

◆ interactions_settings_doubled

bool interactions_settings_doubled = false

Definition at line 498 of file parse_args.cc.

Referenced by load_header_merge_options(), and parse_feature_tweaks().