12 #include <netinet/tcp.h> 24 typedef int socklen_t;
26 int daemon(
int ,
int )
35 #define getpid _getpid 37 int getpid() {
return (
int)::GetCurrentProcessId(); }
45 #include <netinet/in.h> 70 bool is_test_only(uint32_t counter, uint32_t period, uint32_t after,
bool holdout_off,
71 uint32_t target_modulus)
77 return (counter % period == target_modulus);
79 return (counter > after);
94 buf->
read_file(filepointer, (
char*)&v_length,
sizeof(v_length));
96 THROW(
"cache version too long, cache file is probably invalid");
99 THROW(
"cache version too short, cache file is probably invalid");
101 std::vector<char> t(v_length);
102 buf->
read_file(filepointer, t.data(), v_length);
111 if (buf->
read_file(filepointer, &temp, 1) < 1)
112 THROW(
"failed to read");
115 THROW(
"data file is not a cache file");
118 if (buf->
read_file(filepointer, &cache_numbits,
sizeof(cache_numbits)) < (int)
sizeof(cache_numbits))
150 if (
std::find(fps.cbegin(), fps.cend(), fd) == fps.cend())
172 sockaddr_in client_address;
173 socklen_t size =
sizeof(client_address);
174 int f = (int)accept(all.
p->
bound_sock, (sockaddr*)&client_address, &size);
176 THROW(
"accept: " << strerror(errno));
196 for (
size_t i = 0; i < input->
files.
size(); i++)
200 THROW(
"argh, a bug in caching of some sort!");
209 int f = _fileno(stdin);
211 int f = fileno(stdin);
228 all.
trace_message <<
"Warning: you tried to make two write caches. Only the first one will be made." << endl;
232 std::string temp = newname + std::string(
".writing");
244 output->
write_file(f, &v_length,
sizeof(v_length));
252 all.
trace_message <<
"creating cache_file = " << newname << endl;
255 void parse_cache(
vw& all, std::vector<std::string> cache_files,
bool kill_cache,
bool quiet)
259 for (
auto& file : cache_files)
267 catch (
const std::exception&)
279 all.
trace_message <<
"WARNING: cache file is ignored as it's made with less bit precision than required!" 287 all.
trace_message <<
"using cache_file = " << file.c_str() << endl;
299 if (cache_files.size() == 0)
308 #ifndef MAP_ANONYMOUS 309 #define MAP_ANONYMOUS MAP_ANON 321 int lastError = WSAStartup(MAKEWORD(2, 2), &wsaData);
323 THROWERRNO(
"WSAStartup() returned error:" << lastError);
325 all.
p->
bound_sock = (int)socket(PF_INET, SOCK_STREAM, 0);
328 std::stringstream msg;
329 msg <<
"socket: " << strerror(errno);
331 THROW(msg.str().c_str());
335 if (setsockopt(all.
p->
bound_sock, SOL_SOCKET, SO_REUSEADDR, (
char*)&on,
sizeof(on)) < 0)
336 all.
trace_message <<
"setsockopt SO_REUSEADDR: " << strerror(errno) << endl;
340 if (setsockopt(all.
p->
bound_sock, SOL_SOCKET, SO_KEEPALIVE, (
char*)&enableTKA,
sizeof(enableTKA)) < 0)
341 all.
trace_message <<
"setsockopt SO_KEEPALIVE: " << strerror(errno) << endl;
344 address.sin_family = AF_INET;
345 address.sin_addr.s_addr = htonl(INADDR_ANY);
346 short unsigned int port = 26542;
348 port = (uint16_t)input_options.
port;
349 address.sin_port = htons(port);
352 if (::bind(all.
p->
bound_sock, (sockaddr*)&address,
sizeof(address)) < 0)
362 socklen_t address_size =
sizeof(address);
363 if (getsockname(all.
p->
bound_sock, (sockaddr*)&address, &address_size) < 0)
365 all.
trace_message <<
"getsockname: " << strerror(errno) << endl;
367 std::ofstream port_file;
368 port_file.open(input_options.
port_file.c_str());
369 if (!port_file.is_open())
372 port_file << ntohs(address.sin_port) << endl;
380 if (!all.
active && daemon(1, 1))
387 std::ofstream pid_file;
388 pid_file.open(input_options.
pid_file.c_str());
389 if (!pid_file.is_open())
390 THROW(
"error writing pid file");
392 pid_file << getpid() << endl;
399 THROW(
"not supported on windows");
415 children.
resize(num_children);
416 for (
size_t i = 0; i < num_children; i++)
420 if ((children[i] = fork()) == 0)
422 all.
quiet |= (i > 0);
432 memset(&sa, 0,
sizeof(sa));
434 sigaction(SIGTERM, &sa,
nullptr);
441 pid_t pid = wait(&status);
444 for (
size_t i = 0; i < num_children; i++) kill(children[i], SIGTERM);
450 for (
size_t i = 0; i < num_children; i++)
451 if (pid == children[i])
453 if ((children[i] = fork()) == 0)
455 all.
quiet |= (i > 0);
468 sockaddr_in client_address;
469 socklen_t size =
sizeof(client_address);
473 int f = (int)accept(all.
p->
bound_sock, (sockaddr*)&client_address, &size);
485 all.
trace_message <<
"reading data from port " << port << endl;
510 all.
trace_message <<
"ignoring text input in favor of cache input" << endl;
521 catch (std::exception
const&)
526 all.
trace_message <<
"can't open '" << temp <<
"', sailing on!" << endl;
534 if (input_options.
json || input_options.
dsjson)
540 all.
p->
reader = &read_features_json<true>;
546 all.
p->
reader = &read_features_json<false>;
564 THROW(
"need a cache file for multiple passes : try using --cache_file");
567 if (!quiet && !all.
daemon)
587 if (ngram == 0 && gram_mask.
last() < initial_length)
589 size_t last = initial_length - gram_mask.
last();
590 for (
size_t i = 0; i < last; i++)
592 uint64_t new_index = fs.
indicies[i];
593 for (
size_t n = 1; n < gram_mask.
size(); n++)
599 std::string feature_name(fs.
space_names[i].get()->second);
600 for (
size_t n = 1; n < gram_mask.
size(); n++)
602 feature_name += std::string(
"^");
603 feature_name += std::string(fs.
space_names[i + gram_mask[n]].get()->second);
612 addgrams(all, ngram - 1, skip_gram, fs, initial_length, gram_mask, 0);
615 if (skip_gram > 0 && ngram > 0)
616 addgrams(all, ngram, skip_gram - 1, fs, initial_length, gram_mask, skips + 1);
635 for (
size_t n = 1; n < all.
ngram[index]; n++)
713 memmove(i, i + 1, (ae->
indices.
end() - (i + 1)) *
sizeof(*i));
731 for (
auto& j : fs.indicies) j *= multiplier;
732 ae->num_features = 0;
733 ae->total_sum_feat_sq = 0;
736 ae->num_features += fs.size();
737 ae->total_sum_feat_sq += fs.sum_feat_sq;
743 size_t new_features_cnt;
744 float new_features_sum_feat_sq;
746 ae->num_features += new_features_cnt;
747 ae->total_sum_feat_sq += new_features_sum_feat_sq;
796 if (label.length() > 0)
799 for (
size_t i = 0; i < len; i++)
801 unsigned char index = features[i].
name;
803 for (
size_t j = 0; j < features[i].
len; j++)
819 for (
size_t idx = 0; idx < len; ++idx)
822 fs_ptr[fs_count].
name = i;
824 fs_ptr[fs_count].
fs =
new feature[fs_ptr[fs_count].
len];
832 fs_ptr[fs_count].fs[f_count] = t;
842 for (
size_t i = 0; i < len; i++)
delete[] features[i].fs;
849 char* cstr = (
char*)label.c_str();
850 substring str = {cstr, cstr + label.length()};
936 if (i < scores.
size())
938 return scores[i].score;
962 ex->
tag = v_init<char>();
963 ex->
indices = v_init<namespace_index>();
985 if (output !=
nullptr)
void resize(size_t length)
v_array< namespace_index > indices
std::array< uint32_t, NUM_NAMESPACES > skips
void clean_example(vw &, example &, bool rewind)
ACTION_SCORE::action_scores a_s
bool is_ring_example(vw &all, example *ae)
void parse_example_label(vw &all, example &ec, std::string label)
void enable_sources(vw &all, bool quiet, size_t passes, input_options &input_options)
void unique_sort_features(uint64_t parse_mask, example *ae)
virtual bool compressed()
void(* delete_prediction)(void *)
VW::object_pool< example, example_initializer > example_pool
std::array< uint32_t, NUM_NAMESPACES > ngram
uint64_t stride_shift(const stagewise_poly &poly, uint64_t idx)
void push_back(feature_value v, feature_index i)
void finalize_source(parser *p)
float get_confidence(example *ec)
void read_line(vw &all, example *ex, char *line)
VW::config::options_i * options
float get_importance(example *ec)
v_array< substring > words
void setup_example(vw &all, example *ae)
const char * get_tag(example *ec)
void(* delete_label)(void *)
static constexpr int WRITE
std::shared_ptr< audit_strings > audit_strings_ptr
virtual bool close_file()
uint32_t cache_numbits(io_buf *buf, int filepointer)
std::vector< std::string > ngram_strings
void eval_count_of_generated_ft(vw &all, example &ec, size_t &new_features_cnt, float &new_features_value)
void main_parse_loop(vw *all)
constexpr int quadratic_constant
v_array< feature_index > indicies
example * get_example(parser *p)
int read_features_string(vw *all, v_array< example *> &examples)
void(* default_label)(void *)
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
bool(* test_label)(void *)
void addgrams(vw &all, size_t ngram, size_t skip_gram, features &fs, size_t initial_length, v_array< size_t > &gram_mask, size_t skips)
uint64_t begin_parsed_examples
VW::ptr_queue< example > ready_parsed_examples
v_array< int > final_prediction_sink
the core definition of a set of features.
void generateGrams(vw &all, example *&ex)
void binary_print_result(int f, float res, float weight, v_array< char >)
std::vector< std::string > limit_strings
virtual void reset_file(int f)
void finish(vw &all, bool delete_all)
void set_compressed(parser *par)
int example_is_newline(example const &ec)
v_array< substring > name
void read_lines(vw *all, char *line, size_t, v_array< example *> &examples)
std::array< bool, NUM_NAMESPACES > ignore
void parse_dispatch(vw &all, dispatch_fptr dispatch)
const version_struct version(PACKAGE_VERSION)
virtual ssize_t read_file(int f, void *buf, size_t nbytes)
static void close_file_or_socket(int f)
bool is_test_only(uint32_t counter, uint32_t period, uint32_t after, bool holdout_off, uint32_t target_modulus)
static constexpr int READ
std::array< features, NUM_NAMESPACES > feature_space
std::array< uint32_t, NUM_NAMESPACES > limit
bool is_from_pool(T *obj) const
virtual int open_file(const char *name, bool stdin_off)
v_array< char > finalname
int(* reader)(vw *, v_array< example *> &examples)
virtual size_t num_files()
void reset_source(vw &all, size_t numbits)
float(* get_weight)(void *)
primitive_feature_space * export_example(vw &all, example *ec, size_t &len)
void push_many(v_array< T > &v, const T *_begin, size_t num)
example * import_example(vw &all, const std::string &label, primitive_feature_space *features, size_t len)
void push_back(const T &new_ele)
void start_parser(vw &all)
void add_label(example *ec, float label, float weight, float base)
void parse_cache(vw &all, std::vector< std::string > cache_files, bool kill_cache, bool quiet)
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
virtual bool was_supplied(const std::string &key)=0
void empty_example(vw &, example &ec)
bool sort(uint64_t parse_mask)
unsigned char namespace_index
void(* cache_label)(void *, io_buf &cache)
void share(size_t length)
constexpr uint64_t constant
void(* parse_label)(parser *, shared_data *, void *, v_array< substring > &)
float get_initial(example *ec)
v_array< float > & get_cost_sensitive_prediction_confidence_scores(example *ec)
size_t get_tag_length(example *ec)
void thread_dispatch(vw &all, v_array< example *> examples)
void add_constant_feature(vw &vw, example *ec)
v_array< char > currentname
float get_prediction(example *ec)
v_array< size_t > gram_mask
void finish_example(vw &, example &)
float get_action_score(example *ec, size_t i)
size_t get_feature_number(example *ec)
bool decision_service_json
void end_pass_example(vw &all, example *ae)
iterator over values and indices
void feature_limit(vw &all, example *ex)
v_array< audit_strings_ptr > space_names
MULTILABEL::labels multilabels
node_pred * find(recall_tree &b, uint32_t cn, example &ec)
void make_write_cache(vw &all, std::string &newname, bool quiet)
void print_result(int f, float res, v_array< char > tag, float lb, float ub)
void cache_features(io_buf &cache, example *ae, uint64_t mask)
v_array< uint32_t > label_v
void adjust_used_index(vw &)
example * read_example(vw &all, char *example_line)
uint64_t end_parsed_examples
example * new_unused_example(vw &all)
float get_cost_sensitive_prediction(example *ec)
std::vector< std::string > interactions
void free_parser(vw &all)
void(* text_reader)(vw *, char *, size_t, v_array< example *> &)
int read_cached_features(vw *all, v_array< example *> &examples)
void unique_features(features &fs, int max)
std::condition_variable output_done
float get_label(example *ec)
example & get_unused_example(vw *all)
example * operator()(example *ex)
void return_object(T *obj)
constexpr unsigned char constant_namespace
uint32_t * get_multilabel_predictions(example *ec, size_t &len)
bool children(log_multi &b, uint32_t &current, uint32_t &class_index, uint32_t label)
float get_topic_prediction(example *ec, size_t i)
virtual ssize_t write_file(int f, const void *buf, size_t nbytes)
bool emptylines_separate_examples
size_t get_action_score_length(example *ec)
void(* print)(int, float, float, v_array< char >)
void releaseFeatureSpace(primitive_feature_space *features, size_t len)
std::string data_filename
const char * to_string(prediction_type_t prediction_type)
void lock_done(parser &p)
void setup_examples(vw &all, v_array< example *> &examples)
std::pair< std::string, std::string > audit_strings