Vowpal Wabbit
vw.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #pragma once
7 
8 #ifdef _WIN32
9 #ifdef LEAKCHECK
10 // Visual Leak Detector for memory leak detection on Windows
11 #include <vld.h>
12 #endif
13 #endif
14 
15 #include "global_data.h"
16 #include "example.h"
17 #include "hash.h"
18 #include "simple_label.h"
19 #include "parser.h"
20 #include "parse_example.h"
21 
22 #include "options.h"
23 
24 namespace VW
25 {
26 /* Caveats:
27  (1) Some commandline parameters do not make sense as a library.
28  (2) The code is not yet reentrant.
29  */
30 vw* initialize(config::options_i& options, io_buf* model = nullptr, bool skipModelLoad = false,
31  trace_message_t trace_listener = nullptr, void* trace_context = nullptr);
32 vw* initialize(std::string s, io_buf* model = nullptr, bool skipModelLoad = false,
33  trace_message_t trace_listener = nullptr, void* trace_context = nullptr);
34 vw* initialize(int argc, char* argv[], io_buf* model = nullptr, bool skipModelLoad = false,
35  trace_message_t trace_listener = nullptr, void* trace_context = nullptr);
37  vw* vw_model, std::string extra_args, trace_message_t trace_listener = nullptr, void* trace_context = nullptr);
38 // Allows the input command line string to have spaces escaped by '\'
39 vw* initialize_escaped(std::string const& s, io_buf* model = nullptr, bool skipModelLoad = false,
40  trace_message_t trace_listener = nullptr, void* trace_context = nullptr);
41 
42 void cmd_string_replace_value(std::stringstream*& ss, std::string flag_to_replace, std::string new_value);
43 
44 VW_DEPRECATED("By value version is deprecated, pass std::string by const ref instead using `to_argv`")
45 char** get_argv_from_string(std::string s, int& argc);
46 
47 // The argv array from both of these functions must be freed.
48 char** to_argv(std::string const& s, int& argc);
49 char** to_argv_escaped(std::string const& s, int& argc);
50 void free_args(int argc, char* argv[]);
51 
52 const char* are_features_compatible(vw& vw1, vw& vw2);
53 
54 /*
55  Call finish() after you are done with the vw instance. This cleans up memory usage.
56  */
57 void finish(vw& all, bool delete_all = true);
58 void sync_stats(vw& all);
59 
60 void start_parser(vw& all);
61 void end_parser(vw& all);
62 bool is_ring_example(vw& all, example* ae);
63 
64 struct primitive_feature_space // just a helper definition.
65 {
66  unsigned char name;
68  size_t len;
69 };
70 
71 // The next commands deal with creating examples. Caution: VW does not allow creation of many examples at once by
72 // default. You can adjust the exact number by tweaking ring_size.
73 
74 /* The simpler of the two ways to create an example. An example_line is the literal line in a VW-format datafile.
75  */
76 example* read_example(vw& all, char* example_line);
77 example* read_example(vw& all, std::string example_line);
78 
79 // The more complex way to create an example.
80 
81 // after you create and fill feature_spaces, get an example with everything filled in.
82 example* import_example(vw& all, const std::string& label, primitive_feature_space* features, size_t len);
83 
84 // callers must free memory using release_example
85 // this interface must be used with care as finish_example is a no-op for these examples.
86 // thus any delay introduced when freeing examples must be at least as long as the one
87 // introduced by all.l->finish_example implementations.
88 // e.g. multiline examples as used by cb_adf must not be released before the finishing newline example.
89 example* alloc_examples(size_t, size_t);
90 void dealloc_example(void (*delete_label)(void*), example& ec, void (*delete_prediction)(void*) = nullptr);
91 
92 void parse_example_label(vw& all, example& ec, std::string label);
93 void setup_examples(vw& all, v_array<example*>& examples);
94 void setup_example(vw& all, example* ae);
97 float get_topic_prediction(example* ec, size_t i); // i=0 to max topic -1
98 float get_label(example* ec);
99 float get_importance(example* ec);
100 float get_initial(example* ec);
101 float get_prediction(example* ec);
104 uint32_t* get_multilabel_predictions(example* ec, size_t& len);
105 float get_action_score(example* ec, size_t i);
106 size_t get_action_score_length(example* ec);
107 size_t get_tag_length(example* ec);
108 const char* get_tag(example* ec);
109 size_t get_feature_number(example* ec);
110 float get_confidence(example* ec);
111 feature* get_features(vw& all, example* ec, size_t& feature_number);
112 void return_features(feature* f);
113 
114 void add_constant_feature(vw& all, example* ec);
115 void add_label(example* ec, float label, float weight = 1, float base = 0);
116 
117 // notify VW that you are done with the example.
118 void finish_example(vw& all, example& ec);
119 void finish_example(vw& all, multi_ex& ec);
120 void empty_example(vw& all, example& ec);
121 
122 void copy_example_data(bool audit, example*, example*, size_t, void (*copy_label)(void*, void*));
123 void copy_example_metadata(bool audit, example*, example*);
124 void copy_example_data(bool audit, example*, example*); // metadata + features, don't copy the label
125 void clear_example_data(example&); // don't clear the label
127 
128 // after export_example, must call releaseFeatureSpace to free native memory
129 primitive_feature_space* export_example(vw& all, example* e, size_t& len);
131 
132 void save_predictor(vw& all, std::string reg_name);
133 void save_predictor(vw& all, io_buf& buf);
134 
135 // inlines
136 
137 // First create the hash of a namespace.
138 inline uint64_t hash_space(vw& all, const std::string& s)
139 {
140  substring ss;
141  ss.begin = (char*)s.c_str();
142  ss.end = ss.begin + s.length();
143  return all.p->hasher(ss, all.hash_seed);
144 }
145 inline uint64_t hash_space_static(const std::string& s, const std::string& hash)
146 {
147  substring ss;
148  ss.begin = (char*)s.c_str();
149  ss.end = ss.begin + s.length();
150  return getHasher(hash)(ss, 0);
151 }
152 // Then use it as the seed for hashing features.
153 inline uint64_t hash_feature(vw& all, const std::string& s, uint64_t u)
154 {
155  substring ss;
156  ss.begin = (char*)s.c_str();
157  ss.end = ss.begin + s.length();
158  return all.p->hasher(ss, u) & all.parse_mask;
159 }
160 inline uint64_t hash_feature_static(const std::string& s, uint64_t u, const std::string& h, uint32_t num_bits)
161 {
162  substring ss;
163  ss.begin = (char*)s.c_str();
164  ss.end = ss.begin + s.length();
165  size_t parse_mark = (1 << num_bits) - 1;
166  return getHasher(h)(ss, u) & parse_mark;
167 }
168 
169 inline uint64_t hash_feature_cstr(vw& all, char* fstr, uint64_t u)
170 {
171  substring ss;
172  ss.begin = fstr;
173  ss.end = ss.begin + strlen(fstr);
174  return all.p->hasher(ss, u) & all.parse_mask;
175 }
176 
177 inline float get_weight(vw& all, uint32_t index, uint32_t offset)
178 {
179  return (&all.weights[((uint64_t)index) << all.weights.stride_shift()])[offset];
180 }
181 
182 inline void set_weight(vw& all, uint32_t index, uint32_t offset, float value)
183 {
184  (&all.weights[((uint64_t)index) << all.weights.stride_shift()])[offset] = value;
185 }
186 
187 inline uint32_t num_weights(vw& all) { return (uint32_t)all.length(); }
188 
189 inline uint32_t get_stride(vw& all) { return all.weights.stride(); }
190 
191 inline void init_features(primitive_feature_space& fs, size_t features_count)
192 {
193  fs.fs = new feature[features_count];
194  fs.len = features_count;
195 }
196 
197 inline void set_feature(primitive_feature_space& fs, size_t index, uint64_t feature_hash, float value)
198 {
199  fs.fs[index].weight_index = feature_hash;
200  fs.fs[index].x = value;
201 }
202 } // namespace VW
void return_features(feature *f)
Definition: example.cc:137
vw * seed_vw_model(vw *vw_model, const std::string extra_args, trace_message_t trace_listener, void *trace_context)
Definition: parse_args.cc:1765
void copy_label(void *dst, void *src)
Definition: cb.cc:104
void free_args(int argc, char *argv[])
Definition: parse_args.cc:1648
size_t length()
Definition: global_data.h:513
void clear_example_data(example &)
bool is_ring_example(vw &all, example *ae)
Definition: parser.cc:1009
void parse_example_label(vw &all, example &ec, std::string label)
Definition: parser.cc:846
parameters weights
Definition: global_data.h:537
char ** to_argv(std::string const &s, int &argc)
Definition: parse_args.cc:1623
uint32_t get_stride(vw &all)
Definition: vw.h:189
float x
Definition: feature_group.h:27
float get_confidence(example *ec)
Definition: parser.cc:954
void copy_example_metadata(bool, example *dst, example *src)
Definition: example.cc:48
float get_importance(example *ec)
Definition: parser.cc:915
void setup_example(vw &all, example *ae)
Definition: parser.cc:678
const char * get_tag(example *ec)
Definition: parser.cc:950
void copy_example_data(bool audit, example *dst, example *src)
Definition: example.cc:72
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
example * get_example(parser *p)
Definition: parser.cc:909
void dealloc_example(void(*delete_label)(void *), example &ec, void(*delete_prediction)(void *))
Definition: example.cc:219
void set_feature(primitive_feature_space &fs, size_t index, uint64_t feature_hash, float value)
Definition: vw.h:197
uint32_t stride()
the core definition of a set of features.
void delete_label(void *v)
Definition: cb.cc:98
uint64_t hash_feature_static(const std::string &s, uint64_t u, const std::string &h, uint32_t num_bits)
Definition: vw.h:160
char ** get_argv_from_string(std::string s, int &argc)
Definition: parse_args.cc:1646
const char * are_features_compatible(vw &vw1, vw &vw2)
Definition: parse_args.cc:501
hash_func_t hasher
Definition: parser.h:73
void save_predictor(vw &all, std::string reg_name)
void finish(vw &all, bool delete_all)
Definition: parse_args.cc:1823
hash_func_t getHasher(const std::string &s)
uint64_t weight_index
Definition: feature_group.h:28
example * alloc_examples(size_t, size_t count=1)
Definition: example.cc:204
parser * p
Definition: global_data.h:377
feature * get_features(vw &all, example *ec, size_t &feature_map_len)
Definition: example.cc:125
primitive_feature_space * export_example(vw &all, example *ec, size_t &len)
Definition: parser.cc:812
example * import_example(vw &all, const std::string &label, primitive_feature_space *features, size_t len)
Definition: parser.cc:791
void start_parser(vw &all)
Definition: parser.cc:974
uint64_t hash_feature_cstr(vw &all, char *fstr, uint64_t u)
Definition: vw.h:169
void add_label(example *ec, float label, float weight, float base)
Definition: parser.cc:784
float get_weight(vw &all, uint32_t index, uint32_t offset)
Definition: vw.h:177
void init_features(primitive_feature_space &fs, size_t features_count)
Definition: vw.h:191
void empty_example(vw &, example &ec)
Definition: parser.cc:857
unsigned char namespace_index
void end_parser(vw &all)
Definition: parser.cc:1007
float get_initial(example *ec)
Definition: parser.cc:917
char ** to_argv_escaped(std::string const &s, int &argc)
Definition: parse_args.cc:1604
v_array< float > & get_cost_sensitive_prediction_confidence_scores(example *ec)
Definition: parser.cc:923
size_t get_tag_length(example *ec)
Definition: parser.cc:948
void set_weight(vw &all, uint32_t index, uint32_t offset, float value)
Definition: vw.h:182
void add_constant_feature(vw &vw, example *ec)
Definition: parser.cc:774
unsigned char name
Definition: vw.h:66
vw * initialize(options_i &options, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
Definition: parse_args.cc:1654
float get_prediction(example *ec)
Definition: parser.cc:919
Definition: io_buf.h:54
void cmd_string_replace_value(std::stringstream *&ss, std::string flag_to_replace, std::string new_value)
Definition: parse_args.cc:1570
void finish_example(vw &, example &)
Definition: parser.cc:881
float get_action_score(example *ec, size_t i)
Definition: parser.cc:932
float weight
size_t get_feature_number(example *ec)
Definition: parser.cc:952
std::vector< example * > multi_ex
Definition: example.h:122
uint64_t parse_mask
Definition: global_data.h:453
vw * initialize_escaped(std::string const &s, io_buf *model, bool skipModelLoad, trace_message_t trace_listener, void *trace_context)
Definition: parse_args.cc:1731
example * read_example(vw &all, char *example_line)
Definition: parser.cc:761
void(* trace_message_t)(void *context, const std::string &)
Definition: autolink.cc:11
example * new_unused_example(vw &all)
Definition: parser.cc:753
float get_cost_sensitive_prediction(example *ec)
Definition: parser.cc:921
void move_feature_namespace(example *dst, example *src, namespace_index c)
Definition: example.cc:92
uint64_t hash_feature(vw &all, const std::string &s, uint64_t u)
Definition: vw.h:153
uint64_t hash_space(vw &all, const std::string &s)
Definition: vw.h:138
uint32_t stride_shift()
uint32_t hash_seed
Definition: global_data.h:401
Definition: parser.h:38
uint64_t hash_space_static(const std::string &s, const std::string &hash)
Definition: vw.h:145
uint32_t num_weights(vw &all)
Definition: vw.h:187
float get_label(example *ec)
Definition: parser.cc:913
uint32_t * get_multilabel_predictions(example *ec, size_t &len)
Definition: parser.cc:925
float get_topic_prediction(example *ec, size_t i)
Definition: parser.cc:911
constexpr uint64_t c
Definition: rand48.cc:12
void sync_stats(vw &all)
Definition: parse_args.cc:1804
size_t get_action_score_length(example *ec)
Definition: parser.cc:946
float f
Definition: cache.cc:40
void releaseFeatureSpace(primitive_feature_space *features, size_t len)
Definition: parser.cc:840
void setup_examples(vw &all, v_array< example *> &examples)
Definition: parser.cc:673