Vowpal Wabbit
parser.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #pragma once
7 #include "io_buf.h"
8 #include "parse_primitives.h"
9 #include "example.h"
10 #include "future_compat.h"
11 
12 // Mutex and CV cannot be used in managed C++, tell the compiler that this is unmanaged even if included in a managed
13 // project.
14 #ifdef _M_CEE
15 #pragma managed(push, off)
16 #undef _M_CEE
17 #include <mutex>
18 #include <condition_variable>
19 #define _M_CEE 001
20 #pragma managed(pop)
21 #else
22 #include <mutex>
23 #include <condition_variable>
24 #endif
25 
26 #include <memory>
27 #include "queue.h"
28 #include "object_pool.h"
29 
30 struct vw;
31 struct input_options;
32 
// NOTE(review): listing lines 33 and 35 are absent from this extract, so this brace pair has lost
// its header and its body. The cross-reference index at the end of this page lists
// `example * operator()(example *ex)` (parser.cc:957) and the pool type
// `VW::object_pool< example, example_initializer >`, so this is presumably
// `struct example_initializer` declaring that call operator -- confirm against the original header.
34 {
36 };
37 
// State held by the example parser: input/output buffers, reader callbacks, cache flags,
// parsed-example counters, synchronization primitives and socket bookkeeping.
//
// NOTE(review): this listing is missing several declaration lines (56, 63, 64, 66, 67, 73, 91,
// 100, 107). Per the cross-reference index at the end of this page they are, respectively:
// `~parser()`, `words`, `name`, `example_pool`, `ready_parsed_examples`, `hasher`, `gram_mask`,
// `parse_name` and `strict_parse`. The constructor below refers to most of them.
38 struct parser
39 {
    // Builds the parser.
    //   ring_size     - forwarded to example_pool and ready_parsed_examples, and stored in ring_size.
    //   strict_parse_ - stored in strict_parse.
    // The body heap-allocates the input and output io_buf objects, sets lp to simple_label and
    // initializes the v_array members with v_init.
    // NOTE(review): reader, text_reader, resettable and finished_count have no default member
    // initializer and are not assigned here; presumably they are set elsewhere before use -- confirm.
40  parser(size_t ring_size, bool strict_parse_)
41  : example_pool{ring_size}, ready_parsed_examples{ring_size}, ring_size{ring_size}, strict_parse{strict_parse_}
42  {
    // Owning raw pointers; released by the destructor body further down.
43  this->input = new io_buf{};
44  this->output = new io_buf{};
45  this->lp = simple_label;
46 
47  // Free parser must still be used for the following fields.
48  this->words = v_init<substring>();
49  this->name = v_init<substring>();
50  this->parse_name = v_init<substring>();
51  this->gram_mask = v_init<size_t>();
52  this->ids = v_init<size_t>();
53  this->counts = v_init<size_t>();
54  }
55 
    // Destructor body (its `~parser()` declarator, listing line 56, is missing from this extract).
    // Frees the two io_buf objects allocated in the constructor; the v_array members are not
    // released here (see the "Free parser" comment in the constructor).
57  {
58  delete input;
59  delete output;
60  }
61 
62  // helper(s) for text parsing
    // (listing lines 63-64, `words` and `name` per the index, are missing here)
65 
    // (listing lines 66-67, `example_pool` and `ready_parsed_examples` per the index, are missing here)
68 
69  io_buf* input = nullptr; // Input source(s)
    // Function pointers used to read examples; no default value is given in this listing.
70  int (*reader)(vw*, v_array<example*>& examples);
71  void (*text_reader)(vw*, char*, size_t, v_array<example*>&);
72 
    // (listing line 73, `hash_func_t hasher` per the index, is missing here)
74  bool resettable; // Whether or not the input can be reset.
75  io_buf* output = nullptr; // Where to output the cache.
76  bool write_cache = false;
77  bool sort_features = false;
78  bool sorted_cache = false;
79 
    // Fixed at construction (const); the same value is passed to both example containers.
80  const size_t ring_size;
81  uint64_t begin_parsed_examples = 0; // The index of the beginning parsed example.
82  uint64_t end_parsed_examples = 0; // The index of the fully parsed example.
83  uint32_t in_pass_counter = 0;
84  bool emptylines_separate_examples = false; // true if you want to have holdout computed on a per-block basis rather
85  // than a per-line basis
86 
    // Mutex / condition-variable pair; what they guard is not visible in this header.
    // NOTE(review): presumably they coordinate output completion between threads -- confirm in parser.cc.
87  std::mutex output_lock;
88  std::condition_variable output_done;
89 
90  bool done = false;
    // (listing line 91, `gram_mask` per the index, is missing here)
92 
93  v_array<size_t> ids; // unique ids for sources
94  v_array<size_t> counts; // partial examples received from sources
95  size_t finished_count; // the number of finished examples;
96  int label_sock = 0;
97  int bound_sock = 0;
98  int max_fd = 0;
99 
    // (listing line 100, `parse_name` per the index, is missing here)
101 
102  label_parser lp; // moved from vw
103 
104  bool audit = false;
105  bool decision_service_json = false;
106 
    // (listing line 107, `bool strict_parse` per the index, is missing here)
    // Holds a captured exception. NOTE(review): presumably used to hand an exception from the
    // parsing thread to another thread -- confirm against the users of this field.
108  std::exception_ptr exc_ptr;
109 };
110 
// Free functions operating on the parser / vw instance. Only declarations appear in this header;
// the cross-reference index at the end of this page places every definition in parser.cc.

// Defined at parser.cc:312.
111 void enable_sources(vw& all, bool quiet, size_t passes, input_options& input_options);
112 
// Deprecated through VW_DEPRECATED with the message below; defined at parser.cc:968.
113 VW_DEPRECATED("Function is no longer used")
114 void adjust_used_index(vw& all);
115 
116 // parser control
// lock_done is defined at parser.cc:571 and set_done at parser.cc:578.
// NOTE(review): presumably both set parser::done, lock_done doing so under output_lock -- confirm.
117 void lock_done(parser& p);
118 void set_done(vw& all);
119 
120 // source control functions
// Defined at parser.cc:126 (reset_source), :206 (finalize_source), :82 (set_compressed)
// and :976 (free_parser).
// NOTE(review): free_parser presumably releases the v_array members that the parser destructor
// leaves alone (see the "Free parser" comment in the parser constructor) -- confirm.
121 void reset_source(vw& all, size_t numbits);
122 void finalize_source(parser* source);
123 void set_compressed(parser* par);
124 void free_parser(vw& all);
std::exception_ptr exc_ptr
Definition: parser.h:108
label_parser simple_label
VW::object_pool< example, example_initializer > example_pool
Definition: parser.h:66
size_t finished_count
Definition: parser.h:95
v_array< substring > words
Definition: parser.h:63
VW::ptr_queue< example > ready_parsed_examples
Definition: parser.h:67
v_array< size_t > counts
Definition: parser.h:94
hash_func_t hasher
Definition: parser.h:73
void lock_done(parser &p)
Definition: parser.cc:571
v_array< substring > name
Definition: parser.h:64
v_array< substring > parse_name
Definition: parser.h:100
bool strict_parse
Definition: parser.h:107
std::mutex output_lock
Definition: parser.h:87
void free_parser(vw &all)
Definition: parser.cc:976
v_array< size_t > ids
Definition: parser.h:93
v_array< size_t > gram_mask
Definition: parser.h:91
void enable_sources(vw &all, bool quiet, size_t passes, input_options &input_options)
Definition: parser.cc:312
const size_t ring_size
Definition: parser.h:80
io_buf
Definition: io_buf.h:54
~parser()
Definition: parser.h:56
parser(size_t ring_size, bool strict_parse_)
Definition: parser.h:40
void reset_source(vw &all, size_t numbits)
Definition: parser.cc:126
void set_compressed(parser *par)
Definition: parser.cc:82
parser
Definition: parser.h:38
void finalize_source(parser *source)
Definition: parser.cc:206
std::condition_variable output_done
Definition: parser.h:88
example * operator()(example *ex)
Definition: parser.cc:957
void adjust_used_index(vw &all)
Definition: parser.cc:968
bool resettable
Definition: parser.h:74
label_parser lp
Definition: parser.h:102
void set_done(vw &all)
Definition: parser.cc:578
uint64_t(* hash_func_t)(substring, uint64_t)