Vowpal Wabbit
Namespaces | Typedefs | Functions
parse_primitives.h File Reference
#include <cmath>
#include <string>
#include <vector>
#include <iostream>
#include <stdint.h>
#include <math.h>
#include "v_array.h"
#include "hashstring.h"

Go to the source code of this file.

Namespaces

 VW
 

Typedefs

typedef example &(* VW::example_factory_t) (void *)
 
typedef uint64_t(* hash_func_t) (substring, uint64_t)
 

Functions

std::ostream & operator<< (std::ostream &os, const substring &ss)
 
std::ostream & operator<< (std::ostream &os, const v_array< substring > &ss)
 
template<typename ContainerT >
void tokenize (char delim, substring s, ContainerT &ret, bool allow_empty=false)
 
bool substring_equal (const substring &a, const substring &b)
 
bool substring_equal (const substring &ss, const char *str)
 
bool operator== (const substring &ss, const char *str)
 
bool operator== (const char *str, const substring &ss)
 
bool operator== (const substring &ss1, const substring &ss2)
 
bool operator!= (const substring &ss, const char *str)
 
bool operator!= (const char *str, const substring &ss)
 
bool operator!= (const substring &ss1, const substring &ss2)
 
size_t substring_len (substring &s)
 
char * safe_index (char *start, char v, char *max)
 
std::vector< substring > escaped_tokenize (char delim, substring s, bool allow_empty=false)
 
void print_substring (substring s)
 
hash_func_t getHasher (const std::string &s)
 
float parseFloat (char *p, char **end, char *endLine=nullptr)
 
float float_of_substring (substring s)
 
int int_of_substring (substring s)
 

Typedef Documentation

◆ hash_func_t

typedef uint64_t(* hash_func_t) (substring, uint64_t)

Definition at line 79 of file parse_primitives.h.

Function Documentation

◆ escaped_tokenize()

std::vector<substring> escaped_tokenize ( char  delim,
substring  s,
bool  allow_empty = false 
)

Definition at line 58 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by safe_index(), and VW::to_argv_escaped().

59 {
60  std::vector<substring> tokens;
61  substring current;
62  current.begin = s.begin;
63  bool in_escape = false;
64  char* reading_head = s.begin;
65  char* writing_head = s.begin;
66 
67  while (reading_head < s.end)
68  {
69  char current_character = *reading_head++;
70 
71  if (in_escape)
72  {
73  *writing_head++ = current_character;
74  in_escape = false;
75  }
76  else
77  {
78  if (current_character == delim)
79  {
80  current.end = writing_head++;
81  *current.end = '\0';
82  if (current.begin != current.end || allow_empty)
83  {
84  tokens.push_back(current);
85  }
86 
87  // Regardless of whether the token was saved, we need to reset the current token.
88  current.begin = writing_head;
89  current.end = writing_head;
90  }
91  else if (current_character == '\\')
92  {
93  in_escape = !in_escape;
94  }
95  else
96  {
97  *writing_head++ = current_character;
98  }
99  }
100  }
101 
102  current.end = writing_head;
103  *current.end = '\0';
104  if (current.begin != current.end || allow_empty)
105  {
106  tokens.push_back(current);
107  }
108 
109  return tokens;
110 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ float_of_substring()

float float_of_substring ( substring  s)
inline

Definition at line 146 of file parse_primitives.h.

References substring::begin, substring::end, f, and parseFloat().

Referenced by CCB::convert_to_score(), COST_SENSITIVE::name_value(), MULTICLASS::parse_label(), CB::parse_label(), COST_SENSITIVE::parse_label(), CCB::parse_outcome(), and parse_simple_label().

147 {
148  char* endptr = s.end;
149  float f = parseFloat(s.begin, &endptr);
150  if ((endptr == s.begin && s.begin != s.end) || std::isnan(f))
151  {
152  std::cout << "warning: " << std::string(s.begin, s.end - s.begin).c_str()
153  << " is not a good float, replacing with 0" << std::endl;
154  f = 0;
155  }
156  return f;
157 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
float parseFloat(char *p, char **end, char *endLine=nullptr)
float f
Definition: cache.cc:40

◆ getHasher()

hash_func_t getHasher ( const std::string &  s)

Definition at line 36 of file parse_primitives.cc.

References hashall(), hashstring(), and THROW.

Referenced by VW::hash_feature_static(), VW::hash_space_static(), and parse_feature_tweaks().

37 {
38  if (s == "strings")
39  return hashstring;
40  else if (s == "all")
41  return hashall;
42  else
43  THROW("Unknown hash function: " << s);
44 }
VW_STD14_CONSTEXPR uint64_t hashall(substring s, uint64_t h)
Definition: hashstring.h:13
VW_STD14_CONSTEXPR uint64_t hashstring(substring s, uint64_t h)
Definition: hashstring.h:18
#define THROW(args)
Definition: vw_exception.h:181

◆ int_of_substring()

int int_of_substring ( substring  s)
inline

Definition at line 159 of file parse_primitives.h.

References substring::begin, and substring::end.

Referenced by CCB::convert_to_score(), CCB::parse_explicit_inclusions(), MULTICLASS::parse_label(), and Search::parse_neighbor_features().

160 {
161  char* endptr = s.end;
162  int i = strtol(s.begin, &endptr, 10);
163  if (endptr == s.begin && s.begin != s.end)
164  {
165  std::cout << "warning: " << std::string(s.begin, s.end - s.begin).c_str() << " is not a good int, replacing with 0"
166  << std::endl;
167  i = 0;
168  }
169 
170  return i;
171 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ operator!=() [1/3]

bool operator!= ( const substring &  ss,
const char *  str 
)

Definition at line 52 of file parse_primitives.cc.

Referenced by tokenize().

52 { return !(ss == str); }

◆ operator!=() [2/3]

bool operator!= ( const char *  str,
const substring &  ss 
)

Definition at line 54 of file parse_primitives.cc.

54 { return !(ss == str); }

◆ operator!=() [3/3]

bool operator!= ( const substring &  ss1,
const substring &  ss2 
)

Definition at line 56 of file parse_primitives.cc.

56 { return !(ss1 == ss2); }

◆ operator<<() [1/2]

std::ostream& operator<< ( std::ostream &  os,
const substring &  ss 
)

Definition at line 112 of file parse_primitives.cc.

References substring::begin, and substring::end.

113 {
114  std::string s(ss.begin, ss.end - ss.begin);
115  return os << s;
116 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ operator<<() [2/2]

std::ostream& operator<< ( std::ostream &  os,
const v_array< substring > &  ss 
)

Definition at line 118 of file parse_primitives.cc.

119 {
120  substring* it = ss.cbegin();
121 
122  if (it == ss.cend())
123  {
124  return os;
125  }
126 
127  os << *it;
128 
129  for (it++; it != ss.cend(); it++)
130  {
131  os << ",";
132  os << *it;
133  }
134 
135  return os;
136 }
T * cbegin() const
Definition: v_array.h:48
T * cend() const
Definition: v_array.h:49

◆ operator==() [1/3]

bool operator== ( const substring &  ss,
const char *  str 
)

Definition at line 46 of file parse_primitives.cc.

References substring_equal().

Referenced by tokenize().

46 { return substring_equal(ss, str); }
bool substring_equal(const substring &a, const substring &b)

◆ operator==() [2/3]

bool operator== ( const char *  str,
const substring &  ss 
)

Definition at line 48 of file parse_primitives.cc.

References substring_equal().

48 { return substring_equal(ss, str); }
bool substring_equal(const substring &a, const substring &b)

◆ operator==() [3/3]

bool operator== ( const substring &  ss1,
const substring &  ss2 
)

Definition at line 50 of file parse_primitives.cc.

References substring_equal().

50 { return substring_equal(ss1, ss2); }
bool substring_equal(const substring &a, const substring &b)

◆ parseFloat()

float parseFloat ( char *  p,
char **  end,
char *  endLine = nullptr 
)
inline

Definition at line 88 of file parse_primitives.h.

References ldamath::powf().

Referenced by TC_parser< audit >::featureValue(), float_of_substring(), is_number(), and TC_parser< audit >::nameSpaceInfoValue().

89 {
90  char* start = p;
91  bool endLine_is_null = endLine == nullptr;
92 
93  if (!*p)
94  {
95  *end = p;
96  return 0;
97  }
98  int s = 1;
99  while ((*p == ' ') && (endLine_is_null || p < endLine)) p++;
100 
101  if (*p == '-')
102  {
103  s = -1;
104  p++;
105  }
106 
107  float acc = 0;
108  while (*p >= '0' && *p <= '9' && (endLine_is_null || p < endLine)) acc = acc * 10 + *p++ - '0';
109 
110  int num_dec = 0;
111  if (*p == '.')
112  {
113  while (*(++p) >= '0' && *p <= '9' && (endLine_is_null || p < endLine))
114  {
115  if (num_dec < 35)
116  {
117  acc = acc * 10 + (*p - '0');
118  num_dec++;
119  }
120  }
121  }
122 
123  int exp_acc = 0;
124  if ((*p == 'e' || *p == 'E') && (endLine_is_null || p < endLine))
125  {
126  p++;
127  int exp_s = 1;
128  if (*p == '-' && (endLine_is_null || p < endLine))
129  {
130  exp_s = -1;
131  p++;
132  }
133  while (*p >= '0' && *p <= '9' && (endLine_is_null || p < endLine)) exp_acc = exp_acc * 10 + *p++ - '0';
134  exp_acc *= exp_s;
135  }
136  if (*p == ' ' || *p == '\n' || *p == '\t' || p == endLine) // easy case succeeded.
137  {
138  acc *= powf(10, (float)(exp_acc - num_dec));
139  *end = p;
140  return s * acc;
141  }
142  else
143  return (float)strtod(start, end);
144 }
T powf(T, T)
Definition: lda_core.cc:428

◆ print_substring()

void print_substring ( substring  s)
inline

Definition at line 70 of file parse_primitives.h.

References substring::begin, and substring::end.

Referenced by no_label::parse_no_label(), and parse_simple_label().

70 { std::cout.write(s.begin, s.end - s.begin); }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ safe_index()

char* safe_index ( char *  start,
char  v,
char *  max 
)
inline

Definition at line 61 of file parse_primitives.h.

References escaped_tokenize().

Referenced by substring_to_example().

62 {
63  while (start != max && *start != v) start++;
64  return start;
65 }

◆ substring_equal() [1/2]

bool substring_equal ( const substring &  a,
const substring &  b 
)

Definition at line 19 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by namedlabels::namedlabels(), operator==(), parse_dictionary_argument(), CB::parse_label(), COST_SENSITIVE::parse_label(), CCB::parse_label(), and tokenize().

20 {
21  return (a.end - a.begin == b.end - b.begin) // same length
22  && (strncmp(a.begin, b.begin, a.end - a.begin) == 0);
23 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ substring_equal() [2/2]

bool substring_equal ( const substring &  ss,
const char *  str 
)

Definition at line 25 of file parse_primitives.cc.

References substring::begin, and substring::end.

26 {
27  size_t len_ss = ss.end - ss.begin;
28  size_t len_str = strlen(str);
29  if (len_ss != len_str)
30  return false;
31  return (strncmp(ss.begin, str, len_ss) == 0);
32 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ substring_len()

size_t substring_len ( substring &  s)

Definition at line 34 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by VW::cb_sample_data::learn_or_predict(), and tokenize().

34 { return s.end - s.begin; }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ tokenize()

template<typename ContainerT >
void tokenize ( char  delim,
substring  s,
ContainerT &  ret,
bool  allow_empty = false 
)

Definition at line 27 of file parse_primitives.h.

References a, substring::begin, substring::end, operator!=(), operator==(), substring_equal(), and substring_len().

Referenced by COST_SENSITIVE::name_value(), namedlabels::namedlabels(), VW::parse_example_label(), MULTILABEL::parse_label(), CB::parse_label(), CCB::parse_label(), Search::parse_neighbor_features(), CCB::parse_outcome(), substring_to_example(), and VW::to_argv().

28 {
29  ret.clear();
30  char* last = s.begin;
31  for (; s.begin != s.end; s.begin++)
32  {
33  if (*s.begin == delim)
34  {
35  if (allow_empty || (s.begin != last))
36  {
37  substring temp = {last, s.begin};
38  ret.push_back(temp);
39  }
40  last = s.begin + 1;
41  }
42  }
43  if (allow_empty || (s.begin != last))
44  {
45  substring final_substring = {last, s.begin};
46  ret.push_back(final_substring);
47  }
48 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9