Vowpal Wabbit
parse_primitives.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #pragma once
7 #include <cmath>
8 #include <string>
9 #include <vector>
10 #include <iostream>
11 #include <stdint.h>
12 #include <math.h>
13 #include "v_array.h"
14 #include "hashstring.h"
15 
16 #ifdef _WIN32
17 #define NOMINMAX
18 #include <WinSock2.h>
19 #include <Windows.h>
20 #endif
21 
22 std::ostream& operator<<(std::ostream& os, const substring& ss);
23 std::ostream& operator<<(std::ostream& os, const v_array<substring>& ss);
24 
25 // chop up the string into a v_array or any compatible container of substring.
26 template <typename ContainerT>
27 void tokenize(char delim, substring s, ContainerT& ret, bool allow_empty = false)
28 {
29  ret.clear();
30  char* last = s.begin;
31  for (; s.begin != s.end; s.begin++)
32  {
33  if (*s.begin == delim)
34  {
35  if (allow_empty || (s.begin != last))
36  {
37  substring temp = {last, s.begin};
38  ret.push_back(temp);
39  }
40  last = s.begin + 1;
41  }
42  }
43  if (allow_empty || (s.begin != last))
44  {
45  substring final_substring = {last, s.begin};
46  ret.push_back(final_substring);
47  }
48 }
49 
50 bool substring_equal(const substring& a, const substring& b);
51 bool substring_equal(const substring& ss, const char* str);
52 
53 bool operator==(const substring& ss, const char* str);
54 bool operator==(const char* str, const substring& ss);
55 bool operator==(const substring& ss1, const substring& ss2);
56 bool operator!=(const substring& ss, const char* str);
57 bool operator!=(const char* str, const substring& ss);
58 bool operator!=(const substring& ss1, const substring& ss2);
59 size_t substring_len(substring& s);
60 
// Scan forward from `start` for the first character equal to `v`, never reading
// at or beyond `max`. Returns a pointer to the match, or `max` when the
// character does not occur in [start, max).
inline char* safe_index(char* start, char v, char* max)
{
  for (; start != max; ++start)
  {
    if (*start == v)
      break;
  }
  return start;
}
66 
67 // Note this will destructively parse the passed in substring as it replaces delimiters with '\0'
68 std::vector<substring> escaped_tokenize(char delim, substring s, bool allow_empty = false);
69 
70 inline void print_substring(substring s) { std::cout.write(s.begin, s.end - s.begin); }
71 
72 // can't type as it forces C++/CLI part to include rapidjson, which leads to name clashes...
73 struct example;
74 namespace VW
75 {
76 typedef example& (*example_factory_t)(void*);
77 }
78 
79 typedef uint64_t (*hash_func_t)(substring, uint64_t);
80 
81 hash_func_t getHasher(const std::string& s);
82 
// The following function is a home made strtof. The
// differences are :
//  - much faster (around 50% but depends on the string to parse)
//  - less error control, but utilised inside a very strict parser
//    in charge of error detection.
//
// Parameters:
//  p       - first character to parse.
//  end     - out: receives a pointer to the first character that was not consumed.
//  endLine - optional exclusive upper bound. When non-null, no character at or
//            beyond endLine is read or treated as part of the number. When null,
//            the input must be NUL-terminated.
//
// Returns the parsed value. Returns 0 with *end == p when the input is empty
// (an immediate NUL, or p already at/after endLine).
//
// The hand-written scanner accepts: optional leading spaces, an optional '-',
// digits, an optional fraction and an optional exponent, followed by ' ', '\n',
// '\t' or endLine. Anything else is handed to strtod, starting again from p.
inline float parseFloat(char* p, char** end, char* endLine = nullptr)
{
  char* start = p;
  const bool bounded = endLine != nullptr;

  // Empty input. The bound is tested first so *endLine is never read.
  if ((bounded && p >= endLine) || !*p)
  {
    *end = p;
    return 0;
  }

  int s = 1;
  while ((!bounded || p < endLine) && *p == ' ') p++;

  // Every dereference below is guarded by the bound first; otherwise a '-' or
  // '.' located exactly at endLine would be consumed and p would overshoot.
  if ((!bounded || p < endLine) && *p == '-')
  {
    s = -1;
    p++;
  }

  float acc = 0;
  while ((!bounded || p < endLine) && *p >= '0' && *p <= '9') acc = acc * 10 + *p++ - '0';

  int num_dec = 0;
  if ((!bounded || p < endLine) && *p == '.')
  {
    p++;
    while ((!bounded || p < endLine) && *p >= '0' && *p <= '9')
    {
      // Digits beyond the 35th decimal are consumed but not accumulated.
      if (num_dec < 35)
      {
        acc = acc * 10 + (*p - '0');
        num_dec++;
      }
      p++;
    }
  }

  int exp_acc = 0;
  if ((!bounded || p < endLine) && (*p == 'e' || *p == 'E'))
  {
    p++;
    int exp_s = 1;
    if ((!bounded || p < endLine) && *p == '-')
    {
      exp_s = -1;
      p++;
    }
    while ((!bounded || p < endLine) && *p >= '0' && *p <= '9') exp_acc = exp_acc * 10 + *p++ - '0';
    exp_acc *= exp_s;
  }

  if ((bounded && p == endLine) || *p == ' ' || *p == '\n' || *p == '\t')  // easy case succeeded.
  {
    acc *= powf(10, static_cast<float>(exp_acc - num_dec));
    *end = p;
    return s * acc;
  }

  // Slow path: let strtod handle everything the fast scanner does not.
  if (!bounded)
    return static_cast<float>(strtod(start, end));

  // strtod stops only at a NUL, so with a bound it must run on a copy of
  // [start, endLine); the end pointer is then mapped back into the caller's buffer.
  const std::string bounded_copy(start, endLine);
  char* copy_end = nullptr;
  const float value = static_cast<float>(strtod(bounded_copy.c_str(), &copy_end));
  *end = start + (copy_end - bounded_copy.c_str());
  return value;
}
145 
147 {
148  char* endptr = s.end;
149  float f = parseFloat(s.begin, &endptr);
150  if ((endptr == s.begin && s.begin != s.end) || std::isnan(f))
151  {
152  std::cout << "warning: " << std::string(s.begin, s.end - s.begin).c_str()
153  << " is not a good float, replacing with 0" << std::endl;
154  f = 0;
155  }
156  return f;
157 }
158 
160 {
161  char* endptr = s.end;
162  int i = strtol(s.begin, &endptr, 10);
163  if (endptr == s.begin && s.begin != s.end)
164  {
165  std::cout << "warning: " << std::string(s.begin, s.end - s.begin).c_str() << " is not a good int, replacing with 0"
166  << std::endl;
167  i = 0;
168  }
169 
170  return i;
171 }
bool substring_equal(const substring &a, const substring &b)
int int_of_substring(substring s)
std::vector< substring > escaped_tokenize(char delim, substring s, bool allow_empty=false)
size_t substring_len(substring &s)
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
bool operator==(const substring &ss, const char *str)
hash_func_t getHasher(const std::string &s)
bool operator!=(const substring &ss, const char *str)
float float_of_substring(substring s)
T powf(T, T)
Definition: lda_core.cc:428
char * safe_index(char *start, char v, char *max)
void tokenize(char delim, substring s, ContainerT &ret, bool allow_empty=false)
std::ostream & operator<<(std::ostream &os, const substring &ss)
constexpr uint64_t a
Definition: rand48.cc:11
Definition: autolink.cc:11
void print_substring(substring s)
float parseFloat(char *p, char **end, char *endLine=nullptr)
float f
Definition: cache.cc:40
uint64_t(* hash_func_t)(substring, uint64_t)