Vowpal Wabbit
parse_primitives.cc
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5  */
#include <cstring>
#include <iostream>
#ifndef WIN32
#include <strings.h>
#else
#include <string>
#endif
#include <stdexcept>
#include <sstream>
14 
15 #include "parse_primitives.h"
16 #include "hash.h"
17 #include "vw_exception.h"
18 
19 bool substring_equal(const substring& a, const substring& b)
20 {
21  return (a.end - a.begin == b.end - b.begin) // same length
22  && (strncmp(a.begin, b.begin, a.end - a.begin) == 0);
23 }
24 
25 bool substring_equal(const substring& ss, const char* str)
26 {
27  size_t len_ss = ss.end - ss.begin;
28  size_t len_str = strlen(str);
29  if (len_ss != len_str)
30  return false;
31  return (strncmp(ss.begin, str, len_ss) == 0);
32 }
33 
34 size_t substring_len(substring& s) { return s.end - s.begin; }
35 
36 hash_func_t getHasher(const std::string& s)
37 {
38  if (s == "strings")
39  return hashstring;
40  else if (s == "all")
41  return hashall;
42  else
43  THROW("Unknown hash function: " << s);
44 }
45 
46 bool operator==(const substring& ss, const char* str) { return substring_equal(ss, str); }
47 
48 bool operator==(const char* str, const substring& ss) { return substring_equal(ss, str); }
49 
50 bool operator==(const substring& ss1, const substring& ss2) { return substring_equal(ss1, ss2); }
51 
52 bool operator!=(const substring& ss, const char* str) { return !(ss == str); }
53 
54 bool operator!=(const char* str, const substring& ss) { return !(ss == str); }
55 
56 bool operator!=(const substring& ss1, const substring& ss2) { return !(ss1 == ss2); }
57 
58 std::vector<substring> escaped_tokenize(char delim, substring s, bool allow_empty)
59 {
60  std::vector<substring> tokens;
61  substring current;
62  current.begin = s.begin;
63  bool in_escape = false;
64  char* reading_head = s.begin;
65  char* writing_head = s.begin;
66 
67  while (reading_head < s.end)
68  {
69  char current_character = *reading_head++;
70 
71  if (in_escape)
72  {
73  *writing_head++ = current_character;
74  in_escape = false;
75  }
76  else
77  {
78  if (current_character == delim)
79  {
80  current.end = writing_head++;
81  *current.end = '\0';
82  if (current.begin != current.end || allow_empty)
83  {
84  tokens.push_back(current);
85  }
86 
87  // Regardless of whether the token was saved, we need to reset the current token.
88  current.begin = writing_head;
89  current.end = writing_head;
90  }
91  else if (current_character == '\\')
92  {
93  in_escape = !in_escape;
94  }
95  else
96  {
97  *writing_head++ = current_character;
98  }
99  }
100  }
101 
102  current.end = writing_head;
103  *current.end = '\0';
104  if (current.begin != current.end || allow_empty)
105  {
106  tokens.push_back(current);
107  }
108 
109  return tokens;
110 }
111 
112 std::ostream& operator<<(std::ostream& os, const substring& ss)
113 {
114  std::string s(ss.begin, ss.end - ss.begin);
115  return os << s;
116 }
117 
118 std::ostream& operator<<(std::ostream& os, const v_array<substring>& ss)
119 {
120  substring* it = ss.cbegin();
121 
122  if (it == ss.cend())
123  {
124  return os;
125  }
126 
127  os << *it;
128 
129  for (it++; it != ss.cend(); it++)
130  {
131  os << ",";
132  os << *it;
133  }
134 
135  return os;
136 }
size_t substring_len(substring &s)
bool operator!=(const substring &ss, const char *str)
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9
bool operator==(const substring &ss, const char *str)
std::ostream & operator<<(std::ostream &os, const substring &ss)
hash_func_t getHasher(const std::string &s)
std::vector< substring > escaped_tokenize(char delim, substring s, bool allow_empty)
VW_STD14_CONSTEXPR uint64_t hashall(substring s, uint64_t h)
Definition: hashstring.h:13
constexpr uint64_t a
Definition: rand48.cc:11
VW_STD14_CONSTEXPR uint64_t hashstring(substring s, uint64_t h)
Definition: hashstring.h:18
bool substring_equal(const substring &a, const substring &b)
#define THROW(args)
Definition: vw_exception.h:181
uint64_t(* hash_func_t)(substring, uint64_t)