Vowpal Wabbit
Functions
parse_primitives.cc File Reference
#include <iostream>
#include <strings.h>
#include <stdexcept>
#include <sstream>
#include "parse_primitives.h"
#include "hash.h"
#include "vw_exception.h"

Go to the source code of this file.

Functions

bool substring_equal (const substring &a, const substring &b)
 
bool substring_equal (const substring &ss, const char *str)
 
size_t substring_len (substring &s)
 
hash_func_t getHasher (const std::string &s)
 
bool operator== (const substring &ss, const char *str)
 
bool operator== (const char *str, const substring &ss)
 
bool operator== (const substring &ss1, const substring &ss2)
 
bool operator!= (const substring &ss, const char *str)
 
bool operator!= (const char *str, const substring &ss)
 
bool operator!= (const substring &ss1, const substring &ss2)
 
std::vector< substring > escaped_tokenize (char delim, substring s, bool allow_empty)
 
std::ostream & operator<< (std::ostream &os, const substring &ss)
 
std::ostream & operator<< (std::ostream &os, const v_array< substring > &ss)
 

Function Documentation

◆ escaped_tokenize()

std::vector<substring> escaped_tokenize ( char  delim,
substring  s,
bool  allow_empty 
)

Definition at line 58 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by safe_index(), and VW::to_argv_escaped().

59 {
60  std::vector<substring> tokens;
61  substring current;
62  current.begin = s.begin;
63  bool in_escape = false;
64  char* reading_head = s.begin;
65  char* writing_head = s.begin;
66 
67  while (reading_head < s.end)
68  {
69  char current_character = *reading_head++;
70 
71  if (in_escape)
72  {
73  *writing_head++ = current_character;
74  in_escape = false;
75  }
76  else
77  {
78  if (current_character == delim)
79  {
80  current.end = writing_head++;
81  *current.end = '\0';
82  if (current.begin != current.end || allow_empty)
83  {
84  tokens.push_back(current);
85  }
86 
87  // Regardless of whether the token was saved, we need to reset the current token.
88  current.begin = writing_head;
89  current.end = writing_head;
90  }
91  else if (current_character == '\\')
92  {
93  in_escape = !in_escape;
94  }
95  else
96  {
97  *writing_head++ = current_character;
98  }
99  }
100  }
101 
102  current.end = writing_head;
103  *current.end = '\0';
104  if (current.begin != current.end || allow_empty)
105  {
106  tokens.push_back(current);
107  }
108 
109  return tokens;
110 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ getHasher()

hash_func_t getHasher ( const std::string &  s)

Definition at line 36 of file parse_primitives.cc.

References hashall(), hashstring(), and THROW.

Referenced by VW::hash_feature_static(), VW::hash_space_static(), and parse_feature_tweaks().

37 {
38  if (s == "strings")
39  return hashstring;
40  else if (s == "all")
41  return hashall;
42  else
43  THROW("Unknown hash function: " << s);
44 }
VW_STD14_CONSTEXPR uint64_t hashall(substring s, uint64_t h)
Definition: hashstring.h:13
VW_STD14_CONSTEXPR uint64_t hashstring(substring s, uint64_t h)
Definition: hashstring.h:18
#define THROW(args)
Definition: vw_exception.h:181

◆ operator!=() [1/3]

bool operator!= ( const substring &  ss,
const char *  str 
)

Definition at line 52 of file parse_primitives.cc.

Referenced by tokenize().

52 { return !(ss == str); }

◆ operator!=() [2/3]

bool operator!= ( const char *  str,
const substring &  ss 
)

Definition at line 54 of file parse_primitives.cc.

54 { return !(ss == str); }

◆ operator!=() [3/3]

bool operator!= ( const substring &  ss1,
const substring &  ss2 
)

Definition at line 56 of file parse_primitives.cc.

56 { return !(ss1 == ss2); }

◆ operator<<() [1/2]

std::ostream& operator<< ( std::ostream &  os,
const substring &  ss 
)

Definition at line 112 of file parse_primitives.cc.

References substring::begin, and substring::end.

113 {
114  std::string s(ss.begin, ss.end - ss.begin);
115  return os << s;
116 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ operator<<() [2/2]

std::ostream& operator<< ( std::ostream &  os,
const v_array< substring > &  ss 
)

Definition at line 118 of file parse_primitives.cc.

119 {
120  substring* it = ss.cbegin();
121 
122  if (it == ss.cend())
123  {
124  return os;
125  }
126 
127  os << *it;
128 
129  for (it++; it != ss.cend(); it++)
130  {
131  os << ",";
132  os << *it;
133  }
134 
135  return os;
136 }
T * cbegin() const
Definition: v_array.h:48
T * cend() const
Definition: v_array.h:49

◆ operator==() [1/3]

bool operator== ( const substring &  ss,
const char *  str 
)

Definition at line 46 of file parse_primitives.cc.

References substring_equal().

Referenced by tokenize().

46 { return substring_equal(ss, str); }
bool substring_equal(const substring &a, const substring &b)

◆ operator==() [2/3]

bool operator== ( const char *  str,
const substring &  ss 
)

Definition at line 48 of file parse_primitives.cc.

References substring_equal().

48 { return substring_equal(ss, str); }
bool substring_equal(const substring &a, const substring &b)

◆ operator==() [3/3]

bool operator== ( const substring &  ss1,
const substring &  ss2 
)

Definition at line 50 of file parse_primitives.cc.

References substring_equal().

50 { return substring_equal(ss1, ss2); }
bool substring_equal(const substring &a, const substring &b)

◆ substring_equal() [1/2]

bool substring_equal ( const substring &  a,
const substring &  b 
)

Definition at line 19 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by namedlabels::namedlabels(), operator==(), parse_dictionary_argument(), CB::parse_label(), COST_SENSITIVE::parse_label(), CCB::parse_label(), and tokenize().

20 {
21  return (a.end - a.begin == b.end - b.begin) // same length
22  && (strncmp(a.begin, b.begin, a.end - a.begin) == 0);
23 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ substring_equal() [2/2]

bool substring_equal ( const substring &  ss,
const char *  str 
)

Definition at line 25 of file parse_primitives.cc.

References substring::begin, and substring::end.

26 {
27  size_t len_ss = ss.end - ss.begin;
28  size_t len_str = strlen(str);
29  if (len_ss != len_str)
30  return false;
31  return (strncmp(ss.begin, str, len_ss) == 0);
32 }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9

◆ substring_len()

size_t substring_len ( substring &  s)

Definition at line 34 of file parse_primitives.cc.

References substring::begin, and substring::end.

Referenced by VW::cb_sample_data::learn_or_predict(), and tokenize().

34 { return s.end - s.begin; }
char * end
Definition: hashstring.h:10
char * begin
Definition: hashstring.h:9