Vowpal Wabbit
Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | Private Attributes | Static Private Attributes | List of all members
io_buf Class Reference

#include <io_buf.h>

Inheritance diagram for io_buf:
comp_io_buf memory_io_buf

Public Member Functions

void verify_hash (bool verify)
 
uint32_t hash ()
 
virtual int open_file (const char *name, bool stdin_off)
 
virtual int open_file (const char *name, bool stdin_off, int flag=READ)
 
virtual void reset_file (int f)
 
 io_buf ()
 
virtual ~io_buf ()
 
void set (char *p)
 
virtual size_t num_files ()
 
virtual ssize_t read_file (int f, void *buf, size_t nbytes)
 
ssize_t fill (int f)
 
virtual ssize_t write_file (int f, const void *buf, size_t nbytes)
 
virtual void flush ()
 
virtual bool close_file ()
 
virtual bool compressed ()
 
void close_files ()
 
void buf_write (char *&pointer, size_t n)
 
size_t buf_read (char *&pointer, size_t n)
 
size_t bin_read_fixed (char *data, size_t len, const char *read_message)
 
size_t bin_write_fixed (const char *data, size_t len)
 

Static Public Member Functions

static ssize_t read_file_or_socket (int f, void *buf, size_t nbytes)
 
static ssize_t write_file_or_socket (int f, const void *buf, size_t nbytes)
 
static void close_file_or_socket (int f)
 
static bool is_socket (int f)
 

Public Attributes

v_array< char > space
 
v_array< int > files
 
size_t count
 
size_t current
 
char * head
 
v_array< char > currentname
 
v_array< char > finalname
 

Static Public Attributes

static constexpr int READ = 1
 
static constexpr int WRITE = 2
 

Private Attributes

bool _verify_hash
 
uint32_t _hash
 

Static Private Attributes

static constexpr size_t INITIAL_BUFF_SIZE = 1 << 16
 

Detailed Description

Definition at line 54 of file io_buf.h.

Constructor & Destructor Documentation

◆ io_buf()

io_buf::io_buf ( )
inline

Definition at line 147 of file io_buf.h.

References v_array< T >::begin(), and v_array< T >::resize().

147  : _verify_hash{false}, _hash{0}, count{0}, current{0}
148  {
149  space = v_init<char>();
150  files = v_init<int>();
151  currentname = v_init<char>();
152  finalname = v_init<char>();
153  space.resize(INITIAL_BUFF_SIZE);
154  head = space.begin();
155  }
void resize(size_t length)
Definition: v_array.h:69
size_t current
Definition: io_buf.h:66
static constexpr size_t INITIAL_BUFF_SIZE
Definition: io_buf.h:59
size_t count
Definition: io_buf.h:65
T *& begin()
Definition: v_array.h:42
v_array< char > finalname
Definition: io_buf.h:69
char * head
Definition: io_buf.h:67
v_array< int > files
Definition: io_buf.h:64
v_array< char > currentname
Definition: io_buf.h:68
v_array< char > space
Definition: io_buf.h:62
bool _verify_hash
Definition: io_buf.h:57
uint32_t _hash
Definition: io_buf.h:58

◆ ~io_buf()

virtual io_buf::~io_buf ( )
inline virtual

Definition at line 157 of file io_buf.h.

References v_array< T >::delete_v().

158  {
159  files.delete_v();
160  space.delete_v();
161  }
v_array< int > files
Definition: io_buf.h:64
v_array< char > space
Definition: io_buf.h:62
void delete_v()
Definition: v_array.h:98

Member Function Documentation

◆ bin_read_fixed()

size_t io_buf::bin_read_fixed ( char *  data,
size_t  len,
const char *  read_message 
)
inline

Definition at line 230 of file io_buf.h.

References buf_read(), THROW, and uniform_hash().

Referenced by bin_read(), bin_text_read_write_fixed(), save_load(), save_load_flat_example(), save_load_header(), GD::save_load_online_state(), GD::save_load_regressor(), save_load_regularizer(), and save_load_sampling().

231  {
232  if (len > 0)
233  {
234  char* p;
235  // if the model is corrupt, the number of bytes can be less than specified (as there isn't enough data available
236  // in the file)
237  len = buf_read(p, len);
238 
239  // compute hash for check-sum
240  if (_verify_hash)
241  _hash = (uint32_t)uniform_hash(p, len, _hash);
242 
243  if (*read_message == '\0')
244  memcpy(data, p, len);
245  else if (memcmp(data, p, len) != 0)
246  THROW(read_message);
247  return len;
248  }
249  return 0;
250  }
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
bool _verify_hash
Definition: io_buf.h:57
uint32_t _hash
Definition: io_buf.h:58
#define THROW(args)
Definition: vw_exception.h:181
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ bin_write_fixed()

size_t io_buf::bin_write_fixed ( const char *  data,
size_t  len 
)
inline

Definition at line 252 of file io_buf.h.

References buf_write(), isbinary(), readto(), and uniform_hash().

Referenced by audit_regressor_feature(), audit_regressor_lda(), bin_text_write(), bin_text_write_fixed(), bin_write(), and save_load_flat_example().

253  {
254  if (len > 0)
255  {
256  char* p;
257  buf_write(p, len);
258 
259  memcpy(p, data, len);
260 
261  // compute hash for check-sum
262  if (_verify_hash)
263  _hash = (uint32_t)uniform_hash(p, len, _hash);
264  }
265  return len;
266  }
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
bool _verify_hash
Definition: io_buf.h:57
uint32_t _hash
Definition: io_buf.h:58

◆ buf_read()

size_t io_buf::buf_read ( char *&  pointer,
size_t  n 
)

Definition at line 12 of file io_buf.cc.

References v_array< T >::begin(), current, v_array< T >::end(), files, fill(), head, v_array< T >::size(), and space.

Referenced by bin_read_fixed(), MULTILABEL::bufread_label(), CB::bufread_label(), COST_SENSITIVE::bufread_label(), close_files(), read_cached_features(), MULTICLASS::read_cached_label(), CCB::read_cached_label(), MULTILABEL::read_cached_label(), CB::read_cached_label(), COST_SENSITIVE::read_cached_label(), CB_EVAL::read_cached_label(), and read_cached_simple_label().

13 {
14  // return a pointer to the next n bytes. n must be smaller than the maximum size.
15  if (head + n <= space.end())
16  {
17  pointer = head;
18  head += n;
19  return n;
20  }
21  else // out of bytes, so refill.
22  {
23  if (head != space.begin()) // There exists room to shift.
24  {
25  // Out of buffer so swap to beginning.
26  size_t left = space.end() - head;
27  memmove(space.begin(), head, left);
28  head = space.begin();
29  space.end() = space.begin() + left;
30  }
31  if (fill(files[current]) > 0) // read more bytes from current file if present
32  return buf_read(pointer, n); // more bytes are read.
33  else if (++current < files.size())
34  return buf_read(pointer, n); // No more bytes, so go to next file and try again.
35  else
36  {
37  // no more bytes to read, return all that we have left.
38  pointer = head;
39  head = space.end();
40  return space.end() - pointer;
41  }
42  }
43 }
size_t current
Definition: io_buf.h:66
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
char * head
Definition: io_buf.h:67
ssize_t fill(int f)
Definition: io_buf.h:171
v_array< int > files
Definition: io_buf.h:64
v_array< char > space
Definition: io_buf.h:62
T *& end()
Definition: v_array.h:43
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ buf_write()

void io_buf::buf_write ( char *&  pointer,
size_t  n 
)

Definition at line 94 of file io_buf.cc.

References v_array< T >::begin(), v_array< T >::end(), v_array< T >::end_array, flush(), head, v_array< T >::resize(), and space.

Referenced by bin_write_fixed(), MULTICLASS::cache_label(), MULTILABEL::cache_label(), CB::cache_label(), COST_SENSITIVE::cache_label(), CCB::cache_label(), CB_EVAL::cache_label(), cache_simple_label(), cache_tag(), close_files(), output_byte(), and output_features().

95 {
96  // return a pointer to the next n bytes to write into.
97  if (head + n <= space.end_array)
98  {
99  pointer = head;
100  head += n;
101  }
102  else // Time to dump the file
103  {
104  if (head != space.begin())
105  flush();
106  else // Array is short, so increase size.
107  {
108  space.resize(2 * (space.end_array - space.begin()));
109  space.end() = space.begin();
110  head = space.begin();
111  }
112  buf_write(pointer, n);
113  }
114 }
void resize(size_t length)
Definition: v_array.h:69
T *& begin()
Definition: v_array.h:42
char * head
Definition: io_buf.h:67
virtual void flush()
Definition: io_buf.h:194
v_array< char > space
Definition: io_buf.h:62
T *& end()
Definition: v_array.h:43
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
T * end_array
Definition: v_array.h:38

◆ close_file()

virtual bool io_buf::close_file ( )
inline virtual

Reimplemented in memory_io_buf, and comp_io_buf.

Definition at line 204 of file io_buf.h.

References close_file_or_socket(), v_array< T >::empty(), and v_array< T >::pop().

Referenced by close_files(), dump_regressor(), end_examples(), load_input_model(), parse_cache(), parse_dictionary_argument(), parse_mask_regressor_args(), parse_sources(), and reset_source().

205  {
206  if (!files.empty())
207  {
208  close_file_or_socket(files.pop());
209  return true;
210  }
211  return false;
212  }
T pop()
Definition: v_array.h:58
static void close_file_or_socket(int f)
Definition: io_buf.cc:152
v_array< int > files
Definition: io_buf.h:64
bool empty() const
Definition: v_array.h:59

◆ close_file_or_socket()

void io_buf::close_file_or_socket ( int  f)
static

Definition at line 152 of file io_buf.cc.

References is_socket().

Referenced by close_file(), compressed(), VW::finish(), and reset_source().

153 {
154 #ifdef _WIN32
155  if (io_buf::is_socket(f))
156  closesocket(f);
157  else
158  _close(f);
159 #else
160  close(f);
161 #endif
162 }
static bool is_socket(int f)
Definition: io_buf.cc:116
float f
Definition: cache.cc:40

◆ close_files()

void io_buf::close_files ( )
inline

Definition at line 218 of file io_buf.h.

References buf_read(), buf_write(), close_file(), and is_socket().

Referenced by finalize_source().

219  {
220  while (close_file())
221  ;
222  }
virtual bool close_file()
Definition: io_buf.h:204

◆ compressed()

virtual bool io_buf::compressed ( )
inline virtual

Reimplemented in comp_io_buf.

Definition at line 214 of file io_buf.h.

References close_file_or_socket().

Referenced by reset_source().

214 { return false; }

◆ fill()

ssize_t io_buf::fill ( int  f)
inline

Definition at line 171 of file io_buf.h.

References v_array< T >::begin(), v_array< T >::end(), v_array< T >::end_array, read_file(), and v_array< T >::resize().

Referenced by buf_read(), isbinary(), and readto().

172  { // if the loaded values have reached the allocated space
173  if (space.end_array - space.end() == 0)
174  { // reallocate to twice as much space
175  size_t head_loc = head - space.begin();
176  space.resize(2 * (space.end_array - space.begin()));
177  head = space.begin() + head_loc;
178  }
179  // read more bytes from file up to the remaining allocated space
180  ssize_t num_read = read_file(f, space.end(), space.end_array - space.end());
181  if (num_read >= 0)
182  { // if some bytes were actually loaded, update the end of loaded values
183  space.end() = space.end() + num_read;
184  return num_read;
185  }
186  else
187  return 0;
188  }
void resize(size_t length)
Definition: v_array.h:69
T *& begin()
Definition: v_array.h:42
virtual ssize_t read_file(int f, void *buf, size_t nbytes)
Definition: io_buf.h:167
char * head
Definition: io_buf.h:67
v_array< char > space
Definition: io_buf.h:62
T *& end()
Definition: v_array.h:43
float f
Definition: cache.cc:40
T * end_array
Definition: v_array.h:38

◆ flush()

virtual void io_buf::flush ( )
inline virtual

Reimplemented in comp_io_buf.

Definition at line 194 of file io_buf.h.

References v_array< T >::begin(), v_array< T >::empty(), and write_file().

Referenced by buf_write(), dump_regressor(), end_examples(), reset_source(), and send_features().

195  {
196  if (!files.empty())
197  {
198  if (write_file(files[0], space.begin(), head - space.begin()) != (int)(head - space.begin()))
199  std::cerr << "error, failed to write example\n";
200  head = space.begin();
201  }
202  }
T *& begin()
Definition: v_array.h:42
char * head
Definition: io_buf.h:67
v_array< int > files
Definition: io_buf.h:64
v_array< char > space
Definition: io_buf.h:62
bool empty() const
Definition: v_array.h:59
virtual ssize_t write_file(int f, const void *buf, size_t nbytes)
Definition: io_buf.h:190

◆ hash()

uint32_t io_buf::hash ( )
inline

Definition at line 83 of file io_buf.h.

References _hash, and THROW.

Referenced by save_load_header().

84  {
85  if (!_verify_hash)
86  THROW("HASH WAS NOT CALCULATED");
87  return _hash;
88  }
bool _verify_hash
Definition: io_buf.h:57
uint32_t _hash
Definition: io_buf.h:58
#define THROW(args)
Definition: vw_exception.h:181

◆ is_socket()

bool io_buf::is_socket ( int  f)
static

Definition at line 116 of file io_buf.cc.

Referenced by close_file_or_socket(), close_files(), read_file_or_socket(), and write_file_or_socket().

117 {
118  // this appears to work in practice, but could probably be done in a cleaner fashion
119 #ifdef _WIN32
120  const int _nhandle = _getmaxstdio() / 2;
121  return f >= _nhandle;
122 #else
123  const int _nhandle = 32;
124  return f >= _nhandle;
125 #endif
126 }
float f
Definition: cache.cc:40

◆ num_files()

virtual size_t io_buf::num_files ( )
inline virtual

Reimplemented in comp_io_buf.

Definition at line 165 of file io_buf.h.

References v_array< T >::size().

Referenced by reset_source().

165 { return files.size(); }
size_t size() const
Definition: v_array.h:68
v_array< int > files
Definition: io_buf.h:64

◆ open_file() [1/2]

virtual int io_buf::open_file ( const char *  name,
bool  stdin_off 
)
inline virtual

Definition at line 90 of file io_buf.h.

References open_file().

Referenced by dump_regressor(), enable_sources(), make_write_cache(), open_file(), parse_cache(), parse_dictionary_argument(), parse_mask_regressor_args(), read_regressor_file(), and reset_source().

90 { return open_file(name, stdin_off, READ); }
static constexpr int READ
Definition: io_buf.h:71
virtual int open_file(const char *name, bool stdin_off)
Definition: io_buf.h:90

◆ open_file() [2/2]

virtual int io_buf::open_file ( const char *  name,
bool  stdin_off,
int  flag = READ 
)
inline virtual

Reimplemented in comp_io_buf.

Definition at line 92 of file io_buf.h.

References O_LARGEFILE, v_array< T >::push_back(), READ, THROWERRNO, and WRITE.

93  {
94  int ret = -1;
95  switch (flag)
96  {
97  case READ:
98  if (*name != '\0')
99  {
100 #ifdef _WIN32
101  // _O_SEQUENTIAL hints to OS that we'll be reading sequentially, so cache aggressively.
102  _sopen_s(&ret, name, _O_RDONLY | _O_BINARY | _O_SEQUENTIAL, _SH_DENYWR, 0);
103 #else
104  ret = open(name, O_RDONLY | O_LARGEFILE);
105 #endif
106  }
107  else if (!stdin_off)
108 #ifdef _WIN32
109  ret = _fileno(stdin);
110 #else
111  ret = fileno(stdin);
112 #endif
113  if (ret != -1)
114  files.push_back(ret);
115  break;
116 
117  case WRITE:
118 #ifdef _WIN32
119  _sopen_s(&ret, name, _O_CREAT | _O_WRONLY | _O_BINARY | _O_TRUNC, _SH_DENYWR, _S_IREAD | _S_IWRITE);
120 #else
121  ret = open(name, O_CREAT | O_WRONLY | O_LARGEFILE | O_TRUNC, 0666);
122 #endif
123  if (ret != -1)
124  files.push_back(ret);
125  break;
126 
127  default:
128  std::cerr << "Unknown file operation. Something other than READ/WRITE specified" << std::endl;
129  ret = -1;
130  }
131  if (ret == -1 && *name != '\0')
132  THROWERRNO("can't open: " << name);
133  return ret;
134  }
#define O_LARGEFILE
Definition: io_buf.h:24
static constexpr int WRITE
Definition: io_buf.h:72
static constexpr int READ
Definition: io_buf.h:71
void push_back(const T &new_ele)
Definition: v_array.h:107
v_array< int > files
Definition: io_buf.h:64
#define THROWERRNO(args)
Definition: vw_exception.h:167

◆ read_file()

virtual ssize_t io_buf::read_file ( int  f,
void *  buf,
size_t  nbytes 
)
inline virtual

Reimplemented in memory_io_buf, and comp_io_buf.

Definition at line 167 of file io_buf.h.

References f, and read_file_or_socket().

Referenced by cache_numbits(), fill(), hash_file_contents(), and parse_dictionary_argument().

167 { return read_file_or_socket(f, buf, nbytes); }
static ssize_t read_file_or_socket(int f, void *buf, size_t nbytes)
Definition: io_buf.cc:128
float f
Definition: cache.cc:40

◆ read_file_or_socket()

ssize_t io_buf::read_file_or_socket ( int  f,
void *  buf,
size_t  nbytes 
)
static

Definition at line 128 of file io_buf.cc.

References is_socket().

Referenced by read_file().

129 {
130 #ifdef _WIN32
131  if (is_socket(f))
132  return recv(f, reinterpret_cast<char*>(buf), static_cast<int>(nbytes), 0);
133  else
134  return _read(f, buf, (unsigned int)nbytes);
135 #else
136  return read(f, buf, (unsigned int)nbytes);
137 #endif
138 }
static bool is_socket(int f)
Definition: io_buf.cc:116
float f
Definition: cache.cc:40

◆ reset_file()

virtual void io_buf::reset_file ( int  f)
inline virtual

Reimplemented in comp_io_buf.

Definition at line 136 of file io_buf.h.

References v_array< T >::begin(), and v_array< T >::end().

Referenced by reset_source().

137  {
138 #ifdef _WIN32
139  _lseek(f, 0, SEEK_SET);
140 #else
141  lseek(f, 0, SEEK_SET);
142 #endif
143  space.end() = space.begin();
144  head = space.begin();
145  }
T *& begin()
Definition: v_array.h:42
char * head
Definition: io_buf.h:67
v_array< char > space
Definition: io_buf.h:62
T *& end()
Definition: v_array.h:43
float f
Definition: cache.cc:40

◆ set()

void io_buf::set ( char *  p)
inline

Definition at line 163 of file io_buf.h.

Referenced by cache_tag(), output_byte(), output_features(), and read_cached_features().

163 { head = p; }
char * head
Definition: io_buf.h:67

◆ verify_hash()

void io_buf::verify_hash ( bool  verify)
inline

Definition at line 74 of file io_buf.h.

Referenced by save_load_header().

75  {
76  _verify_hash = verify;
77  // reset the hash so that the io_buf can be re-used for loading
78  // as it is done for Reload()
79  if (!verify)
80  _hash = 0;
81  }
bool _verify_hash
Definition: io_buf.h:57
uint32_t _hash
Definition: io_buf.h:58

◆ write_file()

virtual ssize_t io_buf::write_file ( int  f,
const void *  buf,
size_t  nbytes 
)
inline virtual

Reimplemented in memory_io_buf, and comp_io_buf.

Definition at line 190 of file io_buf.h.

References write_file_or_socket().

Referenced by flush(), and make_write_cache().

190 { return write_file_or_socket(f, buf, nbytes); }
static ssize_t write_file_or_socket(int f, const void *buf, size_t nbytes)
Definition: io_buf.cc:140
float f
Definition: cache.cc:40

◆ write_file_or_socket()

ssize_t io_buf::write_file_or_socket ( int  f,
const void *  buf,
size_t  nbytes 
)
static

Definition at line 140 of file io_buf.cc.

References is_socket().

Referenced by active_print_result(), confidence_print_result(), CB_ADF::global_print_newline(), CSOAA::global_print_newline(), ACTION_SCORE::print_action_score(), CCB::print_decision_scores(), print_raw_text(), print_result(), MWT::print_scalars(), and write_file().

141 {
142 #ifdef _WIN32
143  if (is_socket(f))
144  return send(f, reinterpret_cast<const char*>(buf), static_cast<int>(nbytes), 0);
145  else
146  return _write(f, buf, (unsigned int)nbytes);
147 #else
148  return write(f, buf, (unsigned int)nbytes);
149 #endif
150 }
static bool is_socket(int f)
Definition: io_buf.cc:116
float f
Definition: cache.cc:40

Member Data Documentation

◆ _hash

uint32_t io_buf::_hash
private

Definition at line 58 of file io_buf.h.

Referenced by hash().

◆ _verify_hash

bool io_buf::_verify_hash
private

Definition at line 57 of file io_buf.h.

◆ count

size_t io_buf::count

Definition at line 65 of file io_buf.h.

Referenced by enable_sources().

◆ current

size_t io_buf::current

Definition at line 66 of file io_buf.h.

Referenced by buf_read(), enable_sources(), isbinary(), readto(), and reset_source().

◆ currentname

v_array<char> io_buf::currentname

Definition at line 68 of file io_buf.h.

Referenced by free_parser(), make_write_cache(), and reset_source().

◆ files

v_array<int> io_buf::files

◆ finalname

v_array<char> io_buf::finalname

Definition at line 69 of file io_buf.h.

Referenced by free_parser(), make_write_cache(), and reset_source().

◆ head

char* io_buf::head

◆ INITIAL_BUFF_SIZE

constexpr size_t io_buf::INITIAL_BUFF_SIZE = 1 << 16
static private

Definition at line 59 of file io_buf.h.

◆ READ

constexpr int io_buf::READ = 1
static

◆ space

v_array<char> io_buf::space

◆ WRITE

constexpr int io_buf::WRITE = 2
static

The documentation for this class was generated from the following files: