Vowpal Wabbit
Namespaces | Functions
cache.h File Reference
#include "v_array.h"
#include "io_buf.h"
#include "example.h"

Go to the source code of this file.

Namespaces

 VW
 

Functions

char * run_len_decode (char *p, size_t &i)
 
char * run_len_encode (char *p, size_t i)
 
int read_cached_features (vw *all, v_array< example *> &examples)
 
void cache_tag (io_buf &cache, v_array< char > tag)
 
void cache_features (io_buf &cache, example *ae, uint64_t mask)
 
void output_byte (io_buf &cache, unsigned char s)
 
void output_features (io_buf &cache, unsigned char index, features &fs, uint64_t mask)
 
uint32_t VW::convert (size_t number)
 

Function Documentation

◆ cache_features()

void cache_features ( io_buf cache,
example ae,
uint64_t  mask 
)

Definition at line 203 of file cache.cc.

References cache_tag(), example_predict::feature_space, example_predict::indices, output_byte(), output_features(), v_array< T >::size(), and example::tag.

Referenced by VW::setup_example().

204 {
205  cache_tag(cache, ae->tag);
206  output_byte(cache, (unsigned char)ae->indices.size());
207 
208  for (namespace_index ns : ae->indices) output_features(cache, ns, ae->feature_space[ns], mask);
209 }
v_array< char > tag
Definition: example.h:63
v_array< namespace_index > indices
void output_byte(io_buf &cache, unsigned char s)
Definition: cache.cc:144
size_t size() const
Definition: v_array.h:68
std::array< features, NUM_NAMESPACES > feature_space
void output_features(io_buf &cache, unsigned char index, features &fs, uint64_t mask)
Definition: cache.cc:153
unsigned char namespace_index
void cache_tag(io_buf &cache, v_array< char > tag)
Definition: cache.cc:192

◆ cache_tag()

void cache_tag ( io_buf cache,
v_array< char >  tag 
)

Definition at line 192 of file cache.cc.

References v_array< T >::begin(), io_buf::buf_write(), c, io_buf::set(), and v_array< T >::size().

Referenced by cache_features(), and learn().

193 {
194  char* c;
195  cache.buf_write(c, sizeof(size_t) + tag.size());
196  *(size_t*)c = tag.size();
197  c += sizeof(size_t);
198  memcpy(c, tag.begin(), tag.size());
199  c += tag.size();
200  cache.set(c);
201 }
void set(char *p)
Definition: io_buf.h:163
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
constexpr uint64_t c
Definition: rand48.cc:12

◆ output_byte()

void output_byte ( io_buf cache,
unsigned char  s 
)

Definition at line 144 of file cache.cc.

References io_buf::buf_write(), c, and io_buf::set().

Referenced by cache_features(), and send_features().

145 {
146  char* c;
147 
148  cache.buf_write(c, 1);
149  *(c++) = s;
150  cache.set(c);
151 }
void set(char *p)
Definition: io_buf.h:163
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
constexpr uint64_t c
Definition: rand48.cc:12

◆ output_features()

void output_features ( io_buf cache,
unsigned char  index,
features fs,
uint64_t  mask 
)

Definition at line 153 of file cache.cc.

References io_buf::buf_write(), c, one_float::f, general, int_size, neg_1, run_len_encode(), io_buf::set(), features::size(), features::values, and ZigZagEncode().

Referenced by cache_features(), and send_features().

154 {
155  char* c;
156  size_t storage = fs.size() * int_size;
157  for (feature_value f : fs.values)
158  if (f != 1. && f != -1.)
159  storage += sizeof(feature_value);
160 
161  cache.buf_write(c, sizeof(index) + storage + sizeof(size_t));
162  *reinterpret_cast<unsigned char*>(c) = index;
163  c += sizeof(index);
164 
165  char* storage_size_loc = c;
166  c += sizeof(size_t);
167 
168  uint64_t last = 0;
169  for (features::iterator& f : fs)
170  {
171  feature_index fi = f.index() & mask;
172  int64_t s_diff = (fi - last);
173  uint64_t diff = ZigZagEncode(s_diff) << 2;
174  last = fi;
175 
176  if (f.value() == 1.)
177  c = run_len_encode(c, diff);
178  else if (f.value() == -1.)
179  c = run_len_encode(c, diff | neg_1);
180  else
181  {
182  c = run_len_encode(c, diff | general);
183  memcpy(c, &f.value(), sizeof(feature_value));
184  c += sizeof(feature_value);
185  }
186  }
187 
188  cache.set(c);
189  *(size_t*)storage_size_loc = c - storage_size_loc - sizeof(size_t);
190 }
uint64_t ZigZagEncode(int64_t n)
Definition: cache.cc:138
constexpr size_t int_size
Definition: cache.cc:11
float feature_value
Definition: feature_group.h:20
v_array< feature_value > values
void set(char *p)
Definition: io_buf.h:163
constexpr size_t neg_1
Definition: cache.cc:13
size_t size() const
uint64_t feature_index
Definition: feature_group.h:21
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
iterator over values and indicies
char * run_len_encode(char *p, uint64_t i)
Definition: cache.cc:25
constexpr size_t general
Definition: cache.cc:14
constexpr uint64_t c
Definition: rand48.cc:12
float f
Definition: cache.cc:40

◆ read_cached_features()

int read_cached_features ( vw all,
v_array< example *> &  examples 
)

Definition at line 65 of file cache.cc.

References io_buf::buf_read(), c, char_size, example_predict::feature_space, general, example_predict::indices, parser::input, example::l, parser::lp, neg_1, vw::p, v_array< T >::push_back(), features::push_back(), label_parser::read_cached_label, read_cached_tag(), run_len_decode(), vw::sd, io_buf::set(), example::sorted, parser::sorted_cache, vw::trace_message, and ZigZagDecode().

Referenced by enable_sources(), parse_cache(), and reset_source().

66 {
67  example* ae = examples[0];
68  ae->sorted = all->p->sorted_cache;
69  io_buf* input = all->p->input;
70 
71  size_t total = all->p->lp.read_cached_label(all->sd, &ae->l, *input);
72  if (total == 0)
73  return 0;
74  if (read_cached_tag(*input, ae) == 0)
75  return 0;
76  char* c;
77  unsigned char num_indices = 0;
78  if (input->buf_read(c, sizeof(num_indices)) < sizeof(num_indices))
79  return 0;
80  num_indices = *(unsigned char*)c;
81  c += sizeof(num_indices);
82 
83  all->p->input->set(c);
84  for (; num_indices > 0; num_indices--)
85  {
86  size_t temp;
87  unsigned char index = 0;
88  if ((temp = input->buf_read(c, sizeof(index) + sizeof(size_t))) < sizeof(index) + sizeof(size_t))
89  {
90  all->trace_message << "truncated example! " << temp << " " << char_size + sizeof(size_t) << std::endl;
91  return 0;
92  }
93 
94  index = *(unsigned char*)c;
95  c += sizeof(index);
96  ae->indices.push_back((size_t)index);
97  features& ours = ae->feature_space[index];
98  size_t storage = *(size_t*)c;
99  c += sizeof(size_t);
100  all->p->input->set(c);
101  total += storage;
102  if (input->buf_read(c, storage) < storage)
103  {
104  all->trace_message << "truncated example! wanted: " << storage << " bytes" << std::endl;
105  return 0;
106  }
107 
108  char* end = c + storage;
109 
110  uint64_t last = 0;
111 
112  for (; c != end;)
113  {
114  feature_index i = 0;
115  c = run_len_decode(c, i);
116  feature_value v = 1.f;
117  if (i & neg_1)
118  v = -1.;
119  else if (i & general)
120  {
121  v = ((one_float*)c)->f;
122  c += sizeof(float);
123  }
124  uint64_t diff = i >> 2;
125  int64_t s_diff = ZigZagDecode(diff);
126  if (s_diff < 0)
127  ae->sorted = false;
128  i = last + s_diff;
129  last = i;
130  ours.push_back(v, i);
131  }
132  all->p->input->set(c);
133  }
134 
135  return (int)total;
136 }
v_array< namespace_index > indices
size_t read_cached_tag(io_buf &cache, example *ae)
Definition: cache.cc:39
void push_back(feature_value v, feature_index i)
constexpr size_t char_size
Definition: cache.cc:12
bool sorted_cache
Definition: parser.h:78
bool sorted
Definition: example.h:78
the core definition of a set of features.
float feature_value
Definition: feature_group.h:20
io_buf * input
Definition: parser.h:69
char * run_len_decode(char *p, uint64_t &i)
Definition: cache.cc:16
void set(char *p)
Definition: io_buf.h:163
constexpr size_t neg_1
Definition: cache.cc:13
parser * p
Definition: global_data.h:377
std::array< features, NUM_NAMESPACES > feature_space
void push_back(const T &new_ele)
Definition: v_array.h:107
shared_data * sd
Definition: global_data.h:375
vw_ostream trace_message
Definition: global_data.h:424
uint64_t feature_index
Definition: feature_group.h:21
Definition: io_buf.h:54
polylabel l
Definition: example.h:57
int64_t ZigZagDecode(uint64_t n)
Definition: cache.cc:37
size_t(* read_cached_label)(shared_data *, void *, io_buf &cache)
Definition: label_parser.h:15
constexpr size_t general
Definition: cache.cc:14
constexpr uint64_t c
Definition: rand48.cc:12
label_parser lp
Definition: parser.h:102
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ run_len_decode()

char* run_len_decode ( char *  p,
size_t &  i 
)

◆ run_len_encode()

char* run_len_encode ( char *  p,
size_t  i 
)