Vowpal Wabbit
Classes | Functions | Variables
cache.cc File Reference
#include "cache.h"
#include "unique_sort.h"
#include "global_data.h"
#include "vw.h"

Go to the source code of this file.

Classes

struct  one_float
 

Functions

char * run_len_decode (char *p, uint64_t &i)
 
char * run_len_encode (char *p, uint64_t i)
 
int64_t ZigZagDecode (uint64_t n)
 
size_t read_cached_tag (io_buf &cache, example *ae)
 
struct one_float __attribute__ ((packed))
 
int read_cached_features (vw *all, v_array< example *> &examples)
 
uint64_t ZigZagEncode (int64_t n)
 
void output_byte (io_buf &cache, unsigned char s)
 
void output_features (io_buf &cache, unsigned char index, features &fs, uint64_t mask)
 
void cache_tag (io_buf &cache, v_array< char > tag)
 
void cache_features (io_buf &cache, example *ae, uint64_t mask)
 

Variables

constexpr size_t int_size = 11
 
constexpr size_t char_size = 2
 
constexpr size_t neg_1 = 1
 
constexpr size_t general = 2
 
float f
 

Function Documentation

◆ __attribute__()

struct one_float __attribute__ ( (packed)  )

◆ cache_features()

void cache_features ( io_buf &  cache,
example *  ae,
uint64_t  mask 
)

Definition at line 203 of file cache.cc.

References cache_tag(), example_predict::feature_space, example_predict::indices, output_byte(), output_features(), v_array< T >::size(), and example::tag.

Referenced by VW::setup_example().

204 {
205  cache_tag(cache, ae->tag);
206  output_byte(cache, (unsigned char)ae->indices.size());
207 
208  for (namespace_index ns : ae->indices) output_features(cache, ns, ae->feature_space[ns], mask);
209 }
v_array< char > tag
Definition: example.h:63
v_array< namespace_index > indices
void output_byte(io_buf &cache, unsigned char s)
Definition: cache.cc:144
size_t size() const
Definition: v_array.h:68
std::array< features, NUM_NAMESPACES > feature_space
void output_features(io_buf &cache, unsigned char index, features &fs, uint64_t mask)
Definition: cache.cc:153
unsigned char namespace_index
void cache_tag(io_buf &cache, v_array< char > tag)
Definition: cache.cc:192

◆ cache_tag()

void cache_tag ( io_buf &  cache,
v_array< char >  tag 
)

Definition at line 192 of file cache.cc.

References v_array< T >::begin(), io_buf::buf_write(), c, io_buf::set(), and v_array< T >::size().

Referenced by cache_features(), and learn().

193 {
194  char* c;
195  cache.buf_write(c, sizeof(size_t) + tag.size());
196  *(size_t*)c = tag.size();
197  c += sizeof(size_t);
198  memcpy(c, tag.begin(), tag.size());
199  c += tag.size();
200  cache.set(c);
201 }
void set(char *p)
Definition: io_buf.h:163
T *& begin()
Definition: v_array.h:42
size_t size() const
Definition: v_array.h:68
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
constexpr uint64_t c
Definition: rand48.cc:12

◆ output_byte()

void output_byte ( io_buf &  cache,
unsigned char  s 
)

Definition at line 144 of file cache.cc.

References io_buf::buf_write(), c, and io_buf::set().

Referenced by cache_features(), and send_features().

145 {
146  char* c;
147 
148  cache.buf_write(c, 1);
149  *(c++) = s;
150  cache.set(c);
151 }
void set(char *p)
Definition: io_buf.h:163
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
constexpr uint64_t c
Definition: rand48.cc:12

◆ output_features()

void output_features ( io_buf &  cache,
unsigned char  index,
features &  fs,
uint64_t  mask 
)

Definition at line 153 of file cache.cc.

References io_buf::buf_write(), c, one_float::f, general, int_size, neg_1, run_len_encode(), io_buf::set(), features::size(), features::values, and ZigZagEncode().

Referenced by cache_features(), and send_features().

154 {
155  char* c;
156  size_t storage = fs.size() * int_size;
157  for (feature_value f : fs.values)
158  if (f != 1. && f != -1.)
159  storage += sizeof(feature_value);
160 
161  cache.buf_write(c, sizeof(index) + storage + sizeof(size_t));
162  *reinterpret_cast<unsigned char*>(c) = index;
163  c += sizeof(index);
164 
165  char* storage_size_loc = c;
166  c += sizeof(size_t);
167 
168  uint64_t last = 0;
169  for (features::iterator& f : fs)
170  {
171  feature_index fi = f.index() & mask;
172  int64_t s_diff = (fi - last);
173  uint64_t diff = ZigZagEncode(s_diff) << 2;
174  last = fi;
175 
176  if (f.value() == 1.)
177  c = run_len_encode(c, diff);
178  else if (f.value() == -1.)
179  c = run_len_encode(c, diff | neg_1);
180  else
181  {
182  c = run_len_encode(c, diff | general);
183  memcpy(c, &f.value(), sizeof(feature_value));
184  c += sizeof(feature_value);
185  }
186  }
187 
188  cache.set(c);
189  *(size_t*)storage_size_loc = c - storage_size_loc - sizeof(size_t);
190 }
uint64_t ZigZagEncode(int64_t n)
Definition: cache.cc:138
constexpr size_t int_size
Definition: cache.cc:11
float feature_value
Definition: feature_group.h:20
v_array< feature_value > values
void set(char *p)
Definition: io_buf.h:163
constexpr size_t neg_1
Definition: cache.cc:13
size_t size() const
uint64_t feature_index
Definition: feature_group.h:21
void buf_write(char *&pointer, size_t n)
Definition: io_buf.cc:94
iterator over values and indices
char * run_len_encode(char *p, uint64_t i)
Definition: cache.cc:25
constexpr size_t general
Definition: cache.cc:14
constexpr uint64_t c
Definition: rand48.cc:12
float f
Definition: cache.cc:40

◆ read_cached_features()

int read_cached_features ( vw *  all,
v_array< example *> &  examples 
)

Definition at line 65 of file cache.cc.

References io_buf::buf_read(), c, char_size, example_predict::feature_space, general, example_predict::indices, parser::input, example::l, parser::lp, neg_1, vw::p, v_array< T >::push_back(), features::push_back(), label_parser::read_cached_label, read_cached_tag(), run_len_decode(), vw::sd, io_buf::set(), example::sorted, parser::sorted_cache, vw::trace_message, and ZigZagDecode().

Referenced by enable_sources(), parse_cache(), and reset_source().

66 {
67  example* ae = examples[0];
68  ae->sorted = all->p->sorted_cache;
69  io_buf* input = all->p->input;
70 
71  size_t total = all->p->lp.read_cached_label(all->sd, &ae->l, *input);
72  if (total == 0)
73  return 0;
74  if (read_cached_tag(*input, ae) == 0)
75  return 0;
76  char* c;
77  unsigned char num_indices = 0;
78  if (input->buf_read(c, sizeof(num_indices)) < sizeof(num_indices))
79  return 0;
80  num_indices = *(unsigned char*)c;
81  c += sizeof(num_indices);
82 
83  all->p->input->set(c);
84  for (; num_indices > 0; num_indices--)
85  {
86  size_t temp;
87  unsigned char index = 0;
88  if ((temp = input->buf_read(c, sizeof(index) + sizeof(size_t))) < sizeof(index) + sizeof(size_t))
89  {
90  all->trace_message << "truncated example! " << temp << " " << char_size + sizeof(size_t) << std::endl;
91  return 0;
92  }
93 
94  index = *(unsigned char*)c;
95  c += sizeof(index);
96  ae->indices.push_back((size_t)index);
97  features& ours = ae->feature_space[index];
98  size_t storage = *(size_t*)c;
99  c += sizeof(size_t);
100  all->p->input->set(c);
101  total += storage;
102  if (input->buf_read(c, storage) < storage)
103  {
104  all->trace_message << "truncated example! wanted: " << storage << " bytes" << std::endl;
105  return 0;
106  }
107 
108  char* end = c + storage;
109 
110  uint64_t last = 0;
111 
112  for (; c != end;)
113  {
114  feature_index i = 0;
115  c = run_len_decode(c, i);
116  feature_value v = 1.f;
117  if (i & neg_1)
118  v = -1.;
119  else if (i & general)
120  {
121  v = ((one_float*)c)->f;
122  c += sizeof(float);
123  }
124  uint64_t diff = i >> 2;
125  int64_t s_diff = ZigZagDecode(diff);
126  if (s_diff < 0)
127  ae->sorted = false;
128  i = last + s_diff;
129  last = i;
130  ours.push_back(v, i);
131  }
132  all->p->input->set(c);
133  }
134 
135  return (int)total;
136 }
v_array< namespace_index > indices
size_t read_cached_tag(io_buf &cache, example *ae)
Definition: cache.cc:39
void push_back(feature_value v, feature_index i)
constexpr size_t char_size
Definition: cache.cc:12
bool sorted_cache
Definition: parser.h:78
bool sorted
Definition: example.h:78
the core definition of a set of features.
float feature_value
Definition: feature_group.h:20
io_buf * input
Definition: parser.h:69
char * run_len_decode(char *p, uint64_t &i)
Definition: cache.cc:16
void set(char *p)
Definition: io_buf.h:163
constexpr size_t neg_1
Definition: cache.cc:13
parser * p
Definition: global_data.h:377
std::array< features, NUM_NAMESPACES > feature_space
void push_back(const T &new_ele)
Definition: v_array.h:107
shared_data * sd
Definition: global_data.h:375
vw_ostream trace_message
Definition: global_data.h:424
uint64_t feature_index
Definition: feature_group.h:21
Definition: io_buf.h:54
polylabel l
Definition: example.h:57
int64_t ZigZagDecode(uint64_t n)
Definition: cache.cc:37
size_t(* read_cached_label)(shared_data *, void *, io_buf &cache)
Definition: label_parser.h:15
constexpr size_t general
Definition: cache.cc:14
constexpr uint64_t c
Definition: rand48.cc:12
label_parser lp
Definition: parser.h:102
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ read_cached_tag()

size_t read_cached_tag ( io_buf &  cache,
example *  ae 
)

Definition at line 39 of file cache.cc.

Referenced by read_cached_features().

40 {
41  char* c;
42  size_t tag_size;
43  if (cache.buf_read(c, sizeof(tag_size)) < sizeof(tag_size))
44  return 0;
45  tag_size = *(size_t*)c;
46  c += sizeof(tag_size);
47  cache.set(c);
48  if (cache.buf_read(c, tag_size) < tag_size)
49  return 0;
50 
51  ae->tag.clear();
52  push_many(ae->tag, c, tag_size);
53  return tag_size + sizeof(tag_size);
54 }
v_array< char > tag
Definition: example.h:63
void set(char *p)
Definition: io_buf.h:163
void push_many(v_array< T > &v, const T *_begin, size_t num)
Definition: v_array.h:207
void clear()
Definition: v_array.h:88
constexpr uint64_t c
Definition: rand48.cc:12
size_t buf_read(char *&pointer, size_t n)
Definition: io_buf.cc:12

◆ run_len_decode()

char* run_len_decode ( char *  p,
uint64_t &  i 
)
inline

Definition at line 16 of file cache.cc.

Referenced by read_cached_features().

17 {
18  // read an int 7 bits at a time.
19  size_t count = 0;
20  while (*p & 128) i = i | ((uint64_t)(*(p++) & 127) << 7 * count++);
21  i = i | ((uint64_t)(*(p++)) << 7 * count);
22  return p;
23 }

◆ run_len_encode()

char* run_len_encode ( char *  p,
uint64_t  i 
)
inline

Definition at line 25 of file cache.cc.

Referenced by output_features().

26 {
27  // store an int 7 bits at a time.
28  while (i >= 128)
29  {
30  *(p++) = (i & 127) | 128;
31  i = i >> 7;
32  }
33  *(p++) = (i & 127);
34  return p;
35 }

◆ ZigZagDecode()

int64_t ZigZagDecode ( uint64_t  n)
inline

Definition at line 37 of file cache.cc.

Referenced by read_cached_features().

37 { return (n >> 1) ^ -static_cast<int64_t>(n & 1); }

◆ ZigZagEncode()

uint64_t ZigZagEncode ( int64_t  n)
inline

Definition at line 138 of file cache.cc.

Referenced by output_features().

139 {
140  uint64_t ret = (n << 1) ^ (n >> 63);
141  return ret;
142 }

Variable Documentation

◆ char_size

constexpr size_t char_size = 2

Definition at line 12 of file cache.cc.

Referenced by read_cached_features().

◆ f

float f

Definition at line 40 of file cache.cc.

Referenced by active_cover_setup(), DepParserTask::add_all_features(), DepParserTask::add_feature(), addbufs(), all_reduce(), AllReduceThreads::all_reduce(), AllReduceSockets::all_reduce_init(), Search::allowed_actions_to_label(), BASELINE::baseline_enabled(), bfgs_iter_middle(), VW::cb_explore_adf::regcb::cb_explore_adf_regcb::binary_search(), binarySearch(), boosting_setup(), bs_predict_vote(), cb_adf_setup(), cb_data_epsilon_0_skype_jb_test_runner(), cb_explore_setup(), cbify_setup(), cbifyldf_setup(), collision_cleanup(), Beam::beam< T >::compact(), compute_coherence_metrics(), OjaNewton::compute_delta(), recall_tree_ns::compute_recall_lbest(), cs_active_setup(), memory_tree_ns::diag_kronecker_prod_fs_test(), CSOAA::do_actual_learning_wap(), CB::ec_is_example_header(), enable_sources(), end_examples(), VW::export_example(), memory_tree_ns::F1_score_for_two_examples(), ldamath::fastdigamma(), fastexp(), ldamath::fastexp(), ldamath::fastlgamma(), fastpow2(), ldamath::fastpow2(), fasttanh(), finalize_preconditioner(), finalize_source(), find_cost_range(), find_in_path(), FloatToFloatState< audit >::Float(), float_of_substring(), Search::BaseTask::foreach_action(), GD::foreach_feature(), polar_normal_weights_wrapper< T >::func(), gd_mf_setup(), GEN_CS::gen_cs_example(), generate_cb_data_5(), get_active_coin_bias(), CLASSWEIGHTS::classweights::get_class_weight(), CB_EXPLORE::get_cover_probabilities(), get_pmin(), vw::get_random_state(), get_threshold(), hingeloss::getLoss(), logloss::getLoss(), poisson_loss::getLoss(), squaredloss::getUpdate(), glf1(), CB_ADF::global_print_newline(), CSOAA::global_print_newline(), Search::handle_condition_options(), EntityRelationTask::initialize(), ArgmaxTask::initialize(), OjaNewton::initialize_Z(), CSOAA::inner_loop(), inner_update_pistol_state_and_predict(), Beam::beam< T >::insert(), is_number(), kernel_svm_setup(), lda_loop(), lda_setup(), learn(), learn_batch(), learn_with_metrics(), memory_tree_ns::linear_kernel(), 
linear_kernel(), MARGINAL::make_marginal(), make_write_cache(), Search::BaseTask::maybe_override_prediction(), memory_tree_setup(), mf_print_offset_features(), OjaNewton_setup(), output_and_account_confidence_example(), output_and_account_example(), no_label::output_and_account_no_label_example(), MULTILABEL::output_example(), CB_EXPLORE::output_example(), parse_cache(), parse_example_tweaks(), CB::parse_label(), COST_SENSITIVE::parse_label(), CCB::parse_label(), parse_output_preds(), Beam::beam< T >::pop_best_item(), Search::BaseTask::post_prediction(), preconditioner_to_regularizer(), vw_slim::vw_predict< W >::predict(), MWT::predict_or_learn(), predict_or_learn(), predict_or_learn_active(), predict_or_learn_active_cover(), CB_EXPLORE::predict_or_learn_cover(), VW::cb_explore_adf::greedy::cb_explore_adf_greedy::predict_or_learn_impl(), VW::cb_explore_adf::bag::cb_explore_adf_bag::predict_or_learn_impl(), VW::cb_explore_adf::cover::cb_explore_adf_cover::predict_or_learn_impl(), predict_or_learn_multi(), GD::print_features(), GD::print_lda_features(), process_pass(), query_decision(), Search::random_policy(), Search::read_allowed_transitions(), comp_io_buf::read_file(), io_buf::read_file(), recall_tree_setup(), BASELINE::reset_baseline_disabled(), comp_io_buf::reset_file(), reset_source(), return_example(), ACTION_SCORE::reverse_order(), VW::SpanningTree::Run(), GraphTask::run(), run_predict_in_memory(), GEN_CS::safe_probability(), Search::safediv(), save_load(), save_load_sampling(), BASELINE::set_baseline_enabled(), LEARNER::learner< CB_EXPLORE::cb_explore, example >::set_end_examples(), LEARNER::learner< CB_EXPLORE::cb_explore, example >::set_end_pass(), LEARNER::learner< CB_EXPLORE::cb_explore, example >::set_finish(), LEARNER::learner< CB_EXPLORE::cb_explore, example >::set_finish_example(), LEARNER::learner< CB_EXPLORE::cb_explore, example >::set_init_driver(), VW::cb_explore_adf::softmax::setup(), VW::cb_explore_adf::greedy::setup(), 
VW::cb_explore_adf::cover::setup(), VW::cb_explore_adf::regcb::setup(), GD::setup(), Search::setup(), setup_lambdas(), sign(), memory_tree_ns::single_query_and_learn(), MultiState< audit >::StartArray(), vw_slim::stride_shift_guard::stride_shift_guard(), summarize_holdout_set(), TEST(), TEST_P(), recall_tree_ns::to_prob(), DepParserTask::transition_eager(), DepParserTask::transition_hybrid(), OjaNewton::update_eigenvalues(), VW_GetFeatureSpace(), VW_ImportExample(), VW_ReleaseFeatureSpace(), VW_SetFeatureSpace(), warm_cb_setup(), Search::BaseTask::with_output_string(), and vw_slim::stride_shift_guard::~stride_shift_guard().

◆ general

constexpr size_t general = 2

Definition at line 14 of file cache.cc.

Referenced by output_features(), and read_cached_features().

◆ int_size

constexpr size_t int_size = 11

Definition at line 11 of file cache.cc.

Referenced by output_features().

◆ neg_1

constexpr size_t neg_1 = 1

Definition at line 13 of file cache.cc.

Referenced by output_features(), and read_cached_features().