Vowpal Wabbit
ut_explore.cc
Go to the documentation of this file.
1 #include "gtest/gtest.h"
2 #include "gmock/gmock.h"
3 #include "ut_util.h"
4 #include <vector>
5 #include <sstream>
6 #include "explore.h"
7 #include "vw_slim_predict.h"
8 using namespace ::testing;
9 
10 TEST(ExploreTestSuite, EpsilonGreedy)
11 {
12  std::vector<float> pdf(4);
13  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_epsilon_greedy(0.4f, 2, begin(pdf), end(pdf)));
14  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-6f), std::vector<float>{0.1f, 0.1f, 0.7f, 0.1f}));
15 }
16 
17 TEST(ExploreTestSuite, EpsilonGreedyTopActionOutOfBounds)
18 {
19  std::vector<float> pdf(4);
20  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_epsilon_greedy(0.4f, 8, begin(pdf), end(pdf)));
21  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-6f), std::vector<float>{0.1f, 0.1f, 0.1f, 0.7f}));
22 }
23 
24 TEST(ExploreTestSuite, EpsilonGreedy_bad_range)
25 {
26  std::vector<float> pdf;
27  float x;
28  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::generate_epsilon_greedy(0.4f, 0, begin(pdf), end(pdf)));
29  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::generate_epsilon_greedy(0.4f, 0, &x, &x - 3));
30  EXPECT_THAT(pdf.size(), 0);
31 }
32 
33 TEST(ExploreTestSuite, Softmax)
34 {
35  std::vector<float> scores = {1, 2, 3, 8};
36  std::vector<float> pdf(4);
37  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_softmax(0.2f, begin(scores), end(scores), begin(pdf), end(pdf)));
38  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{0.128f, 0.157f, 0.192f, 0.522f}));
39 }
40 
41 TEST(ExploreTestSuite, SoftmaxInBalanced)
42 {
43  std::vector<float> scores = {1, 2, 3};
44  std::vector<float> pdf(4);
45  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_softmax(0.2f, begin(scores), end(scores), begin(pdf), end(pdf)));
46  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{0.269f, 0.328f, 0.401f, 0}));
47 }
48 
49 TEST(ExploreTestSuite, SoftmaxInBalanced2)
50 {
51  std::vector<float> scores = {1, 2, 3, 8, 4};
52  std::vector<float> pdf(4);
53  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_softmax(0.2f, begin(scores), end(scores), begin(pdf), end(pdf)));
54  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{0.128f, 0.157f, 0.192f, 0.522f}));
55 }
56 
57 TEST(ExploreTestSuite, Softmax_bad_range)
58 {
59  std::vector<float> scores;
60  std::vector<float> pdf;
61  float x;
62  EXPECT_THAT(
63  E_EXPLORATION_BAD_RANGE, exploration::generate_softmax(0.2f, begin(scores), end(scores), begin(pdf), end(pdf)));
64  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::generate_softmax(0.2f, begin(scores), end(scores), &x, &x - 3));
65 }
66 
67 TEST(ExploreTestSuite, Bag)
68 {
69  std::vector<uint16_t> top_actions = {0, 0, 1, 1};
70  std::vector<float> pdf(4);
71  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_bag(begin(top_actions), end(top_actions), begin(pdf), end(pdf)));
72  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{0, 0, 0.5, 0.5f}));
73 }
74 
75 TEST(ExploreTestSuite, Bag10)
76 {
77  std::vector<uint16_t> top_actions = {10};
78  std::vector<float> pdf(4);
79  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_bag(begin(top_actions), end(top_actions), begin(pdf), end(pdf)));
80  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{1.f, 0, 0, 0}));
81 }
82 
83 TEST(ExploreTestSuite, BagEmpty)
84 {
85  std::vector<uint16_t> top_actions;
86  std::vector<float> pdf(4);
87  EXPECT_THAT(S_EXPLORATION_OK, exploration::generate_bag(begin(top_actions), end(top_actions), begin(pdf), end(pdf)));
88  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{1.f, 0, 0, 0}));
89 }
90 
91 TEST(ExploreTestSuite, Bag_bad_range)
92 {
93  std::vector<uint16_t> top_actions;
94  std::vector<float> pdf;
95  float x;
96 
97  EXPECT_THAT(
98  E_EXPLORATION_BAD_RANGE, exploration::generate_bag(begin(top_actions), end(top_actions), begin(pdf), end(pdf)));
99  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::generate_bag(begin(top_actions), end(top_actions), &x, &x - 3));
100 }
101 
103 {
104  std::vector<float> pdf = {1.f, 0, 0};
105  exploration::enforce_minimum_probability(0.3f, true, begin(pdf), end(pdf));
106  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{.8f, .1f, .1f}));
107 }
108 
109 TEST(ExploreTestSuite, enforce_minimum_probability_no_zeros)
110 {
111  std::vector<float> pdf = {0.9f, 0.1f, 0};
112  EXPECT_THAT(S_EXPLORATION_OK, exploration::enforce_minimum_probability(0.6f, false, begin(pdf), end(pdf)));
113  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{.8f, .2f, .0f}));
114 }
115 
116 TEST(ExploreTestSuite, enforce_minimum_probability_uniform)
117 {
118  std::vector<float> pdf = {0.9f, 0.1f, 0, 0};
119  EXPECT_THAT(S_EXPLORATION_OK, exploration::enforce_minimum_probability(1.f, true, begin(pdf), end(pdf)));
120  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{.25f, .25f, .25f, .25f}));
121 }
122 
123 TEST(ExploreTestSuite, enforce_minimum_probability_uniform_no_zeros)
124 {
125  std::vector<float> pdf = {0.9f, 0.1f, 0};
126  EXPECT_THAT(S_EXPLORATION_OK, exploration::enforce_minimum_probability(1.f, false, begin(pdf), end(pdf)));
127  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-3f), std::vector<float>{.5f, .5f, .0f}));
128 }
129 
130 TEST(ExploreTestSuite, enforce_minimum_probability_bad_range)
131 {
132  std::vector<float> pdf;
133  float x;
134  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::enforce_minimum_probability(1.f, false, begin(pdf), end(pdf)));
135  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::enforce_minimum_probability(1.f, false, &x, &x - 3));
136 }
137 
138 TEST(ExploreTestSuite, sampling)
139 {
140  std::vector<float> pdf = {0.8f, 0.1f, 0.1f};
141  std::vector<float> histogram(3);
142 
143  size_t rep = 10000;
144  uint32_t chosen_index;
145  for (size_t i = 0; i < rep; i++)
146  {
147  std::stringstream s;
148  s << "abcde" << i;
149  ASSERT_EQ(0, exploration::sample_after_normalizing(s.str().c_str(), std::begin(pdf), std::end(pdf), chosen_index));
150 
151  histogram[chosen_index]++;
152  }
153  for (auto& d : histogram) d /= rep;
154 
155  EXPECT_THAT(pdf, Pointwise(FloatNearPointwise(1e-2f), histogram));
156 }
157 
158 TEST(PairIteratorTestSuite, simple_test)
159 {
160  using ActionType = size_t;
161  // Verify sort of actions using scores
162  const int num_actions = 3;
163  ActionType actions[num_actions];
164  float pdf[num_actions];
165  float n = 0.f;
166  std::generate(std::begin(pdf), std::end(pdf), [&n]() { return n++; });
167  std::iota(std::begin(actions), std::end(actions), 0);
168  float scores[] = {.4f, .1f, .2f};
169 
170  // Sort two vectors using scores
171  using FirstVal = ActionType;
172  using SecondVal = float;
173  using FirstIt = FirstVal*;
174  using SecondIt = SecondVal*;
178 
179  const iter begin_coll(std::begin(actions), std::begin(pdf));
180  const iter end_coll(std::end(actions), std::end(pdf));
181  size_t diff = end_coll - begin_coll;
182  std::sort(begin_coll, end_coll, [&scores](const loc& l, const loc& r) { return scores[l._val1] < scores[r._val1]; });
183 
184  EXPECT_THAT(actions, ElementsAre(1, 2, 0));
185  EXPECT_THAT(pdf, ElementsAre(1.0f, 2.0f, 0.0f));
186 }
187 
188 TEST(ExploreTestSuite, sampling_rank)
189 {
190  std::vector<float> scores = {0.f, 3.f, 1.f}; // 1,2,0 <-- top ranking
191  std::vector<float> histogram(scores.size() * scores.size());
192  std::vector<int> ranking(3);
193 
194  // std::fstream log("vwslim-debug.log", std::fstream::app);
195 
196  size_t rep = 50000;
197  for (size_t i = 0; i < rep; i++)
198  {
199  std::vector<float> pdf = {0.8f, 0.1f, 0.1f};
200 
201  std::stringstream s;
202  s << "abcde" << i;
203 
204  ASSERT_EQ(S_EXPLORATION_OK,
205  vw_slim::vw_predict<float>::sort_by_scores(std::begin(pdf), std::end(pdf), std::begin(scores), std::end(scores),
206  std::begin(ranking), std::end(ranking)));
207 
208  // Sample from the pdf
209  uint32_t chosen_action_idx;
210  ASSERT_EQ(S_EXPLORATION_OK,
211  exploration::sample_after_normalizing(s.str().c_str(), std::begin(pdf), std::end(pdf), chosen_action_idx));
212 
213  // Swap top element with chosen one (unless chosen is the top)
214  if (chosen_action_idx != 0)
215  {
216  std::iter_swap(std::begin(ranking), std::begin(ranking) + chosen_action_idx);
217  std::iter_swap(std::begin(pdf), std::begin(pdf) + chosen_action_idx);
218  }
219 
220  for (size_t i = 0; i < ranking.size(); i++) histogram[i * ranking.size() + ranking[i]]++;
221  }
222 
223  for (auto& d : histogram) d /= rep;
224 
225  // best order is 0, 2, 1
226  // rows: slots
227  // cols: actions
228  std::vector<float> ranking_pdf = {
229  // see top action 1 w / 0.8
230  0.8f, 0.1f, 0.1f, // slot 0
231  // most of the time we should see action 0
232  // in 10% we should see the top action 1 swapped from top-slot to here
233  0.1f, 0.0f, 0.9f, // slot 1
234  // most of the time we should see action 2
235  // in 10% we should see the top action swapped from top-slot to here
236  0.1f, 0.9f, 0.0f, // slot 2
237  };
238 
239  // for (size_t i = 0; i < 3; i++)
240  //{
241  // log << "slot " << i << " ";
242  // for (size_t j = 0; j < 3; j++)
243  // log << histogram[i * 3 + j] << " ";
244  // log << std::endl;
245  //}
246 
247  EXPECT_THAT(histogram, Pointwise(FloatNearPointwise(1e-2f), ranking_pdf));
248 }
249 
250 TEST(ExploreTestSuite, sampling_rank_bad_range)
251 {
252  std::vector<float> pdf;
253  std::vector<float> scores;
254  std::vector<int> ranking(3);
255  float x;
256  uint32_t chosen_index;
257 
258  EXPECT_THAT(E_EXPLORATION_BAD_RANGE,
259  exploration::sample_after_normalizing("abc", std::begin(pdf), std::end(pdf), chosen_index));
260  EXPECT_THAT(E_EXPLORATION_BAD_RANGE, exploration::sample_after_normalizing("abc", &x, &x - 3, chosen_index));
261 }
262 
263 TEST(ExploreTestSuite, sampling_rank_zero_pdf)
264 {
265  std::vector<float> pdf = {0.f, 0.f, 0.f};
266  std::vector<float> expected_pdf = {1.f, 0.f, 0.f};
267 
268  uint32_t chosen_index;
269 
270  EXPECT_THAT(
271  S_EXPLORATION_OK, exploration::sample_after_normalizing("abc", std::begin(pdf), std::end(pdf), chosen_index));
272 
273  EXPECT_THAT(expected_pdf, Pointwise(FloatNearPointwise(1e-2f), pdf));
274 }
275 
276 TEST(ExploreTestSuite, sampling_rank_negative_pdf)
277 {
278  std::vector<float> pdf = {1.0f, -2.f, -3.f};
279  std::vector<float> expected_pdf = {1.f, 0.f, 0.f};
280  std::vector<int> ranking(3);
281  uint32_t chosen_index;
282 
283  EXPECT_THAT(
284  S_EXPLORATION_OK, exploration::sample_after_normalizing("abc", std::begin(pdf), std::end(pdf), chosen_index));
285  EXPECT_THAT(expected_pdf, Pointwise(FloatNearPointwise(1e-2f), pdf));
286  EXPECT_THAT(0, chosen_index);
287 }
int generate_bag(InputIt top_actions_first, InputIt top_actions_last, OutputIt pdf_first, OutputIt pdf_last)
Generates an exploration distribution according to votes on actions.
int sample_after_normalizing(uint64_t seed, It pdf_first, It pdf_last, uint32_t &chosen_index)
Sample an index from the provided pdf. If the pdf is not normalized it will be updated in-place...
Vowpal Wabbit slim predictor. Supports: regression, multi-class classification and contextual bandits...
int generate_softmax(float lambda, InputIt scores_first, InputIt scores_last, OutputIt pdf_first, OutputIt pdf_last)
Generates softmax style exploration distribution.
TEST(ExploreTestSuite, EpsilonGreedy)
Definition: ut_explore.cc:10
int generate_epsilon_greedy(float epsilon, uint32_t top_action, It pdf_first, It pdf_last)
Generates epsilon-greedy style exploration distribution.
#define E_EXPLORATION_BAD_RANGE
Definition: explore.h:4
int enforce_minimum_probability(float minimum_uniform, bool update_zero_elements, It pdf_first, It pdf_last)
Updates the pdf to ensure each action is explored with at least minimum_uniform/num_actions.
#define S_EXPLORATION_OK
Definition: explore.h:3
float f
Definition: cache.cc:40