Vowpal Wabbit
parse_example_json.h
Go to the documentation of this file.
1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5 */
6 
7 #pragma once
8 
9 #include "parse_primitives.h"
10 #include "v_array.h"
11 
12 // seems to help with skipping spaces
13 //#define RAPIDJSON_SIMD
14 //#define RAPIDJSON_SSE42
15 
16 // Let MSVC know that it should not even try to compile RapidJSON as managed
17 // - pragma documentation: https://docs.microsoft.com/en-us/cpp/preprocessor/managed-unmanaged?view=vs-2017
18 // - /clr compilation detection: https://docs.microsoft.com/en-us/cpp/dotnet/how-to-detect-clr-compilation?view=vs-2017
19 #if (_MANAGED == 1) || (_M_CEE == 1)
20 #pragma managed(push, off)
21 #endif
22 
23 #include <rapidjson/reader.h>
24 #include <rapidjson/error/en.h>
25 
26 #if (_MANAGED == 1) || (_M_CEE == 1)
27 #pragma managed(pop)
28 #endif
29 
30 #include "cb.h"
32 
33 #include "best_constant.h"
34 
35 #include <algorithm>
36 #include <vector>
37 
38 // portability fun
39 #ifndef _WIN32
40 #define _stricmp strcasecmp
41 #endif
42 
43 using namespace rapidjson;
44 
45 struct vw;
46 
47 template <bool audit>
48 struct BaseState;
49 
50 template <bool audit>
51 struct Context;
52 
53 template <bool audit>
54 struct Namespace
55 {
59  size_t feature_count;
61  const char* name;
62 
63  void AddFeature(feature_value v, feature_index i, const char* feature_name)
64  {
65  // filter out 0-values
66  if (v == 0)
67  return;
68 
69  ftrs->push_back(v, i);
70  feature_count++;
71 
72  if (audit)
73  ftrs->space_names.push_back(audit_strings_ptr(new audit_strings(name, feature_name)));
74  }
75 
76  void AddFeature(vw* all, const char* str)
77  {
78  ftrs->push_back(1., VW::hash_feature(*all, str, namespace_hash));
79  feature_count++;
80 
81  if (audit)
83  }
84 };
85 
86 template <bool audit>
87 struct BaseState
88 {
89  const char* name;
90 
91  BaseState(const char* pname) : name(pname) {}
92 
94  {
95  // ignore Null by default and stay in the current state
96  return ctx.previous_state == nullptr ? this : ctx.previous_state;
97  }
98 
99  virtual BaseState<audit>* Bool(Context<audit>& ctx, bool b)
100  {
101  ctx.error() << "Unexpected token: bool (" << (b ? "true" : "false") << ")";
102  return nullptr;
103  }
104 
105  virtual BaseState<audit>* Float(Context<audit>& ctx, float v)
106  {
107  ctx.error() << "Unexpected token: float (" << v << ")";
108  return nullptr;
109  }
110 
111  virtual BaseState<audit>* Uint(Context<audit>& ctx, unsigned v)
112  {
113  ctx.error() << "Unexpected token: uint (" << v << ")";
114  return nullptr;
115  }
116 
117  virtual BaseState<audit>* String(Context<audit>& ctx, const char* str, rapidjson::SizeType len, bool)
118  {
119  ctx.error() << "Unexpected token: std::string('" << str << "' len: " << len << ")";
120  return nullptr;
121  }
122 
124  {
125  ctx.error() << "Unexpected token: {";
126  return nullptr;
127  }
128 
129  virtual BaseState<audit>* Key(Context<audit>& ctx, const char* str, rapidjson::SizeType len, bool /* copy */)
130  {
131  ctx.error() << "Unexpected token: key('" << str << "' len: " << len << ")";
132  return nullptr;
133  }
134 
135  virtual BaseState<audit>* EndObject(Context<audit>& ctx, rapidjson::SizeType)
136  {
137  ctx.error() << "Unexpected token: }";
138  return nullptr;
139  }
140 
142  {
143  ctx.error() << "Unexpected token: [";
144  return nullptr;
145  }
146 
147  virtual BaseState<audit>* EndArray(Context<audit>& ctx, rapidjson::SizeType)
148  {
149  ctx.error() << "Unexpected token: ]";
150  return nullptr;
151  }
152 };
153 
154 template <bool audit>
155 class LabelObjectState : public BaseState<audit>
156 {
157  private:
159 
160  public:
162  bool found;
163  bool found_cb;
164  std::vector<unsigned int> actions;
165  std::vector<float> probs;
166  std::vector<unsigned int> inc;
167 
168  LabelObjectState() : BaseState<audit>("LabelObject") {}
169 
170  void init(vw* /* all */)
171  {
172  found = found_cb = false;
173 
174  cb_label = {0., 0, 0., 0.};
175  }
176 
178  {
179  ctx.all->p->lp.default_label(&ctx.ex->l);
180 
181  // don't allow { { { } } }
182  if (ctx.previous_state == this)
183  {
184  ctx.error() << "invalid label object. nested objected.";
185  return nullptr;
186  }
187 
188  // keep previous state
189  return_state = ctx.previous_state;
190 
191  return this;
192  }
193 
194  BaseState<audit>* Key(Context<audit>& ctx, const char* str, rapidjson::SizeType len, bool /* copy */) override
195  {
196  ctx.key = str;
197  ctx.key_length = len;
198  return this;
199  }
200 
201  BaseState<audit>* Float(Context<audit>& ctx, float v) override
202  {
203  // simple
204  if (!_stricmp(ctx.key, "Label"))
205  {
206  ctx.ex->l.simple.label = v;
207  found = true;
208  }
209  else if (!_stricmp(ctx.key, "Initial"))
210  {
211  ctx.ex->l.simple.initial = v;
212  found = true;
213  }
214  else if (!_stricmp(ctx.key, "Weight"))
215  {
216  ctx.ex->l.simple.weight = v;
217  found = true;
218  }
219  // CB
220  else if (!_stricmp(ctx.key, "Action"))
221  {
222  cb_label.action = (uint32_t)v;
223  found_cb = true;
224  }
225  else if (!_stricmp(ctx.key, "Cost"))
226  {
227  cb_label.cost = v;
228  found_cb = true;
229  }
230  else if (!_stricmp(ctx.key, "Probability"))
231  {
232  cb_label.probability = v;
233  found_cb = true;
234  }
235  else
236  {
237  ctx.error() << "Unsupported label property: '" << ctx.key << "' len: " << ctx.key_length;
238  return nullptr;
239  }
240 
241  return this;
242  }
243 
244  BaseState<audit>* Uint(Context<audit>& ctx, unsigned v) override { return Float(ctx, (float)v); }
245 
246  BaseState<audit>* EndObject(Context<audit>& ctx, rapidjson::SizeType) override
247  {
248  if (ctx.all->label_type == label_type::ccb)
249  {
250  auto ld = (CCB::label*)&ctx.ex->l;
251 
252  for (auto id : inc)
253  {
254  ld->explicit_included_actions.push_back(id);
255  }
256  inc.clear();
257 
258  if ((actions.size() != 0) && (probs.size() != 0))
259  {
260  auto outcome = new CCB::conditional_contextual_bandit_outcome();
261  outcome->cost = cb_label.cost;
262  if (actions.size() != probs.size())
263  {
264  THROW("Actions and probabilties must be the same length.");
265  }
266 
267  for (size_t i = 0; i < this->actions.size(); i++)
268  {
269  outcome->probabilities.push_back({actions[i], probs[i]});
270  }
271  actions.clear();
272  probs.clear();
273 
274  ld->outcome = outcome;
275  cb_label = {0., 0, 0., 0.};
276  }
277  }
278  else if (found_cb)
279  {
280  CB::label* ld = (CB::label*)&ctx.ex->l;
281  ld->costs.push_back(cb_label);
282 
283  found_cb = false;
284  cb_label = {0., 0, 0., 0.};
285  }
286  else if (found)
287  {
288  count_label(ctx.all->sd, ctx.ex->l.simple.label);
289 
290  found = false;
291  }
292 
293  return return_state;
294  }
295 };
296 
297 // "_label_*":
298 template <bool audit>
300 {
301  LabelSinglePropertyState() : BaseState<audit>("LabelSingleProperty") {}
302 
303  // forward _label
304  BaseState<audit>* Float(Context<audit>& ctx, float v) override
305  {
306  // skip "_label_"
307  ctx.key += 7;
308  ctx.key_length -= 7;
309 
310  if (ctx.label_object_state.Float(ctx, v) == nullptr)
311  return nullptr;
312 
313  return ctx.previous_state;
314  }
315 
316  BaseState<audit>* Uint(Context<audit>& ctx, unsigned v) override
317  {
318  // skip "_label_"
319  ctx.key += 7;
320  ctx.key_length -= 7;
321 
322  if (ctx.label_object_state.Uint(ctx, v) == nullptr)
323  return nullptr;
324 
325  return ctx.previous_state;
326  }
327 };
328 
329 template <bool audit>
330 struct LabelIndexState : BaseState<audit>
331 {
332  int index;
333 
334  LabelIndexState() : BaseState<audit>("LabelIndex"), index(-1) {}
335 
336  BaseState<audit>* Uint(Context<audit>& ctx, unsigned int v) override
337  {
338  index = v;
339  return ctx.previous_state;
340  }
341 };
342 
343 // "_label":"1"
344 // Note: doesn't support labelIndex
345 template <bool audit>
346 struct LabelState : BaseState<audit>
347 {
348  LabelState() : BaseState<audit>("Label") {}
349 
350  BaseState<audit>* StartObject(Context<audit>& ctx) override { return ctx.label_object_state.StartObject(ctx); }
351 
352  BaseState<audit>* String(Context<audit>& ctx, const char* str, rapidjson::SizeType /* len */, bool) override
353  {
354  VW::parse_example_label(*ctx.all, *ctx.ex, str);
355  return ctx.previous_state;
356  }
357 
358  BaseState<audit>* Float(Context<audit>& ctx, float v) override
359  {
360  // TODO: once we introduce label types, check here
361  ctx.ex->l.simple.label = v;
362  return ctx.previous_state;
363  }
364 
365  BaseState<audit>* Uint(Context<audit>& ctx, unsigned v) override
366  {
367  // TODO: once we introduce label types, check here
368  ctx.ex->l.simple.label = (float)v;
369  return ctx.previous_state;
370  }
371 };
372 
373 // "_text":"a b c"
374 template <bool audit>
375 struct TextState : BaseState<audit>
376 {
377  TextState() : BaseState<audit>("text") {}
378 
379  BaseState<audit>* String(Context<audit>& ctx, const char* str, rapidjson::SizeType length, bool)
380  {
381  auto& ns = ctx.CurrentNamespace();
382 
383  // split into individual features
384  const char* start = str;
385  const char* end = str + length;
386  for (char* p = (char*)str; p != end; p++)
387  {
388  switch (*p)
389  {
390  // split on space and tab
391  case ' ':
392  case '\t':
393  *p = '\0';
394  if (p - start > 0)
395  ns.AddFeature(ctx.all, start);
396 
397  start = p + 1;
398  break;
399  // escape chars
400  case ':':
401  case '|':
402  *p = '_';
403  break;
404  }
405  }
406 
407  if (start < end)
408  ns.AddFeature(ctx.all, start);
409 
410  return ctx.previous_state;
411  }
412 };
413 
414 template <bool audit>
415 struct TagState : BaseState<audit>
416 {
417  // "_tag":"abc"
418  TagState() : BaseState<audit>("tag") {}
419 
420  BaseState<audit>* String(Context<audit>& ctx, const char* str, SizeType length, bool)
421  {
422  push_many(ctx.ex->tag, str, length);
423 
424  return ctx.previous_state;
425  }
426 };
427 
428 template <bool audit>
429 struct MultiState : BaseState<audit>
430 {
431  MultiState() : BaseState<audit>("Multi") {}
432 
434  {
435  // mark shared example
436  if (ctx.all->label_type == label_type::cb)
437  {
438  CB::label* ld = &ctx.ex->l.cb;
439  CB::cb_class f;
440 
441  f.partial_prediction = 0.;
442  f.action = (uint32_t)uniform_hash("shared", 6, 0);
443  f.cost = FLT_MAX;
444  f.probability = -1.f;
445 
446  ld->costs.push_back(f);
447  }
448  else if (ctx.all->label_type == label_type::ccb)
449  {
452  }
453  else
454  THROW("label type is not CB or CCB")
455 
456  return this;
457  }
458 
460  {
461  // allocate new example
462  ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context);
463  ctx.all->p->lp.default_label(&ctx.ex->l);
464  if (ctx.all->label_type == label_type::ccb)
465  {
467  }
468 
469  ctx.examples->push_back(ctx.ex);
470 
471  // setup default namespace
472  ctx.PushNamespace(" ", this);
473 
474  return &ctx.default_state;
475  }
476 
477  BaseState<audit>* EndArray(Context<audit>& ctx, rapidjson::SizeType) override
478  {
479  // return to shared example
480  ctx.ex = (*ctx.examples)[0];
481 
482  return &ctx.default_state;
483  }
484 };
485 
486 // This state makes the assumption we are in CCB
487 template <bool audit>
488 struct SlotsState : BaseState<audit>
489 {
490  SlotsState() : BaseState<audit>("Slots") {}
493 
495  {
496  // drain existing added namespace
497  // todo check bounds
498  saved = ctx.PopNamespace();
499  saved_root_state = ctx.root_state;
500  ctx.root_state = this;
501  return this;
502  }
503 
505  {
506  // allocate new example
507  ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context);
508  ctx.all->p->lp.default_label(&ctx.ex->l);
510 
511  ctx.examples->push_back(ctx.ex);
512 
513  // The end object logic assumes shared example so we need to take an extra one here.
514  ctx.label_index_state.index = static_cast<int>(ctx.examples->size()) - 2;
515 
516  // setup default namespace
517  ctx.PushNamespace(" ", this);
518 
519  return &ctx.default_state;
520  }
521 
522  BaseState<audit>* EndArray(Context<audit>& ctx, rapidjson::SizeType) override
523  {
524  // return to shared example
525  ctx.ex = (*ctx.examples)[0];
526 
527  ctx.PushNamespace(" ", saved);
528  ctx.root_state = saved_root_state;
529 
530  return &ctx.default_state;
531  }
532 };
533 
534 // "...":[Numbers only]
535 template <bool audit>
536 class ArrayState : public BaseState<audit>
537 {
539 
540  public:
541  ArrayState() : BaseState<audit>("Array") {}
542 
544  {
545  if (ctx.previous_state == this)
546  {
547  ctx.error() << "Nested arrays are not supported";
548  return nullptr;
549  }
550 
551  ctx.PushNamespace(ctx.key, ctx.previous_state);
552 
553  array_hash = ctx.CurrentNamespace().namespace_hash;
554 
555  return this;
556  }
557 
558  BaseState<audit>* Float(Context<audit>& ctx, float f) override
559  {
560  if (audit)
561  {
562  std::stringstream str;
563  str << '[' << (array_hash - ctx.CurrentNamespace().namespace_hash) << ']';
564 
565  ctx.CurrentNamespace().AddFeature(f, array_hash, str.str().c_str());
566  }
567  else
568  ctx.CurrentNamespace().AddFeature(f, array_hash, nullptr);
569  array_hash++;
570 
571  return this;
572  }
573 
574  BaseState<audit>* Uint(Context<audit>& ctx, unsigned f) override { return Float(ctx, (float)f); }
575 
576  BaseState<audit>* Null(Context<audit>& /* ctx */) override
577  {
578  // ignore null values and stay in current state
579  return this;
580  }
581 
583  {
584  // parse properties
585  ctx.PushNamespace(ctx.namespace_path.size() > 0 ? ctx.CurrentNamespace().name : " ", this);
586 
587  return &ctx.default_state;
588  }
589 
590  BaseState<audit>* EndArray(Context<audit>& ctx, rapidjson::SizeType /* elementCount */) override
591  {
592  return ctx.PopNamespace();
593  }
594 };
595 
596 // only 0 is valid as DefaultState::Ignore injected that into the source stream
597 template <bool audit>
598 struct IgnoreState : BaseState<audit>
599 {
600  IgnoreState() : BaseState<audit>("Ignore") {}
601 
602  BaseState<audit>* Uint(Context<audit>& ctx, unsigned) override { return ctx.previous_state; }
603 };
604 
605 template <bool audit>
606 class DefaultState : public BaseState<audit>
607 {
608  public:
609  DefaultState() : BaseState<audit>("Default") {}
610 
611  BaseState<audit>* Ignore(Context<audit>& ctx, rapidjson::SizeType length)
612  {
613  // fast ignore
614  // skip key + \0 + "
615  char* head = ctx.stream->src_ + length + 2;
616  if (head >= ctx.stream_end || *head != ':')
617  {
618  ctx.error() << "Expected ':' found '" << *head << "'";
619  return nullptr;
620  }
621  head++;
622 
623  // scan for ,}
624  // support { { ... } }
625  int depth = 0, sq_depth = 0;
626  bool stop = false;
627  while (!stop)
628  {
629  switch (*head)
630  {
631  case '\0':
632  ctx.error() << "Found EOF";
633  return nullptr;
634  case '"':
635  {
636  // skip strings
637  bool stopInner = false;
638  while (!stopInner)
639  {
640  head++;
641  switch (*head)
642  {
643  case '\0':
644  ctx.error() << "Found EOF";
645  return nullptr;
646  case '\\':
647  head++;
648  break;
649  case '"':
650  stopInner = true;
651  break;
652  }
653  }
654  break;
655  }
656  case '{':
657  depth++;
658  break;
659  case '}':
660  if (depth == 0 && sq_depth == 0)
661  stop = true;
662  else
663  depth--;
664  break;
665  case '[':
666  sq_depth++;
667  break;
668  case ']':
669  if (depth == 0 && sq_depth == 0)
670  stop = true;
671  else
672  sq_depth--;
673  break;
674  case ',':
675  if (depth == 0 && sq_depth == 0)
676  stop = true;
677  break;
678  }
679  head++;
680  }
681 
682  // skip key + \0 + ":
683  char* value = ctx.stream->src_ + length + 3;
684  if (value >= ctx.stream_end)
685  {
686  ctx.error() << "Found EOF";
687  return nullptr;
688  }
689 
690  *value = '0';
691  value++;
692  memset(value, ' ', head - value - 1);
693 
694  return &ctx.ignore_state;
695  }
696 
697  BaseState<audit>* Key(Context<audit>& ctx, const char* str, rapidjson::SizeType length, bool) override
698  {
699  ctx.key = str;
700  ctx.key_length = length;
701 
702  if (length > 0 && str[0] == '_')
703  {
704  // match _label*
705  if (ctx.key_length >= 6 && !strncmp(ctx.key, "_label", 6))
706  {
707  if (ctx.key_length >= 7 && ctx.key[6] == '_')
708  return &ctx.label_single_property_state;
709  else if (ctx.key_length == 6)
710  return &ctx.label_state;
711  else if (ctx.key_length == 11 && !_stricmp(ctx.key, "_labelIndex"))
712  return &ctx.label_index_state;
713  else
714  {
715  ctx.error() << "Unsupported key '" << ctx.key << "' len: " << length;
716  return nullptr;
717  }
718  }
719 
720  if (ctx.key_length == 5 && !strcmp(ctx.key, "_text"))
721  return &ctx.text_state;
722 
723  // TODO: _multi in _multi...
724  if (ctx.key_length == 6 && !strcmp(ctx.key, "_multi"))
725  return &ctx.multi_state;
726 
727  if (ctx.key_length == 6 && !strcmp(ctx.key, "_slots"))
728  return &ctx.slots_state;
729 
730  if (ctx.key_length == 4 && !_stricmp(ctx.key, "_tag"))
731  return &ctx.tag_state;
732 
733  if (ctx.key_length == 4 && !_stricmp(ctx.key, "_inc"))
734  {
736  ctx.array_uint_state.return_state = this;
737  return &ctx.array_uint_state;
738  }
739 
740  if (ctx.key_length == 2 && ctx.key[1] == 'a')
741  {
743  ctx.array_uint_state.return_state = this;
744  return &ctx.array_uint_state;
745  }
746 
747  if (ctx.key_length == 2 && ctx.key[1] == 'p')
748  {
750  ctx.array_float_state.return_state = this;
751  return &ctx.array_float_state;
752  }
753 
754  return Ignore(ctx, length);
755  }
756 
757  return this;
758  }
759 
760  BaseState<audit>* String(Context<audit>& ctx, const char* str, rapidjson::SizeType length, bool) override
761  {
762  // string escape
763  const char* end = str + length;
764  for (char* p = (char*)str; p != end; p++)
765  {
766  switch (*p)
767  {
768  case ' ':
769  case '\t':
770  case '|':
771  case ':':
772  *p = '_';
773  }
774  }
775 
776  char* prepend = (char*)str - ctx.key_length;
777  memmove(prepend, ctx.key, ctx.key_length);
778 
779  ctx.CurrentNamespace().AddFeature(ctx.all, prepend);
780 
781  return this;
782  }
783 
784  BaseState<audit>* Bool(Context<audit>& ctx, bool b) override
785  {
786  if (b)
787  ctx.CurrentNamespace().AddFeature(ctx.all, ctx.key);
788 
789  return this;
790  }
791 
793  {
794  ctx.PushNamespace(ctx.key, this);
795  return this;
796  }
797 
798  BaseState<audit>* EndObject(Context<audit>& ctx, rapidjson::SizeType memberCount) override
799  {
800  BaseState<audit>* return_state = ctx.PopNamespace();
801 
802  if (ctx.namespace_path.empty())
803  {
804  int label_index = ctx.label_index_state.index;
805  // we're at the end of the example
806  if (label_index >= 0)
807  {
808  // skip shared example
809  label_index++;
810  if (label_index >= (int)ctx.examples->size())
811  {
812  ctx.error() << "Out of bounds error: _labelIndex must be smaller than number of actions! _labelIndex="
813  << (label_index - 1) << " Number of actions=" << ctx.examples->size() - 1 << " ";
814  return nullptr;
815  }
816 
817  // apply labelIndex
818  ctx.ex = (*ctx.examples)[label_index];
819 
820  // reset for next example
821  ctx.label_index_state.index = -1;
822  }
823 
824  // inject label
825  ctx.label_object_state.EndObject(ctx, memberCount);
826 
827  // If we are in CCB mode and there have been no slots. Check label cost, prob and action were passed. In that
828  // case this is CB, so generate a single slot with this info.
829  if (ctx.all->label_type == label_type::ccb)
830  {
831  auto num_slots = std::count_if(ctx.examples->begin(), ctx.examples->end(),
833  if (num_slots == 0 && ctx.label_object_state.found_cb)
834  {
835  ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context);
836  ctx.all->p->lp.default_label(&ctx.ex->l);
838  ctx.examples->push_back(ctx.ex);
839 
840  auto outcome = new CCB::conditional_contextual_bandit_outcome();
841  outcome->cost = ctx.label_object_state.cb_label.cost;
842  outcome->probabilities.push_back(
843  {ctx.label_object_state.cb_label.action, ctx.label_object_state.cb_label.probability});
844  ctx.ex->l.conditional_contextual_bandit.outcome = outcome;
845  }
846  }
847  }
848 
849  // if we're at the top-level go back to ds_state
850  return ctx.namespace_path.empty() ? ctx.root_state : return_state;
851  }
852 
853  BaseState<audit>* Float(Context<audit>& ctx, float f) override
854  {
855  auto& ns = ctx.CurrentNamespace();
856  ns.AddFeature(f, VW::hash_feature(*ctx.all, ctx.key, ns.namespace_hash), ctx.key);
857 
858  return this;
859  }
860 
861  BaseState<audit>* Uint(Context<audit>& ctx, unsigned f) override { return Float(ctx, (float)f); }
862 
863  BaseState<audit>* StartArray(Context<audit>& ctx) override { return ctx.array_state.StartArray(ctx); }
864 };
865 
866 template <bool audit, typename T>
867 class ArrayToVectorState : public BaseState<audit>
868 {
869  public:
870  ArrayToVectorState() : BaseState<audit>("ArrayToVectorState") {}
871 
872  std::vector<T>* output_array;
874 
875  // Allows for single value handling.
876  bool has_seen_array_start = false;
877 
879  {
880  if (ctx.previous_state == this)
881  {
882  ctx.error() << "Nested arrays are not supported";
883  return nullptr;
884  }
885 
886  has_seen_array_start = true;
887 
888  return this;
889  }
890 
891  BaseState<audit>* Uint(Context<audit>& /* ctx */, unsigned f) override
892  {
893  output_array->push_back(static_cast<T>(f));
894 
895  if (!has_seen_array_start)
896  {
897  has_seen_array_start = false;
898  return return_state;
899  }
900 
901  return this;
902  }
903 
904  BaseState<audit>* Float(Context<audit>& /* ctx */, float f) override
905  {
906  output_array->push_back(static_cast<T>(f));
907 
908  if (!has_seen_array_start)
909  {
910  has_seen_array_start = false;
911  return return_state;
912  }
913 
914  return this;
915  }
916 
917  BaseState<audit>* Null(Context<audit>& /* ctx */) override
918  {
919  if (!has_seen_array_start)
920  {
921  has_seen_array_start = false;
922  return return_state;
923  }
924 
925  // ignore null values and stay in current state
926  return this;
927  }
928 
929  BaseState<audit>* EndArray(Context<audit>& /*ctx*/, rapidjson::SizeType /*length*/) override
930  {
931  has_seen_array_start = false;
932  return return_state;
933  }
934 };
935 
936 template <bool audit>
937 class StringToStringState : public BaseState<audit>
938 {
939  public:
940  StringToStringState() : BaseState<audit>("StringToStringState") {}
941 
942  std::string* output_string;
944 
946  Context<audit>& /*ctx*/, const char* str, rapidjson::SizeType length, bool /* copy */) override
947  {
948  output_string->assign(str, str + length);
949  return return_state;
950  }
951 
952  BaseState<audit>* Null(Context<audit>& /*ctx*/) override { return return_state; }
953 };
954 
955 template <bool audit>
956 class FloatToFloatState : public BaseState<audit>
957 {
958  public:
959  FloatToFloatState() : BaseState<audit>("FloatToFloatState") {}
960 
961  float* output_float;
963 
964  BaseState<audit>* Float(Context<audit>& /*ctx*/, float f) override
965  {
966  *output_float = f;
967  return return_state;
968  }
969 
971  {
972  *output_float = 0.f;
973  return return_state;
974  }
975 };
976 
977 template <bool audit>
978 class BoolToBoolState : public BaseState<audit>
979 {
980  public:
981  BoolToBoolState() : BaseState<audit>("BoolToBoolState") {}
982 
983  bool* output_bool;
985 
986  BaseState<audit>* Bool(Context<audit>& /*ctx*/, bool b) override
987  {
988  *output_bool = b;
989  return return_state;
990  }
991 };
992 
993 // Decision Service JSON header information - required to construct final label
995 {
996  std::string eventId;
997  std::vector<unsigned> actions;
998  std::vector<float> probabilities;
999  float probabilityOfDrop = 0.f;
1000  bool skipLearn{false};
1001 };
1002 
1003 template <bool audit>
1004 class CCBOutcomeList : public BaseState<audit>
1005 {
1006  int slot_object_index = 0;
1007 
1008  std::vector<uint32_t> actions;
1009  std::vector<float> probs;
1010  float cost;
1011 
1013 
1014  public:
1016 
1017  CCBOutcomeList() : BaseState<audit>("CCBOutcomeList") {}
1018 
1020  {
1021  slot_object_index = 0;
1022 
1023  // Find start index of slot objects by iterating until we find the first slot example.
1024  for (auto ex : *ctx.examples)
1025  {
1026  if (ex->l.conditional_contextual_bandit.type != CCB::example_type::slot)
1027  {
1028  slot_object_index++;
1029  }
1030  }
1031  old_root = ctx.root_state;
1032  ctx.root_state = this;
1033 
1034  if (slot_object_index == 0)
1035  {
1036  THROW("Badly formed ccb example. Shared example is required.")
1037  }
1038 
1039  return this;
1040  }
1041 
1043  {
1044  // Set current example so that default state correctly sets the label.
1045  ctx.ex = (*ctx.examples)[slot_object_index];
1046  // The end object logic assumes shared example so we need to take one here.
1047  ctx.label_index_state.index = slot_object_index - 1;
1048 
1049  slot_object_index++;
1050 
1051  // Push a namespace so that default state can get back here when it reaches the end of the object.
1052  ctx.PushNamespace(" ", this);
1053 
1054  return &ctx.default_state;
1055  }
1056 
1057  BaseState<audit>* EndArray(Context<audit>& ctx, rapidjson::SizeType) override
1058  {
1059  // DSJson requires the interaction object to be filled. After reading all slot outcomes fill out the top actions.
1060  for (auto ex : *ctx.examples)
1061  {
1062  if (ex->l.conditional_contextual_bandit.type == CCB::example_type::slot)
1063  {
1064  if (ex->l.conditional_contextual_bandit.outcome)
1065  {
1066  interactions->actions.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].action);
1067  interactions->probabilities.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].score);
1068  }
1069  }
1070  }
1071 
1072  ctx.root_state = old_root;
1073  return &ctx.decision_service_state;
1074  }
1075 };
1076 
1077 template <bool audit>
1078 class DecisionServiceState : public BaseState<audit>
1079 {
1080  public:
1081  DecisionServiceState() : BaseState<audit>("DecisionService") {}
1082 
1084 
1086  {
1087  // TODO: improve validation
1088  return this;
1089  }
1090 
1091  BaseState<audit>* EndObject(Context<audit>& /*ctx*/, rapidjson::SizeType /* memberCount */) override
1092  {
1093  // TODO: improve validation
1094  return this;
1095  }
1096 
1097  BaseState<audit>* Key(Context<audit>& ctx, const char* str, rapidjson::SizeType length, bool /* copy */) override
1098  {
1099  if (length == 1)
1100  {
1101  switch (str[0])
1102  {
1103  case 'a':
1104  ctx.array_uint_state.output_array = &data->actions;
1105  ctx.array_uint_state.return_state = this;
1106  return &ctx.array_uint_state;
1107  case 'p':
1109  ctx.array_float_state.return_state = this;
1110  return &ctx.array_float_state;
1111  case 'c':
1112  ctx.key = " ";
1113  ctx.key_length = 1;
1114  return &ctx.default_state;
1115  }
1116  }
1117  else if (length == 5 && !strcmp(str, "pdrop"))
1118  {
1119  ctx.float_state.output_float = &data->probabilityOfDrop;
1120  ctx.float_state.return_state = this;
1121  return &ctx.float_state;
1122  }
1123  else if (length == 7 && !strcmp(str, "EventId"))
1124  {
1125  ctx.string_state.output_string = &data->eventId;
1126  ctx.string_state.return_state = this;
1127  return &ctx.string_state;
1128  }
1129  else if (length > 0 && str[0] == '_')
1130  {
1131  // match _label*
1132  if (length >= 6 && !strncmp(str, "_label", 6))
1133  {
1134  ctx.key = str;
1135  ctx.key_length = length;
1136  if (length >= 7 && ctx.key[6] == '_')
1137  return &ctx.label_single_property_state;
1138  else if (length == 6)
1139  return &ctx.label_state;
1140  else if (length == 11 && !_stricmp(str, "_labelIndex"))
1141  return &ctx.label_index_state;
1142  }
1143  else if (length == 10 && !strncmp(str, "_skipLearn", 10))
1144  {
1145  ctx.bool_state.output_bool = &data->skipLearn;
1146  ctx.bool_state.return_state = this;
1147  return &ctx.bool_state;
1148  }
1149  else if (length == 9 && !strncmp(str, "_outcomes", 9))
1150  {
1151  ctx.ccb_outcome_list_state.interactions = data;
1152  return &ctx.ccb_outcome_list_state;
1153  }
1154  }
1155 
1156  // ignore unknown properties
1157  return ctx.default_state.Ignore(ctx, length);
1158  }
1159 };
1160 
1161 template <bool audit>
1162 struct Context
1163 {
1164  private:
1165  std::unique_ptr<std::stringstream> error_ptr;
1166 
1167  public:
1169 
1170  // last "<key>": encountered
1171  const char* key;
1172  rapidjson::SizeType key_length;
1173 
1176 
1177  // the path of namespaces
1178  std::vector<Namespace<audit>> namespace_path;
1179 
1182  rapidjson::InsituStringStream* stream;
1183  const char* stream_end;
1184 
1187 
1188  // states
1200 
1201  // DecisionServiceState
1209 
1211 
1213  {
1214  current_state = &default_state;
1215  root_state = &default_state;
1216  }
1217 
1218  void init(vw* pall)
1219  {
1220  all = pall;
1221  key = " ";
1222  key_length = 1;
1223  previous_state = nullptr;
1224  label_object_state.init(pall);
1225  }
1226 
1227  std::stringstream& error()
1228  {
1229  if (!error_ptr)
1230  error_ptr.reset(new std::stringstream{});
1231 
1232  return *error_ptr;
1233  }
1234 
1236  {
1237  decision_service_state.data = data;
1238  current_state = root_state = &decision_service_state;
1239  }
1240 
1241  void PushNamespace(const char* ns, BaseState<audit>* return_state)
1242  {
1243  Namespace<audit> n;
1244  n.feature_group = ns[0];
1245  n.namespace_hash = VW::hash_space(*all, ns);
1246  n.ftrs = ex->feature_space.data() + ns[0];
1247  n.feature_count = 0;
1248  n.return_state = return_state;
1249 
1250  n.name = ns;
1251 
1252  namespace_path.push_back(n);
1253  }
1254 
1256  {
1257  auto& ns = CurrentNamespace();
1258  if (ns.feature_count > 0)
1259  {
1260  auto feature_group = ns.feature_group;
1261  // Do not insert feature_group if it already exists.
1262  if (std::find(ex->indices.begin(), ex->indices.end(), feature_group) == ex->indices.end())
1263  {
1264  ex->indices.push_back(feature_group);
1265  }
1266  }
1267 
1268  auto return_state = namespace_path.back().return_state;
1269  namespace_path.pop_back();
1270  return return_state;
1271  }
1272 
1273  Namespace<audit>& CurrentNamespace() { return namespace_path.back(); }
1274 
1276  {
1277  if (next_state == nullptr)
1278  return false;
1279 
1280  previous_state = current_state;
1281  current_state = next_state;
1282 
1283  return true;
1284  }
1285 };
1286 
// Event handler handed to the rapidjson reader (see the Parse calls further down in this file).
// Every event is forwarded to the matching method of ctx.current_state; the state returned by that
// method is then passed to ctx.TransitionState, whose bool result becomes the handler's return value.
// NOTE(review): some member declaration lines of this struct are missing from this listing.
1287 template <bool audit>
1288 struct VWReaderHandler : public rapidjson::BaseReaderHandler<rapidjson::UTF8<>, VWReaderHandler<audit>>
1289 {
1291 
 // Prepares ctx for parsing one line:
 //  - the first element of `examples` becomes the example being filled (ctx.ex), so `examples`
 //    must already contain at least one element;
 //  - that example's label is reset through the label parser's default_label;
 //  - the stream, its end pointer and the example factory (plus its context) are stored in ctx.
1292  void init(vw* all, v_array<example*>* examples, rapidjson::InsituStringStream* stream, const char* stream_end,
1293  VW::example_factory_t example_factory, void* example_factory_context)
1294  {
1295  ctx.init(all);
1296  ctx.examples = examples;
1297  ctx.ex = (*examples)[0];
1298  all->p->lp.default_label(&ctx.ex->l);
1299 
1300  ctx.stream = stream;
1301  ctx.stream_end = stream_end;
1302  ctx.example_factory = example_factory;
1303  ctx.example_factory_context = example_factory_context;
1304  }
1305 
1306  // virtual dispatch to current state
 // Int, Int64, Uint64 and Double are all cast to float and routed to the state's Float handler;
 // only Uint is forwarded to a dedicated Uint handler.
1307  bool Bool(bool v) { return ctx.TransitionState(ctx.current_state->Bool(ctx, v)); }
1308  bool Int(int v) { return ctx.TransitionState(ctx.current_state->Float(ctx, (float)v)); }
1309  bool Uint(unsigned v) { return ctx.TransitionState(ctx.current_state->Uint(ctx, v)); }
1310  bool Int64(int64_t v) { return ctx.TransitionState(ctx.current_state->Float(ctx, (float)v)); }
1311  bool Uint64(uint64_t v) { return ctx.TransitionState(ctx.current_state->Float(ctx, (float)v)); }
1312  bool Double(double v) { return ctx.TransitionState(ctx.current_state->Float(ctx, (float)v)); }
1313  bool String(const char* str, SizeType len, bool copy)
1314  {
1315  return ctx.TransitionState(ctx.current_state->String(ctx, str, len, copy));
1316  }
1317  bool StartObject() { return ctx.TransitionState(ctx.current_state->StartObject(ctx)); }
1318  bool Key(const char* str, SizeType len, bool copy)
1319  {
1320  return ctx.TransitionState(ctx.current_state->Key(ctx, str, len, copy));
1321  }
1322  bool EndObject(SizeType count) { return ctx.TransitionState(ctx.current_state->EndObject(ctx, count)); }
1323  bool StartArray() { return ctx.TransitionState(ctx.current_state->StartArray(ctx)); }
1324  bool EndArray(SizeType count) { return ctx.TransitionState(ctx.current_state->EndArray(ctx, count)); }
1325  bool Null() { return ctx.TransitionState(ctx.current_state->Null(ctx)); }
1326 
 // Constant-result helpers; no caller is visible in this part of the file.
1327  bool VWReaderHandlerNull() { return true; }
1328  bool VWReaderHandlerDefault() { return false; }
1329 
1330  // alternative to above if we want to re-use the VW float parser...
 // Currently unimplemented: always returns false.
1331  bool RawNumber(const char* /* str */, rapidjson::SizeType /* length */, bool /* copy */) { return false; }
1332 
 // Exposes the error stream kept by ctx; its contents are included in the parse-error messages below.
1333  std::stringstream& error() { return ctx.error(); }
1334 
1336 };
1337 
// Holder for the rapidjson reader used by the read_line_* functions below.
// NOTE(review): the struct header line and one member line are missing from this listing; the
// functions below access both `parser.reader` and `parser.handler`, so presumably the missing
// member is the VWReaderHandler<audit> instance - confirm against the original file.
1338 template <bool audit>
1340 {
1341  rapidjson::Reader reader;
1343 };
1344 
1345 namespace VW
1346 {
// Parses one JSON line into `examples` (the function's signature line is missing from this listing).
//  - `line` is parsed in place (kParseInsituFlag), so its contents are modified; it must be
//    null-terminated because strlen(line) is used to compute the end of the stream.
//  - `example_factory` / `ex_factory_context` are handed to the handler via init().
// On a parse error, THROW is invoked with the error offset, the rapidjson error text, the handler's
// own error text and the name of the state that was active.
1347 template <bool audit>
1349  vw& all, v_array<example*>& examples, char* line, example_factory_t example_factory, void* ex_factory_context)
1350 {
1351  // string line_copy(line);
1352  // destructive parsing
1353  InsituStringStream ss(line);
1355 
1356  VWReaderHandler<audit>& handler = parser.handler;
1357  handler.init(&all, &examples, &ss, line + strlen(line), example_factory, ex_factory_context);
1358 
1359  ParseResult result =
1360  parser.reader.template Parse<kParseInsituFlag, InsituStringStream, VWReaderHandler<audit>>(ss, handler);
 // Success: nothing more to do.
1361  if (!result.IsError())
1362  return;
1363 
 // Failure: report where parsing stopped and which state was active.
1364  BaseState<audit>* current_state = handler.current_state();
1365 
1366  THROW("JSON parser error at " << result.Offset() << ": " << GetParseError_En(result.Code())
1367  << ". "
1368  "Handler: "
1369  << handler.error().str()
1370  << "State: " << (current_state ? current_state->name : "null")); // <<
1371  // "Line: '"<< line_copy << "'");
1372 }
1373 
// Sets the label weight of every example in `examples` to (1 - pdrop).
// The first branch writes the cb label's weight, the second (taken for the ccb label type) writes the
// conditional_contextual_bandit label's weight; for any other label type nothing is changed.
// NOTE(review): the condition line of the first branch is missing from this listing - presumably it
// tests for the cb label type; confirm against the original file.
1374 inline void apply_pdrop(vw& all, float pdrop, v_array<example*>& examples)
1375 {
1377  {
1378  for (auto& e : examples)
1379  {
1380  e->l.cb.weight = 1 - pdrop;
1381  }
1382  }
1383  else if (all.label_type == label_type::label_type_t::ccb)
1384  {
1385  for (auto& e : examples)
1386  {
1387  e->l.conditional_contextual_bandit.weight = 1 - pdrop;
1388  }
1389  }
1390 }
1391 
// Parses one decision-service JSON line into `examples` and fills `data` with the interaction
// metadata collected by the decision-service start state.
//  - `line` / `length`: the input buffer and the number of characters in it. When `copy_line` is
//    false the buffer is parsed in place (kParseInsituFlag modifies it) and must be null-terminated.
//  - `copy_line`: when true, a private copy of the first `length` characters is parsed instead, so
//    the caller's buffer is left untouched.
//  - `example_factory` / `ex_factory_context`: handed to the handler via init().
// apply_pdrop runs before the error check, so example weights are updated even when parsing failed.
// On a parse error, THROW is invoked with the error offset, the rapidjson error text, the handler's
// own error text and the name of the state that was active.
1392 template <bool audit>
1393 void read_line_decision_service_json(vw& all, v_array<example*>& examples, char* line, size_t length, bool copy_line,
1394  example_factory_t example_factory, void* ex_factory_context, DecisionServiceInteraction* data)
1395 {
1396  std::vector<char> line_vec;
1397  if (copy_line)
1398  {
 // Reserve room for the terminator as well so the copy needs a single allocation.
 line_vec.reserve(length + 1);
1399  line_vec.insert(line_vec.end(), line, line + length);
 // The in-situ stream has no length and scans until '\0'. Copying only `length` bytes dropped the
 // terminator, so the parser could read past the end of the copy. Appending it also guarantees the
 // vector is non-empty, which makes front() below valid even when length == 0.
 line_vec.push_back('\0');
1400  line = &line_vec.front();
1401  }
1402 
1403  InsituStringStream ss(line);
1405 
1406  VWReaderHandler<audit>& handler = parser.handler;
1407  handler.init(&all, &examples, &ss, line + length, example_factory, ex_factory_context);
1408  handler.ctx.SetStartStateToDecisionService(data);
1409 
1410  ParseResult result =
1411  parser.reader.template Parse<kParseInsituFlag, InsituStringStream, VWReaderHandler<audit>>(ss, handler);
1412 
1413  apply_pdrop(all, data->probabilityOfDrop, examples);
1414 
1415  if (!result.IsError())
1416  return;
1417 
1418  BaseState<audit>* current_state = handler.current_state();
1419 
1420  THROW("JSON parser error at " << result.Offset() << ": " << GetParseError_En(result.Code())
1421  << ". "
1422  "Handler: "
1423  << handler.error().str()
1424  << "State: " << (current_state ? current_state->name : "null"));
1425 }
1426 } // namespace VW
1427 
// Parses one input line into `examples`, choosing the decision-service or the plain JSON reader
// according to all->p->decision_service_json.
// Returns true when `examples` holds data that should be passed on, false when the line should be
// skipped. In decision-service mode false is returned for:
//  - lines whose first character is not '{';
//  - interactions flagged skipLearn (the examples are returned and replaced by one fresh example);
//  - interactions without any actions (the examples are left as they are).
// Both readers parse `line` in place, and both THROW on a JSON parse error.
1428 template <bool audit>
1429 bool parse_line_json(vw* all, char* line, size_t num_chars, v_array<example*>& examples)
1430 {
1431  if (all->p->decision_service_json)
1432  {
1433  // Skip lines that do not start with "{"
1434  if (line[0] != '{')
1435  {
1436  return false;
1437  }
1438 
 // copy_line is false: the buffer is parsed in place. `all` is passed as the factory context.
1439  DecisionServiceInteraction interaction;
1440  VW::template read_line_decision_service_json<audit>(*all, examples, line, num_chars, false,
1441  reinterpret_cast<VW::example_factory_t>(&VW::get_unused_example), all, &interaction);
1442 
1443  // TODO: In refactoring the parser to be usable standalone, we need to ensure that we
1444  // stop suppressing "skipLearn" interactions. Also, not sure if this is the right logic
1445  // for counterfactual. (@marco)
1446  if (interaction.skipLearn)
1447  {
 // Hand back everything that was parsed and leave a single fresh example in the array.
1448  VW::return_multiple_example(*all, examples);
1449  examples.push_back(&VW::get_unused_example(all));
1450  return false;
1451  }
1452 
1453  // let's ask to continue reading data until we find a line with actions provided
1454  if (interaction.actions.size() == 0)
1455  {
1456  // VW::return_multiple_example(*all, examples);
1457  // examples.push_back(&VW::get_unused_example(all));
1458  return false;
1459  }
1460  }
1461  else
1462  VW::template read_line_json<audit>(
1463  *all, examples, line, reinterpret_cast<VW::example_factory_t>(&VW::get_unused_example), all);
1464 
1465  return true;
1466 }
1467 
// Finishes a parsed group of examples before it is handed on: when `examples` holds more than one
// example, an extra example built from an empty string is appended at the end.
// A single-example array is left unchanged.
1468 inline void prepare_for_learner(vw* all, v_array<example*>& examples)
1469 {
1470  // note: the json parser does single pass parsing and cannot determine if a shared example is needed.
1471  // since the communication between the parsing thread and the main learner expects examples to be requested in
1472  // order (as they're laid out in memory) there is no way to determine upfront if a shared example exists, thus
1473  // even if there are no features for the shared example, an empty example is still returned.
1474 
1475  // insert new line example at the end
1476  if (examples.size() > 1)
1477  {
1478  example& ae = VW::get_unused_example(all);
 // Both ends of the substring point at the same '\0', i.e. an empty input line.
1479  char empty = '\0';
1480  substring example = {&empty, &empty};
1481  substring_to_example(all, &ae, example);
1482 
1483  examples.push_back(&ae);
1484  }
1485 }
1486 
1487 // This is used by the python parser
// Parses a single line into `examples`. When parse_line_json rejects the line, the current contents
// of `examples` are returned and replaced by one fresh unused example; otherwise the examples are
// finalized with prepare_for_learner.
1488 template <bool audit>
1489 void line_to_examples_json(vw* all, char* line, size_t num_chars, v_array<example*>& examples)
1490 {
1491  bool good_example = parse_line_json<audit>(all, line, num_chars, examples);
1492  if (!good_example)
1493  {
 // Discard whatever was parsed and leave a single fresh example behind.
1494  VW::return_multiple_example(*all, examples);
1495  examples.push_back(&VW::get_unused_example(all));
1496  return;
1497  }
1498 
1499  prepare_for_learner(all, examples);
1500 }
1501 
// Reads input lines until one of them parses into a usable set of examples (the function's signature
// line is missing from this listing).
// Returns the value of read_features cast to int when that value is below 1 (for a size_t: 0), and
// 1 once a line has been parsed successfully and finalized with prepare_for_learner.
1502 template <bool audit>
1504 {
1505  // Keep reading lines until a valid set of examples is produced.
1506  bool reread;
1507  do
1508  {
1509  reread = false;
1510 
1511  char* line;
1512  size_t num_chars;
 // read_features sets `line` and `num_chars` through its reference parameters.
1513  size_t num_chars_initial = read_features(all, line, num_chars);
1514  if (num_chars_initial < 1)
1515  return (int)num_chars_initial;
1516 
1517  // Ensure there is a null terminator.
 // This writes one position past the reported characters, so the buffer behind `line` must have
 // room for it.
1518  line[num_chars] = '\0';
1519 
 // A rejected line (parse_line_json returned false) triggers another read.
1520  reread = !parse_line_json<audit>(all, line, num_chars, examples);
1521  } while (reread);
1522 
1523  prepare_for_learner(all, examples);
1524 
1525  return 1;
1526 }
BaseState< audit > * root_state
void return_multiple_example(vw &all, v_array< example *> &examples)
Definition: example.cc:251
virtual BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool)
v_array< char > tag
Definition: example.h:63
virtual BaseState< audit > * Bool(Context< audit > &ctx, bool b)
v_array< namespace_index > indices
void parse_example_label(vw &all, example &ec, std::string label)
Definition: parser.cc:846
LabelObjectState< audit > label_object_state
BaseState< audit > * Null(Context< audit > &) override
BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType memberCount) override
virtual BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType)
BaseState< audit > * EndObject(Context< audit > &, rapidjson::SizeType) override
std::string * output_string
std::vector< Namespace< audit > > namespace_path
BaseState< audit > * StartObject(Context< audit > &ctx) override
const char * key
BaseState< audit > * Uint(Context< audit > &, unsigned f) override
void push_back(feature_value v, feature_index i)
example &(* example_factory_t)(void *)
BaseState< audit > * Null(Context< audit > &) override
BaseState< audit > * Float(Context< audit > &ctx, float f) override
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
BaseState< audit > * saved_root_state
BaseState< audit > * Uint(Context< audit > &ctx, unsigned int v) override
std::shared_ptr< audit_strings > audit_strings_ptr
Definition: feature_group.h:23
rapidjson::SizeType key_length
CB::label cb
Definition: example.h:31
features * ftrs
void init(vw *all, v_array< example *> *examples, rapidjson::InsituStringStream *stream, const char *stream_end, VW::example_factory_t example_factory, void *example_factory_context)
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
ArrayToVectorState< audit, float > array_float_state
BaseState< audit > * Float(Context< audit > &ctx, float v) override
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * Ignore(Context< audit > &ctx, rapidjson::SizeType length)
LabelIndexState< audit > label_index_state
void(* default_label)(void *)
Definition: label_parser.h:12
label_type::label_type_t label_type
Definition: global_data.h:550
void count_label(shared_data *sd, float l)
Definition: best_constant.h:5
BaseState< audit > * Bool(Context< audit > &ctx, bool b) override
example * ex
BaseState< audit > * Float(Context< audit > &ctx, float f) override
BaseState< audit > * Float(Context< audit > &ctx, float v) override
LabelState< audit > label_state
MultiState< audit > multi_state
VWReaderHandler< audit > handler
bool RawNumber(const char *, rapidjson::SizeType, bool)
BaseState< audit > * return_state
the core definition of a set of features.
v_array< cb_class > costs
Definition: cb.h:27
size_t feature_count
float feature_value
Definition: feature_group.h:20
std::stringstream & error()
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
Definition: hash.h:67
void AddFeature(vw *all, const char *str)
BaseState< audit > * StartArray(Context< audit > &ctx) override
uint32_t action
Definition: search.h:19
void line_to_examples_json(vw *all, char *line, size_t num_chars, v_array< example *> &examples)
float weight
Definition: simple_label.h:15
BaseState< audit > * StartObject(Context< audit > &ctx) override
std::vector< float > probs
feature_index namespace_hash
float label
Definition: simple_label.h:14
BaseState< audit > * EndArray(Context< audit > &, rapidjson::SizeType) override
label_data simple
Definition: example.h:28
ArrayState< audit > array_state
BaseState< audit > * StartArray(Context< audit > &ctx) override
LabelSinglePropertyState< audit > label_single_property_state
void SetStartStateToDecisionService(DecisionServiceInteraction *data)
BaseState< audit > * return_state
BaseState< audit > * StartObject(Context< audit > &ctx) override
void AddFeature(feature_value v, feature_index i, const char *feature_name)
bool EndObject(SizeType count)
virtual BaseState< audit > * Float(Context< audit > &ctx, float v)
BaseState< audit > * Null(Context< audit > &) override
ArrayToVectorState< audit, unsigned > array_uint_state
void read_line_json(vw &all, v_array< example *> &examples, char *line, example_factory_t example_factory, void *ex_factory_context)
T *& begin()
Definition: v_array.h:42
BaseState< audit > * StartArray(Context< audit > &ctx) override
DecisionServiceInteraction * interactions
rapidjson::Reader reader
size_t size() const
Definition: v_array.h:68
#define _stricmp
virtual BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool)
BaseState< audit > * Float(Context< audit > &, float f) override
StringToStringState< audit > string_state
void * example_factory_context
parser * p
Definition: global_data.h:377
std::array< features, NUM_NAMESPACES > feature_space
CCBOutcomeList< audit > ccb_outcome_list_state
Namespace< audit > & CurrentNamespace()
BaseState(const char *pname)
BaseState< audit > * StartArray(Context< audit > &ctx) override
void substring_to_example(vw *all, example *ae, substring example)
std::unique_ptr< std::stringstream > error_ptr
BaseState< audit > * Float(Context< audit > &, float f) override
bool String(const char *str, SizeType len, bool copy)
FloatToFloatState< audit > float_state
BoolToBoolState< audit > bool_state
const char * name
uint32_t action
Definition: cb.h:18
BaseState< audit > * old_root
void push_many(v_array< T > &v, const T *_begin, size_t num)
Definition: v_array.h:207
VW::example_factory_t example_factory
float partial_prediction
Definition: cb.h:21
virtual BaseState< audit > * StartObject(Context< audit > &ctx)
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType, bool) override
void push_back(const T &new_ele)
Definition: v_array.h:107
std::vector< unsigned int > inc
shared_data * sd
Definition: global_data.h:375
bool Double(double v)
const char * stream_end
float probability
Definition: cb.h:19
BaseState< audit > * Uint(Context< audit > &ctx, unsigned) override
BaseState< audit > * current_state()
BaseState< audit > * Float(Context< audit > &ctx, float v) override
BaseState< audit > * StartObject(Context< audit > &) override
DecisionServiceInteraction * data
BaseState< audit > * Bool(Context< audit > &, bool b) override
std::stringstream & error()
v_array< example * > * examples
uint64_t feature_index
Definition: feature_group.h:21
void prepare_for_learner(vw *all, v_array< example *> &examples)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
BaseState< audit > * String(Context< audit > &, const char *str, rapidjson::SizeType length, bool) override
float initial
Definition: simple_label.h:16
SlotsState< audit > slots_state
BaseState< audit > * PopNamespace()
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * return_state
bool Int64(int64_t v)
DecisionServiceState< audit > decision_service_state
DefaultState< audit > default_state
void init(vw *pall)
virtual BaseState< audit > * StartArray(Context< audit > &ctx)
T *& end()
Definition: v_array.h:43
rapidjson::InsituStringStream * stream
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool) override
bool Uint64(uint64_t v)
bool decision_service_json
Definition: parser.h:105
example_type type
Definition: ccb_label.h:32
BaseState< audit > * StartArray(Context< audit > &ctx) override
v_array< audit_strings_ptr > space_names
BaseState< audit > * return_state
std::vector< float > probabilities
virtual BaseState< audit > * Null(Context< audit > &ctx)
polylabel l
Definition: example.h:57
BaseState< audit > * return_state
node_pred * find(recall_tree &b, uint32_t cn, example &ec)
Definition: recall_tree.cc:126
bool TransitionState(BaseState< audit > *next_state)
void read_line_decision_service_json(vw &all, v_array< example *> &examples, char *line, size_t length, bool copy_line, example_factory_t example_factory, void *ex_factory_context, DecisionServiceInteraction *data)
std::vector< unsigned > actions
BaseState< audit > * saved
Definition: cb.h:25
float cost
Definition: cb.h:17
BaseState< audit > * return_state
Definition: autolink.cc:11
std::vector< unsigned int > actions
CCB::label conditional_contextual_bandit
Definition: example.h:32
bool Uint(unsigned v)
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType) override
void apply_pdrop(vw &all, float pdrop, v_array< example *> &examples)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
uint64_t hash_feature(vw &all, const std::string &s, uint64_t u)
Definition: vw.h:153
uint64_t hash_space(vw &all, const std::string &s)
Definition: vw.h:138
const char * name
Definition: parser.h:38
size_t read_features(vw *all, char *&line, size_t &num_chars)
bool parse_line_json(vw *all, char *line, size_t num_chars, v_array< example *> &examples)
Context< audit > ctx
bool EndArray(SizeType count)
BaseState< audit > * StartObject(Context< audit > &ctx) override
std::vector< float > probs
BaseState< audit > * Null(Context< audit > &) override
BaseState< audit > * Uint(Context< audit > &ctx, unsigned f) override
virtual BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType)
feature_index array_hash
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
TextState< audit > text_state
example & get_unused_example(vw *all)
Definition: parser.cc:664
IgnoreState< audit > ignore_state
BaseState< audit > * current_state
bool Key(const char *str, SizeType len, bool copy)
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * Uint(Context< audit > &ctx, unsigned f) override
#define THROW(args)
Definition: vw_exception.h:181
BaseState< audit > * previous_state
TagState< audit > tag_state
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
std::vector< uint32_t > actions
float f
Definition: cache.cc:40
conditional_contextual_bandit_outcome * outcome
Definition: ccb_label.h:34
std::vector< T > * output_array
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
BaseState< audit > * String(Context< audit > &ctx, const char *str, SizeType length, bool)
label_parser lp
Definition: parser.h:102
void PushNamespace(const char *ns, BaseState< audit > *return_state)
BaseState< audit > * StartArray(Context< audit > &ctx) override
std::pair< std::string, std::string > audit_strings
Definition: feature_group.h:22
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
int read_features_json(vw *all, v_array< example *> &examples)
virtual BaseState< audit > * Uint(Context< audit > &ctx, unsigned v)