19 #if (_MANAGED == 1) || (_M_CEE == 1) 20 #pragma managed(push, off) 23 #include <rapidjson/reader.h> 24 #include <rapidjson/error/en.h> 26 #if (_MANAGED == 1) || (_M_CEE == 1) 40 #define _stricmp strcasecmp 101 ctx.
error() <<
"Unexpected token: bool (" << (b ?
"true" :
"false") <<
")";
107 ctx.
error() <<
"Unexpected token: float (" << v <<
")";
113 ctx.
error() <<
"Unexpected token: uint (" << v <<
")";
119 ctx.
error() <<
"Unexpected token: std::string('" << str <<
"' len: " << len <<
")";
125 ctx.
error() <<
"Unexpected token: {";
131 ctx.
error() <<
"Unexpected token: key('" << str <<
"' len: " << len <<
")";
137 ctx.
error() <<
"Unexpected token: }";
143 ctx.
error() <<
"Unexpected token: [";
149 ctx.
error() <<
"Unexpected token: ]";
154 template <
bool audit>
166 std::vector<unsigned int>
inc;
172 found = found_cb =
false;
174 cb_label = {0., 0, 0., 0.};
184 ctx.
error() <<
"invalid label object. nested objected.";
222 cb_label.
action = (uint32_t)v;
237 ctx.
error() <<
"Unsupported label property: '" << ctx.
key <<
"' len: " << ctx.
key_length;
254 ld->explicit_included_actions.push_back(
id);
258 if ((actions.size() != 0) && (probs.size() != 0))
261 outcome->cost = cb_label.
cost;
262 if (actions.size() != probs.size())
264 THROW(
"Actions and probabilties must be the same length.");
267 for (
size_t i = 0; i < this->actions.size(); i++)
269 outcome->probabilities.push_back({actions[i], probs[i]});
274 ld->outcome = outcome;
275 cb_label = {0., 0, 0., 0.};
281 ld->
costs.push_back(cb_label);
284 cb_label = {0., 0, 0., 0.};
298 template <
bool audit>
329 template <
bool audit>
345 template <
bool audit>
374 template <
bool audit>
384 const char* start = str;
385 const char* end = str + length;
386 for (
char* p = (
char*)str; p != end; p++)
395 ns.AddFeature(ctx.
all, start);
408 ns.AddFeature(ctx.
all, start);
414 template <
bool audit>
428 template <
bool audit>
446 ld->
costs.push_back(f);
454 THROW(
"label type is not CB or CCB")
487 template <
bool audit>
535 template <
bool audit>
547 ctx.
error() <<
"Nested arrays are not supported";
562 std::stringstream str;
597 template <
bool audit>
605 template <
bool audit>
615 char* head = ctx.
stream->src_ + length + 2;
618 ctx.
error() <<
"Expected ':' found '" << *head <<
"'";
625 int depth = 0, sq_depth = 0;
632 ctx.
error() <<
"Found EOF";
637 bool stopInner =
false;
644 ctx.
error() <<
"Found EOF";
660 if (depth == 0 && sq_depth == 0)
669 if (depth == 0 && sq_depth == 0)
675 if (depth == 0 && sq_depth == 0)
683 char* value = ctx.
stream->src_ + length + 3;
686 ctx.
error() <<
"Found EOF";
692 memset(value,
' ', head - value - 1);
702 if (length > 0 && str[0] ==
'_')
715 ctx.
error() <<
"Unsupported key '" << ctx.
key <<
"' len: " << length;
754 return Ignore(ctx, length);
763 const char* end = str + length;
764 for (
char* p = (
char*)str; p != end; p++)
806 if (label_index >= 0)
812 ctx.
error() <<
"Out of bounds error: _labelIndex must be smaller than number of actions! _labelIndex=" 813 << (label_index - 1) <<
" Number of actions=" << ctx.
examples->
size() - 1 <<
" ";
842 outcome->probabilities.push_back(
866 template <
bool audit,
typename T>
876 bool has_seen_array_start =
false;
882 ctx.
error() <<
"Nested arrays are not supported";
886 has_seen_array_start =
true;
893 output_array->push_back(static_cast<T>(f));
895 if (!has_seen_array_start)
897 has_seen_array_start =
false;
906 output_array->push_back(static_cast<T>(f));
908 if (!has_seen_array_start)
910 has_seen_array_start =
false;
919 if (!has_seen_array_start)
921 has_seen_array_start =
false;
931 has_seen_array_start =
false;
936 template <
bool audit>
946 Context<audit>& ,
const char* str, rapidjson::SizeType length,
bool )
override 948 output_string->assign(str, str + length);
955 template <
bool audit>
977 template <
bool audit>
999 float probabilityOfDrop = 0.f;
1000 bool skipLearn{
false};
1003 template <
bool audit>
1006 int slot_object_index = 0;
1021 slot_object_index = 0;
1028 slot_object_index++;
1034 if (slot_object_index == 0)
1036 THROW(
"Badly formed ccb example. Shared example is required.")
1049 slot_object_index++;
1064 if (ex->l.conditional_contextual_bandit.outcome)
1066 interactions->
actions.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].action);
1067 interactions->
probabilities.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].score);
1077 template <
bool audit>
1117 else if (length == 5 && !strcmp(str,
"pdrop"))
1123 else if (length == 7 && !strcmp(str,
"EventId"))
1129 else if (length > 0 && str[0] ==
'_')
1132 if (length >= 6 && !strncmp(str,
"_label", 6))
1136 if (length >= 7 && ctx.
key[6] ==
'_')
1138 else if (length == 6)
1140 else if (length == 11 && !
_stricmp(str,
"_labelIndex"))
1143 else if (length == 10 && !strncmp(str,
"_skipLearn", 10))
1149 else if (length == 9 && !strncmp(str,
"_outcomes", 9))
1161 template <
bool audit>
1214 current_state = &default_state;
1215 root_state = &default_state;
1223 previous_state =
nullptr;
1224 label_object_state.
init(pall);
1230 error_ptr.reset(
new std::stringstream{});
1237 decision_service_state.
data = data;
1238 current_state = root_state = &decision_service_state;
1252 namespace_path.push_back(n);
1257 auto& ns = CurrentNamespace();
1258 if (ns.feature_count > 0)
1260 auto feature_group = ns.feature_group;
1268 auto return_state = namespace_path.back().return_state;
1269 namespace_path.pop_back();
1270 return return_state;
1277 if (next_state ==
nullptr)
1280 previous_state = current_state;
1281 current_state = next_state;
1287 template <
bool audit>
1288 struct VWReaderHandler :
public rapidjson::BaseReaderHandler<rapidjson::UTF8<>, VWReaderHandler<audit>>
1297 ctx.
ex = (*examples)[0];
1313 bool String(
const char* str, SizeType len,
bool copy)
1318 bool Key(
const char* str, SizeType len,
bool copy)
1331 bool RawNumber(
const char* , rapidjson::SizeType ,
bool ) {
return false; }
1338 template <
bool audit>
1347 template <
bool audit>
1353 InsituStringStream ss(line);
1357 handler.
init(&all, &examples, &ss, line + strlen(line), example_factory, ex_factory_context);
1359 ParseResult result =
1360 parser.
reader.template Parse<kParseInsituFlag, InsituStringStream, VWReaderHandler<audit>>(ss, handler);
1361 if (!result.IsError())
1366 THROW(
"JSON parser error at " << result.Offset() <<
": " << GetParseError_En(result.Code())
1369 << handler.
error().str()
1370 <<
"State: " << (current_state ? current_state->
name :
"null"));
1378 for (
auto& e : examples)
1380 e->l.cb.weight = 1 - pdrop;
1385 for (
auto& e : examples)
1387 e->l.conditional_contextual_bandit.weight = 1 - pdrop;
1392 template <
bool audit>
1396 std::vector<char> line_vec;
1399 line_vec.insert(line_vec.end(), line, line + length);
1400 line = &line_vec.front();
1403 InsituStringStream ss(line);
1407 handler.
init(&all, &examples, &ss, line + length, example_factory, ex_factory_context);
1408 handler.
ctx.SetStartStateToDecisionService(data);
1410 ParseResult result =
1411 parser.
reader.template Parse<kParseInsituFlag, InsituStringStream, VWReaderHandler<audit>>(ss, handler);
1415 if (!result.IsError())
1420 THROW(
"JSON parser error at " << result.Offset() <<
": " << GetParseError_En(result.Code())
1423 << handler.
error().str()
1424 <<
"State: " << (current_state ? current_state->
name :
"null"));
1428 template <
bool audit>
1440 VW::template read_line_decision_service_json<audit>(*all, examples, line, num_chars,
false,
1454 if (interaction.
actions.size() == 0)
1462 VW::template read_line_json<audit>(
1476 if (examples.
size() > 1)
1488 template <
bool audit>
1491 bool good_example = parse_line_json<audit>(all, line, num_chars, examples);
1502 template <
bool audit>
1513 size_t num_chars_initial =
read_features(all, line, num_chars);
1514 if (num_chars_initial < 1)
1515 return (
int)num_chars_initial;
1518 line[num_chars] =
'\0';
1520 reread = !parse_line_json<audit>(all, line, num_chars, examples);
BaseState< audit > * root_state
void return_multiple_example(vw &all, v_array< example *> &examples)
virtual BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool)
virtual BaseState< audit > * Bool(Context< audit > &ctx, bool b)
v_array< namespace_index > indices
void parse_example_label(vw &all, example &ec, std::string label)
LabelObjectState< audit > label_object_state
BaseState< audit > * Null(Context< audit > &) override
BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType memberCount) override
virtual BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType)
BaseState< audit > * EndObject(Context< audit > &, rapidjson::SizeType) override
std::string * output_string
std::vector< Namespace< audit > > namespace_path
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * Uint(Context< audit > &, unsigned f) override
void push_back(feature_value v, feature_index i)
example &(* example_factory_t)(void *)
BaseState< audit > * Null(Context< audit > &) override
bool VWReaderHandlerNull()
BaseState< audit > * Float(Context< audit > &ctx, float f) override
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
BaseState< audit > * saved_root_state
BaseState< audit > * Uint(Context< audit > &ctx, unsigned int v) override
std::shared_ptr< audit_strings > audit_strings_ptr
rapidjson::SizeType key_length
bool VWReaderHandlerDefault()
void init(vw *all, v_array< example *> *examples, rapidjson::InsituStringStream *stream, const char *stream_end, VW::example_factory_t example_factory, void *example_factory_context)
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
ArrayToVectorState< audit, float > array_float_state
BaseState< audit > * Float(Context< audit > &ctx, float v) override
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * Ignore(Context< audit > &ctx, rapidjson::SizeType length)
LabelIndexState< audit > label_index_state
void(* default_label)(void *)
label_type::label_type_t label_type
void count_label(shared_data *sd, float l)
BaseState< audit > * Bool(Context< audit > &ctx, bool b) override
BaseState< audit > * Float(Context< audit > &ctx, float f) override
BaseState< audit > * Float(Context< audit > &ctx, float v) override
LabelState< audit > label_state
MultiState< audit > multi_state
VWReaderHandler< audit > handler
bool RawNumber(const char *, rapidjson::SizeType, bool)
BaseState< audit > * return_state
the core definition of a set of features.
v_array< cb_class > costs
std::stringstream & error()
VW_STD14_CONSTEXPR uint64_t uniform_hash(const void *key, size_t len, uint64_t seed)
void AddFeature(vw *all, const char *str)
BaseState< audit > * StartArray(Context< audit > &ctx) override
void line_to_examples_json(vw *all, char *line, size_t num_chars, v_array< example *> &examples)
BaseState< audit > * StartObject(Context< audit > &ctx) override
std::vector< float > probs
feature_index namespace_hash
BaseState< audit > * EndArray(Context< audit > &, rapidjson::SizeType) override
LabelSinglePropertyState()
ArrayState< audit > array_state
BaseState< audit > * StartArray(Context< audit > &ctx) override
LabelSinglePropertyState< audit > label_single_property_state
void SetStartStateToDecisionService(DecisionServiceInteraction *data)
BaseState< audit > * return_state
BaseState< audit > * StartObject(Context< audit > &ctx) override
void AddFeature(feature_value v, feature_index i, const char *feature_name)
bool EndObject(SizeType count)
virtual BaseState< audit > * Float(Context< audit > &ctx, float v)
BaseState< audit > * Null(Context< audit > &) override
ArrayToVectorState< audit, unsigned > array_uint_state
void read_line_json(vw &all, v_array< example *> &examples, char *line, example_factory_t example_factory, void *ex_factory_context)
BaseState< audit > * StartArray(Context< audit > &ctx) override
DecisionServiceInteraction * interactions
virtual BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool)
BaseState< audit > * Float(Context< audit > &, float f) override
StringToStringState< audit > string_state
void * example_factory_context
std::array< features, NUM_NAMESPACES > feature_space
CCBOutcomeList< audit > ccb_outcome_list_state
Namespace< audit > & CurrentNamespace()
BaseState(const char *pname)
BaseState< audit > * StartArray(Context< audit > &ctx) override
void substring_to_example(vw *all, example *ae, substring example)
std::unique_ptr< std::stringstream > error_ptr
BaseState< audit > * Float(Context< audit > &, float f) override
bool String(const char *str, SizeType len, bool copy)
FloatToFloatState< audit > float_state
BoolToBoolState< audit > bool_state
BaseState< audit > * old_root
void push_many(v_array< T > &v, const T *_begin, size_t num)
VW::example_factory_t example_factory
virtual BaseState< audit > * StartObject(Context< audit > &ctx)
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType, bool) override
void push_back(const T &new_ele)
std::vector< unsigned int > inc
BaseState< audit > * Uint(Context< audit > &ctx, unsigned) override
BaseState< audit > * current_state()
BaseState< audit > * Float(Context< audit > &ctx, float v) override
BaseState< audit > * StartObject(Context< audit > &) override
DecisionServiceInteraction * data
BaseState< audit > * Bool(Context< audit > &, bool b) override
std::stringstream & error()
v_array< example * > * examples
void prepare_for_learner(vw *all, v_array< example *> &examples)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
BaseState< audit > * String(Context< audit > &, const char *str, rapidjson::SizeType length, bool) override
SlotsState< audit > slots_state
BaseState< audit > * PopNamespace()
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * return_state
DecisionServiceState< audit > decision_service_state
DefaultState< audit > default_state
virtual BaseState< audit > * StartArray(Context< audit > &ctx)
rapidjson::InsituStringStream * stream
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType len, bool) override
bool decision_service_json
BaseState< audit > * StartArray(Context< audit > &ctx) override
v_array< audit_strings_ptr > space_names
BaseState< audit > * return_state
std::vector< float > probabilities
virtual BaseState< audit > * Null(Context< audit > &ctx)
BaseState< audit > * return_state
node_pred * find(recall_tree &b, uint32_t cn, example &ec)
bool TransitionState(BaseState< audit > *next_state)
void read_line_decision_service_json(vw &all, v_array< example *> &examples, char *line, size_t length, bool copy_line, example_factory_t example_factory, void *ex_factory_context, DecisionServiceInteraction *data)
std::vector< unsigned > actions
BaseState< audit > * saved
BaseState< audit > * return_state
std::vector< unsigned int > actions
CCB::label conditional_contextual_bandit
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType) override
void apply_pdrop(vw &all, float pdrop, v_array< example *> &examples)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
uint64_t hash_feature(vw &all, const std::string &s, uint64_t u)
uint64_t hash_space(vw &all, const std::string &s)
size_t read_features(vw *all, char *&line, size_t &num_chars)
bool parse_line_json(vw *all, char *line, size_t num_chars, v_array< example *> &examples)
bool EndArray(SizeType count)
BaseState< audit > * StartObject(Context< audit > &ctx) override
std::vector< float > probs
BaseState< audit > * Null(Context< audit > &) override
BaseState< audit > * Uint(Context< audit > &ctx, unsigned f) override
virtual BaseState< audit > * EndObject(Context< audit > &ctx, rapidjson::SizeType)
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
TextState< audit > text_state
example & get_unused_example(vw *all)
IgnoreState< audit > ignore_state
BaseState< audit > * current_state
bool Key(const char *str, SizeType len, bool copy)
BaseState< audit > * StartObject(Context< audit > &ctx) override
BaseState< audit > * Uint(Context< audit > &ctx, unsigned f) override
BaseState< audit > * previous_state
TagState< audit > tag_state
BaseState< audit > * EndArray(Context< audit > &ctx, rapidjson::SizeType) override
std::vector< uint32_t > actions
conditional_contextual_bandit_outcome * outcome
std::vector< T > * output_array
BaseState< audit > * String(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool)
BaseState< audit > * Uint(Context< audit > &ctx, unsigned v) override
void PushNamespace(const char *ns, BaseState< audit > *return_state)
BaseState< audit > * StartArray(Context< audit > &ctx) override
std::pair< std::string, std::string > audit_strings
BaseState< audit > * Key(Context< audit > &ctx, const char *str, rapidjson::SizeType length, bool) override
int read_features_json(vw *all, v_array< example *> &examples)
virtual BaseState< audit > * Uint(Context< audit > &ctx, unsigned v)