Vowpal Wabbit
Classes | Namespaces | Enumerations | Functions | Variables
lda_core.cc File Reference
#include <fstream>
#include <vector>
#include <queue>
#include <algorithm>
#include <numeric>
#include <cmath>
#include "correctedMath.h"
#include "vw_versions.h"
#include "vw.h"
#include "mwt.h"
#include <boost/math/special_functions/digamma.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <netdb.h>
#include <cstring>
#include <cstdio>
#include <cassert>
#include "no_label.h"
#include "gd.h"
#include "rand48.h"
#include "reductions.h"
#include "array_parameters.h"
#include <boost/version.hpp>

Go to the source code of this file.

Classes

class  index_feature
 
struct  lda
 
struct  initial_weights
 
class  set_initial_lda_wrapper< T >
 
struct  word_doc_frequency
 
struct  feature_pair
 

Namespaces

 anonymous_namespace{lda_core.cc}
 
 ldamath
 

Enumerations

enum  lda_math_mode { USE_SIMD, USE_PRECISE, USE_FAST_APPROX }
 

Functions

bool anonymous_namespace{lda_core.cc}::is_aligned16 (void *ptr)
 
float ldamath::fastlog2 (float x)
 
float ldamath::fastlog (float x)
 
float ldamath::fastpow2 (float p)
 
float ldamath::fastexp (float p)
 
float ldamath::fastpow (float x, float p)
 
float ldamath::fastlgamma (float x)
 
float ldamath::fastdigamma (float x)
 
template<typename T , const lda_math_mode mtype>
T ldamath::lgamma (T)
 
template<typename T , const lda_math_mode mtype>
T ldamath::digamma (T)
 
template<typename T , lda_math_mode mtype>
T ldamath::exponential (T)
 
template<typename T , lda_math_mode mtype>
T ldamath::powf (T, T)
 
template<>
float ldamath::lgamma< float, USE_PRECISE > (float x)
 
template<>
float ldamath::digamma< float, USE_PRECISE > (float x)
 
template<>
float ldamath::exponential< float, USE_PRECISE > (float x)
 
template<>
float ldamath::powf< float, USE_PRECISE > (float x, float p)
 
template<>
float ldamath::lgamma< float, USE_FAST_APPROX > (float x)
 
template<>
float ldamath::digamma< float, USE_FAST_APPROX > (float x)
 
template<>
float ldamath::exponential< float, USE_FAST_APPROX > (float x)
 
template<>
float ldamath::powf< float, USE_FAST_APPROX > (float x, float p)
 
template<>
float ldamath::lgamma< float, USE_SIMD > (float x)
 
template<>
float ldamath::digamma< float, USE_SIMD > (float x)
 
template<>
float ldamath::exponential< float, USE_SIMD > (float x)
 
template<>
float ldamath::powf< float, USE_SIMD > (float x, float p)
 
template<typename T , const lda_math_mode mtype>
void ldamath::expdigammify (vw &all, T *gamma, T threshold, T initial)
 
template<>
void ldamath::expdigammify< float, USE_SIMD > (vw &all, float *gamma, float threshold, float)
 
template<typename T , const lda_math_mode mtype>
void ldamath::expdigammify_2 (vw &all, float *gamma, T *norm, const T threshold)
 
template<>
void ldamath::expdigammify_2< float, USE_SIMD > (vw &all, float *gamma, float *norm, const float threshold)
 
static float average_diff (vw &all, float *oldgamma, float *newgamma)
 
float theta_kl (lda &l, v_array< float > &Elogtheta, float *gamma)
 
static float find_cw (lda &l, float *u_for_w, float *v)
 
float lda_loop (lda &l, v_array< float > &Elogtheta, float *v, example *ec, float)
 
size_t next_pow2 (size_t x)
 
void save_load (lda &l, io_buf &model_file, bool read, bool text)
 
void return_example (vw &all, example &ec)
 
void learn_batch (lda &l)
 
void learn (lda &l, LEARNER::single_learner &, example &ec)
 
void learn_with_metrics (lda &l, LEARNER::single_learner &base, example &ec)
 
void predict (lda &l, LEARNER::single_learner &base, example &ec)
 
void predict_with_metrics (lda &l, LEARNER::single_learner &base, example &ec)
 
template<class T >
void get_top_weights (vw *all, int top_words_count, int topic, std::vector< feature > &output, T &weights)
 
void get_top_weights (vw *all, int top_words_count, int topic, std::vector< feature > &output)
 
template<class T >
void compute_coherence_metrics (lda &l, T &weights)
 
void compute_coherence_metrics (lda &l)
 
void end_pass (lda &l)
 
template<class T >
void end_examples (lda &l, T &weights)
 
void end_examples (lda &l)
 
void finish_example (vw &, lda &, example &)
 
std::istream & operator>> (std::istream &in, lda_math_mode &mmode)
 
LEARNER::base_learner * lda_setup (options_i &options, vw &all)
 

Variables

v_array< float > anonymous_namespace{lda_core.cc}::new_gamma = v_init<float>()
 
v_array< float > anonymous_namespace{lda_core.cc}::old_gamma = v_init<float>()
 

Enumeration Type Documentation

◆ lda_math_mode

Enumerator
USE_SIMD 
USE_PRECISE 
USE_FAST_APPROX 

Definition at line 45 of file lda_core.cc.

Function Documentation

◆ average_diff()

static float average_diff ( vw & all,
float *  oldgamma,
float *  newgamma 
)
inlinestatic

Definition at line 639 of file lda_core.cc.

References Search::absdiff(), accumulate(), and vw::lda.

Referenced by lda_loop().

640 {
641  float sum;
642  float normalizer;
643 
644  // This warps the normal sense of "inner product", but it accomplishes the same
645  // thing as the "plain old" for loop. clang does a good job of reducing the
646  // common subexpressions.
647  sum = std::inner_product(
648  oldgamma, oldgamma + all.lda, newgamma, 0.0f, [](float accum, float absdiff) { return accum + absdiff; },
649  [](float old_g, float new_g) { return std::abs(old_g - new_g); });
650 
651  normalizer = std::accumulate(newgamma, newgamma + all.lda, 0.0f);
652  return sum / normalizer;
653 }
void accumulate(vw &all, parameters &weights, size_t offset)
Definition: accumulate.cc:20
uint32_t lda
Definition: global_data.h:508
size_t absdiff(size_t a, size_t b)
Definition: search.cc:1946

◆ compute_coherence_metrics() [1/2]

template<class T >
void compute_coherence_metrics ( lda & l,
T &  weights 
)

Definition at line 1103 of file lda_core.cc.

References lda::all, ACTION_SCORE::cmp(), f, lda::feature_counts, lda::feature_to_example_map, vw::num_bits, lda::topics, and feature::x.

Referenced by compute_coherence_metrics(), and end_pass().

1104 {
1105  uint64_t length = (uint64_t)1 << l.all->num_bits;
1106 
1107  std::vector<std::vector<feature_pair>> topics_word_pairs;
1108  topics_word_pairs.resize(l.topics);
1109 
1110  int top_words_count = 10; // parameterize and check
1111 
1112  for (size_t topic = 0; topic < l.topics; topic++)
1113  {
1114  // get top features for this topic
1115  auto cmp = [](feature &left, feature &right) { return left.x > right.x; };
1116  std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
1117  typename T::iterator iter = weights.begin();
1118  for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
1119  top_features.push(feature((&(*iter))[topic], iter.index()));
1120 
1121  for (typename T::iterator v = weights.begin(); v != weights.end(); ++v)
1122  if ((&(*v))[topic] > top_features.top().x)
1123  {
1124  top_features.pop();
1125  top_features.push(feature((&(*v))[topic], v.index()));
1126  }
1127 
1128  // extract idx and sort descending
1129  std::vector<uint64_t> top_features_idx;
1130  top_features_idx.resize(top_features.size());
1131  for (int i = (int)top_features.size() - 1; i >= 0; i--)
1132  {
1133  top_features_idx[i] = top_features.top().weight_index;
1134  top_features.pop();
1135  }
1136 
1137  auto &word_pairs = topics_word_pairs[topic];
1138  for (size_t i = 0; i < top_features_idx.size(); i++)
1139  for (size_t j = i + 1; j < top_features_idx.size(); j++)
1140  word_pairs.emplace_back(top_features_idx[i], top_features_idx[j]);
1141  }
1142 
1143  // compress word pairs and create record for storing frequency
1144  std::map<uint64_t, std::vector<word_doc_frequency>> coWordsDFSet;
1145  for (auto &vec : topics_word_pairs)
1146  {
1147  for (auto &wp : vec)
1148  {
1149  auto f1 = wp.f1;
1150  auto f2 = wp.f2;
1151  auto wdf = coWordsDFSet.find(f1);
1152 
1153  if (wdf != coWordsDFSet.end())
1154  {
1155  // http://stackoverflow.com/questions/5377434/does-stdmapiterator-return-a-copy-of-value-or-a-value-itself
1156  // if (wdf->second.find(f2) == wdf->second.end())
1157 
1158  if (std::find_if(wdf->second.begin(), wdf->second.end(),
1159  [&f2](const word_doc_frequency &v) { return v.idx == f2; }) != wdf->second.end())
1160  {
1161  wdf->second.push_back({f2, 0});
1162  // printf(" add %d %d\n", f1, f2);
1163  }
1164  }
1165  else
1166  {
1167  std::vector<word_doc_frequency> vec = {{f2, 0}};
1168  coWordsDFSet.insert(std::make_pair(f1, vec));
1169  // printf(" insert %d %d\n", f1, f2);
1170  }
1171  }
1172  }
1173 
1174  // this.GetWordPairsDocumentFrequency(coWordsDFSet);
1175  for (auto &pair : coWordsDFSet)
1176  {
1177  auto &examples_for_f1 = l.feature_to_example_map[pair.first];
1178  for (auto &wdf : pair.second)
1179  {
1180  auto &examples_for_f2 = l.feature_to_example_map[wdf.idx];
1181 
1182  // assumes examples_for_f1 and examples_for_f2 are orderd
1183  size_t i = 0;
1184  size_t j = 0;
1185  while (i < examples_for_f1.size() && j < examples_for_f2.size())
1186  {
1187  if (examples_for_f1[i] == examples_for_f2[j])
1188  {
1189  wdf.count++;
1190  i++;
1191  j++;
1192  }
1193  else if (examples_for_f2[j] < examples_for_f1[i])
1194  j++;
1195  else
1196  i++;
1197  }
1198  }
1199  }
1200 
1201  float epsilon = 1e-6f; // TODO
1202  float avg_coherence = 0;
1203  for (size_t topic = 0; topic < l.topics; topic++)
1204  {
1205  float coherence = 0;
1206 
1207  for (auto &pairs : topics_word_pairs[topic])
1208  {
1209  auto f1 = pairs.f1;
1210  if (l.feature_counts[f1] == 0)
1211  continue;
1212 
1213  auto f2 = pairs.f2;
1214  auto &co_feature = coWordsDFSet[f1];
1215  auto co_feature_df = std::find_if(
1216  co_feature.begin(), co_feature.end(), [&f2](const word_doc_frequency &v) { return v.idx == f2; });
1217 
1218  if (co_feature_df != co_feature.end())
1219  {
1220  // printf("(%d:%d + eps)/(%d:%d)\n", f2, co_feature_df->count, f1, l.feature_counts[f1]);
1221  coherence += logf((co_feature_df->count + epsilon) / l.feature_counts[f1]);
1222  }
1223  }
1224 
1225  printf("Topic %3d coherence: %f\n", (int)topic, coherence);
1226 
1227  // TODO: expose per topic coherence
1228 
1229  // TODO: good vs. bad topics
1230  avg_coherence += coherence;
1231  }
1232 
1233  avg_coherence /= l.topics;
1234 
1235  printf("Avg topic coherence: %f\n", avg_coherence);
1236 }
float x
Definition: feature_group.h:27
vw * all
Definition: lda_core.cc:89
uint32_t num_bits
Definition: global_data.h:398
std::vector< uint32_t > feature_counts
Definition: lda_core.cc:83
std::vector< std::vector< size_t > > feature_to_example_map
Definition: lda_core.cc:84
size_t topics
Definition: lda_core.cc:62
int cmp(size_t a, size_t b)
Definition: action_score.h:47
float f
Definition: cache.cc:40

◆ compute_coherence_metrics() [2/2]

void compute_coherence_metrics ( lda & l)

Definition at line 1238 of file lda_core.cc.

References lda::all, compute_coherence_metrics(), parameters::dense_weights, parameters::sparse, parameters::sparse_weights, and vw::weights.

1239 {
1240  if (l.all->weights.sparse)
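1241  compute_coherence_metrics(l, l.all->weights.sparse_weights);
1242  else
1243  compute_coherence_metrics(l, l.all->weights.dense_weights);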
1244 }
parameters weights
Definition: global_data.h:537
void compute_coherence_metrics(lda &l, T &weights)
Definition: lda_core.cc:1103
vw * all
Definition: lda_core.cc:89
dense_parameters dense_weights
sparse_parameters sparse_weights

◆ end_examples() [1/2]

template<class T >
void end_examples ( lda & l,
T &  weights 
)

Definition at line 1259 of file lda_core.cc.

References lda::all, correctedExp, lda::decay_levels, v_array< T >::end(), lda::example_t, f, v_array< T >::last(), and vw::lda.

Referenced by end_examples(), and lda_setup().

1260 {
1261  for (typename T::iterator iter = weights.begin(); iter != weights.end(); ++iter)
1262  {
1263  float decay_component =
1264  l.decay_levels.last() - l.decay_levels.end()[(int)(-1 - l.example_t + (&(*iter))[l.all->lda])];
1265  float decay = fmin(1.f, correctedExp(decay_component));
1266 
1267  weight *wp = &(*iter);
1268  for (size_t i = 0; i < l.all->lda; ++i) wp[i] *= decay;
1269  }
1270 }
#define correctedExp
Definition: correctedMath.h:27
v_array< float > decay_levels
Definition: lda_core.cc:71
vw * all
Definition: lda_core.cc:89
double example_t
Definition: lda_core.cc:88
uint32_t lda
Definition: global_data.h:508
T *& end()
Definition: v_array.h:43
float weight
T last() const
Definition: v_array.h:57
float f
Definition: cache.cc:40

◆ end_examples() [2/2]

void end_examples ( lda & l)

Definition at line 1272 of file lda_core.cc.

References lda::all, parameters::dense_weights, end_examples(), parameters::sparse, parameters::sparse_weights, and vw::weights.

1273 {
1274  if (l.all->weights.sparse)
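1275  end_examples(l, l.all->weights.sparse_weights);
1276  else
1277  end_examples(l, l.all->weights.dense_weights);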
1278 }
parameters weights
Definition: global_data.h:537
vw * all
Definition: lda_core.cc:89
dense_parameters dense_weights
sparse_parameters sparse_weights
void end_examples(lda &l, T &weights)
Definition: lda_core.cc:1259

◆ end_pass()

void end_pass ( lda & l)

Definition at line 1246 of file lda_core.cc.

References lda::all, lda::compute_coherence_metrics, compute_coherence_metrics(), v_array< T >::empty(), lda::examples, learn_batch(), vw::numpasses, and vw::passes_complete.

Referenced by lda_setup().

1247 {
1248  if (!l.examples.empty())
1249  learn_batch(l);
1250 
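1251  if (l.compute_coherence_metrics && l.all->passes_complete == l.all->numpasses)
1252  {
1253  compute_coherence_metrics(l);
1254  // FASTPASS return;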
1255  }
1256 }
v_array< example * > examples
Definition: lda_core.cc:73
void learn_batch(lda &l)
Definition: lda_core.cc:864
void compute_coherence_metrics(lda &l, T &weights)
Definition: lda_core.cc:1103
vw * all
Definition: lda_core.cc:89
bool compute_coherence_metrics
Definition: lda_core.cc:80
size_t numpasses
Definition: global_data.h:451
size_t passes_complete
Definition: global_data.h:452
bool empty() const
Definition: v_array.h:59

◆ find_cw()

static float find_cw ( lda & l,
float *  u_for_w,
float *  v 
)
inlinestatic

Definition at line 679 of file lda_core.cc.

References lda::topics.

Referenced by lda_loop(), and learn_batch().

680 {
681  return 1.0f / std::inner_product(u_for_w, u_for_w + l.topics, v, 0.0f);
682 }
size_t topics
Definition: lda_core.cc:62

◆ finish_example()

void finish_example ( vw & ,
lda & ,
example & 
)

Definition at line 1280 of file lda_core.cc.

1280 {}

◆ get_top_weights() [1/2]

template<class T >
void get_top_weights ( vw * all,
int  top_words_count,
int  topic,
std::vector< feature > &  output,
T &  weights 
)

Definition at line 1063 of file lda_core.cc.

References ACTION_SCORE::cmp(), vw::num_bits, and feature::x.

Referenced by get_top_weights().

1064 {
1065  uint64_t length = (uint64_t)1 << all->num_bits;
1066 
1067  // get top features for this topic
1068  auto cmp = [](feature left, feature right) { return left.x > right.x; };
1069  std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
1070  typename T::iterator iter = weights.begin();
1071 
1072  for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
1073  top_features.push({(&(*iter))[topic], iter.index()});
1074 
1075  for (uint64_t i = top_words_count; i < length; i++, ++iter)
1076  {
1077  weight v = (&(*iter))[topic];
1078  if (v > top_features.top().x)
1079  {
1080  top_features.pop();
1081  top_features.push({v, i});
1082  }
1083  }
1084 
1085  // extract idx and sort descending
1086  output.resize(top_features.size());
1087  for (int i = (int)top_features.size() - 1; i >= 0; i--)
1088  {
1089  output[i] = top_features.top();
1090  top_features.pop();
1091  }
1092 }
float x
Definition: feature_group.h:27
uint32_t num_bits
Definition: global_data.h:398
float weight
int cmp(size_t a, size_t b)
Definition: action_score.h:47

◆ get_top_weights() [2/2]

void get_top_weights ( vw * all,
int  top_words_count,
int  topic,
std::vector< feature > &  output 
)

Definition at line 1094 of file lda_core.cc.

References parameters::dense_weights, get_top_weights(), parameters::sparse, parameters::sparse_weights, and vw::weights.

1095 {
1096  if (all->weights.sparse)
1097  get_top_weights(all, top_words_count, topic, output, all->weights.sparse_weights);
1098  else
1099  get_top_weights(all, top_words_count, topic, output, all->weights.dense_weights);
1100 }
parameters weights
Definition: global_data.h:537
dense_parameters dense_weights
void get_top_weights(vw *all, int top_words_count, int topic, std::vector< feature > &output, T &weights)
Definition: lda_core.cc:1063
sparse_parameters sparse_weights

◆ lda_loop()

float lda_loop ( lda & l,
v_array< float > &  Elogtheta,
float *  v,
example * ec,
float   
)

Definition at line 696 of file lda_core.cc.

References lda::all, average_diff(), v_array< T >::begin(), v_array< T >::clear(), lda::expdigammify(), f, find_cw(), lda::lda_alpha, lda::lda_epsilon, anonymous_namespace{lda_core.cc}::new_gamma, anonymous_namespace{lda_core.cc}::old_gamma, v_array< T >::push_back(), theta_kl(), lda::topics, and vw::weights.

Referenced by learn_batch().

697 {
698  parameters &weights = l.all->weights;
699  new_gamma.clear();
700  old_gamma.clear();
701 
702  for (size_t i = 0; i < l.topics; i++)
703  {
704  new_gamma.push_back(1.f);
705  old_gamma.push_back(0.f);
706  }
707  size_t num_words = 0;
708  for (features &fs : *ec) num_words += fs.size();
709 
710  float xc_w = 0;
711  float score = 0;
712  float doc_length = 0;
713  do
714  {
715  memcpy(v, new_gamma.begin(), sizeof(float) * l.topics);
716  l.expdigammify(*l.all, v);
717 
718  memcpy(old_gamma.begin(), new_gamma.begin(), sizeof(float) * l.topics);
719  memset(new_gamma.begin(), 0, sizeof(float) * l.topics);
720 
721  score = 0;
722  size_t word_count = 0;
723  doc_length = 0;
724  for (features &fs : *ec)
725  {
726  for (features::iterator &f : fs)
727  {
728  float *u_for_w = &(weights[f.index()]) + l.topics + 1;
729  float c_w = find_cw(l, u_for_w, v);
730  xc_w = c_w * f.value();
731  score += -f.value() * log(c_w);
732  size_t max_k = l.topics;
733  for (size_t k = 0; k < max_k; k++, ++u_for_w) new_gamma[k] += xc_w * *u_for_w;
734  word_count++;
735  doc_length += f.value();
736  }
737  }
738  for (size_t k = 0; k < l.topics; k++) new_gamma[k] = new_gamma[k] * v[k] + l.lda_alpha;
739  } while (average_diff(*l.all, old_gamma.begin(), new_gamma.begin()) > l.lda_epsilon);
740 
741  ec->pred.scalars.clear();
742  ec->pred.scalars.resize(l.topics);
743  memcpy(ec->pred.scalars.begin(), new_gamma.begin(), l.topics * sizeof(float));
744  ec->pred.scalars.end() = ec->pred.scalars.begin() + l.topics;
745 
746  score += theta_kl(l, Elogtheta, new_gamma.begin());
747 
748  return score / doc_length;
749 }
parameters weights
Definition: global_data.h:537
vw * all
Definition: lda_core.cc:89
the core definition of a set of features.
T *& begin()
Definition: v_array.h:42
float lda_epsilon
Definition: lda_core.cc:66
float lda_alpha
Definition: lda_core.cc:63
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
float theta_kl(lda &l, v_array< float > &Elogtheta, float *gamma)
Definition: lda_core.cc:656
iterator over values and indices
size_t topics
Definition: lda_core.cc:62
void expdigammify(vw &all, float *gamma)
Definition: lda_core.cc:601
static float average_diff(vw &all, float *oldgamma, float *newgamma)
Definition: lda_core.cc:639
static float find_cw(lda &l, float *u_for_w, float *v)
Definition: lda_core.cc:679
float f
Definition: cache.cc:40

◆ lda_setup()

LEARNER::base_learner* lda_setup ( options_i & options,
vw & all 
)

Definition at line 1299 of file lda_core.cc.

References add(), VW::config::options_i::add_and_parse(), vw::add_constant, vw::delete_prediction, delete_scalars(), end_examples(), end_pass(), vw::eta, f, VW::finish_example(), LEARNER::init_learner(), vw::initial_t, vw::lda, learn(), learn_with_metrics(), parser::lp, LEARNER::make_base(), VW::config::make_option(), next_pow2(), no_label::no_label_parser, vw::num_bits, vw::p, predict(), predict_with_metrics(), vw::random_weights, parser::ring_size, save_load(), prediction_type::scalars, LEARNER::learner< T, E >::set_end_examples(), LEARNER::learner< T, E >::set_end_pass(), LEARNER::learner< T, E >::set_finish_example(), LEARNER::learner< T, E >::set_save_load(), parser::strict_parse, parameters::stride_shift(), UINT64_ONE, USE_SIMD, VW::config::options_i::was_supplied(), and vw::weights.

Referenced by parse_reductions().

1300 {
1301  auto ld = scoped_calloc_or_throw<lda>();
1302  option_group_definition new_options("Latent Dirichlet Allocation");
1303  int math_mode;
1304  new_options.add(make_option("lda", ld->topics).keep().help("Run lda with <int> topics"))
1305  .add(make_option("lda_alpha", ld->lda_alpha)
1306  .keep()
1307  .default_value(0.1f)
1308  .help("Prior on sparsity of per-document topic weights"))
1309  .add(make_option("lda_rho", ld->lda_rho)
1310  .keep()
1311  .default_value(0.1f)
1312  .help("Prior on sparsity of topic distributions"))
1313  .add(make_option("lda_D", ld->lda_D).default_value(10000.0f).help("Number of documents"))
1314  .add(make_option("lda_epsilon", ld->lda_epsilon).default_value(0.001f).help("Loop convergence threshold"))
1315  .add(make_option("minibatch", ld->minibatch).default_value(1).help("Minibatch size, for LDA"))
1316  .add(make_option("math-mode", math_mode).default_value(USE_SIMD).help("Math mode: simd, accuracy, fast-approx"))
1317  .add(make_option("metrics", ld->compute_coherence_metrics).help("Compute metrics"));
1318  options.add_and_parse(new_options);
1319 
1320  // Convert from int to corresponding enum value.
1321  ld->mmode = static_cast<lda_math_mode>(math_mode);
1322 
1323  if (!options.was_supplied("lda"))
1324  return nullptr;
1325 
1326  all.lda = (uint32_t)ld->topics;
1327  all.delete_prediction = delete_scalars;
1328  ld->sorted_features = std::vector<index_feature>();
1329  ld->total_lambda_init = false;
1330  ld->all = &all;
1331  ld->example_t = all.initial_t;
1332  if (ld->compute_coherence_metrics)
1333  {
1334  ld->feature_counts.resize((uint32_t)(UINT64_ONE << all.num_bits));
1335  ld->feature_to_example_map.resize((uint32_t)(UINT64_ONE << all.num_bits));
1336  }
1337 
1338  float temp = ceilf(logf((float)(all.lda * 2 + 1)) / logf(2.f));
1339 
1340  all.weights.stride_shift((size_t)temp);
1341  all.random_weights = true;
1342  all.add_constant = false;
1343 
1344  if (all.eta > 1.)
1345  {
1346  std::cerr << "your learning rate is too high, setting it to 1" << std::endl;
1347  all.eta = std::min(all.eta, 1.f);
1348  }
1349 
1350  size_t minibatch2 = next_pow2(ld->minibatch);
1351  if (minibatch2 > all.p->ring_size)
1352  {
1353  bool previous_strict_parse = all.p->strict_parse;
1354  delete all.p;
1355  all.p = new parser{minibatch2, previous_strict_parse};
1356  }
1357 
1358  ld->v.resize(all.lda * ld->minibatch);
1359 
1360  ld->decay_levels.push_back(0.f);
1361 
1362  all.p->lp = no_label::no_label_parser;
1363 
1364  LEARNER::learner<lda, example> &l = init_learner(ld, ld->compute_coherence_metrics ? learn_with_metrics : learn,
1365  ld->compute_coherence_metrics ? predict_with_metrics : predict, UINT64_ONE << all.weights.stride_shift(),
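1366  prediction_type::scalars);
1367 
1368  l.set_save_load(save_load);
1369  l.set_finish_example(finish_example);
1370  l.set_end_examples(end_examples);
1371  l.set_end_pass(end_pass);
1372 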
1373  return make_base(l);
1374 }
parameters weights
Definition: global_data.h:537
void learn_with_metrics(lda &l, LEARNER::single_learner &base, example &ec)
Definition: lda_core.cc:1017
void(* delete_prediction)(void *)
Definition: global_data.h:485
float initial_t
Definition: global_data.h:530
lda_math_mode
Definition: lda_core.cc:45
bool add_constant
Definition: global_data.h:496
base_learner * make_base(learner< T, E > &base)
Definition: learner.h:462
void predict_with_metrics(lda &l, LEARNER::single_learner &base, example &ec)
Definition: lda_core.cc:1041
virtual void add_and_parse(const option_group_definition &group)=0
void set_save_load(void(*sl)(T &, io_buf &, bool, bool))
Definition: learner.h:257
uint32_t num_bits
Definition: global_data.h:398
bool strict_parse
Definition: parser.h:107
parser * p
Definition: global_data.h:377
uint32_t lda
Definition: global_data.h:508
size_t next_pow2(size_t x)
Definition: lda_core.cc:751
void set_finish_example(void(*f)(vw &all, T &, E &))
Definition: learner.h:307
void finish_example(vw &, lda &, example &)
Definition: lda_core.cc:1280
learner< T, E > & init_learner(free_ptr< T > &dat, L *base, void(*learn)(T &, L &, E &), void(*predict)(T &, L &, E &), size_t ws, prediction_type::prediction_type_t pred_type)
Definition: learner.h:369
void delete_scalars(void *v)
Definition: example.h:37
virtual bool was_supplied(const std::string &key)=0
bool random_weights
Definition: global_data.h:492
const size_t ring_size
Definition: parser.h:80
void learn(lda &l, LEARNER::single_learner &, example &ec)
Definition: lda_core.cc:999
void save_load(lda &l, io_buf &model_file, bool read, bool text)
Definition: lda_core.cc:793
float eta
Definition: global_data.h:531
int add(svm_params &params, svm_example *fec)
Definition: kernel_svm.cc:546
typed_option< T > make_option(std::string name, T &location)
Definition: options.h:80
constexpr uint64_t UINT64_ONE
void set_end_pass(void(*f)(T &))
Definition: learner.h:286
void predict(lda &l, LEARNER::single_learner &base, example &ec)
Definition: lda_core.cc:1040
uint32_t stride_shift()
Definition: parser.h:38
void end_examples(lda &l, T &weights)
Definition: lda_core.cc:1259
void end_pass(lda &l)
Definition: lda_core.cc:1246
float f
Definition: cache.cc:40
void set_end_examples(void(*f)(T &))
Definition: learner.h:295
label_parser lp
Definition: parser.h:102
label_parser no_label_parser
Definition: no_label.cc:41

◆ learn()

void learn ( lda & l,
LEARNER::single_learner & ,
example & ec 
)

Definition at line 999 of file lda_core.cc.

References lda::doc_lengths, lda::examples, f, learn_batch(), lda::minibatch, v_array< T >::push_back(), v_array< T >::size(), and lda::sorted_features.

Referenced by lda_setup(), learn_with_metrics(), and predict().

1000 {
1001  uint32_t num_ex = (uint32_t)l.examples.size();
1002  l.examples.push_back(&ec);
1003  l.doc_lengths.push_back(0);
1004  for (features &fs : ec)
1005  {
1006  for (features::iterator &f : fs)
1007  {
1008  index_feature temp = {num_ex, feature(f.value(), f.index())};
1009  l.sorted_features.push_back(temp);
1010  l.doc_lengths[num_ex] += (int)f.value();
1011  }
1012  }
1013  if (++num_ex == l.minibatch)
1014  learn_batch(l);
1015 }
v_array< example * > examples
Definition: lda_core.cc:73
v_array< int > doc_lengths
Definition: lda_core.cc:75
void learn_batch(lda &l)
Definition: lda_core.cc:864
size_t minibatch
Definition: lda_core.cc:67
the core definition of a set of features.
size_t size() const
Definition: v_array.h:68
void push_back(const T &new_ele)
Definition: v_array.h:107
iterator over values and indices
std::vector< index_feature > sorted_features
Definition: lda_core.cc:78
float f
Definition: cache.cc:40

◆ learn_batch()

void learn_batch ( lda & l)

Definition at line 864 of file lda_core.cc.

References lda::all, vw::audit, v_array< T >::begin(), v_array< T >::clear(), correctedExp, lda::decay_levels, lda::digamma(), lda::digammas, lda::doc_lengths, lda::Elogtheta, v_array< T >::empty(), v_array< T >::end(), vw::eta, lda::example_t, lda::examples, lda::expdigammify_2(), f, index_feature::f, find_cw(), v_array< T >::last(), vw::lda, lda::lda_D, lda_loop(), lda::lda_rho, vw::length(), parameters::mask(), vw::power_t, lda::powf(), GD::print_audit_features(), v_array< T >::push_back(), v_array< T >::resize(), return_example(), vw::sd, v_array< T >::size(), lda::sorted_features, parameters::stride(), shared_data::sum_loss, shared_data::sum_loss_since_last_dump, lda::topics, lda::total_lambda, lda::total_new, lda::v, feature::weight_index, and vw::weights.

Referenced by end_pass(), and learn().

865 {
866  parameters &weights = l.all->weights;
867  if (l.sorted_features.empty()) // FAST-PASS for real "true"
868  {
869  // This can happen when the socket connection is dropped by the client.
870  // If l.sorted_features is empty, then l.sorted_features[0] does not
871  // exist, so we should not try to take its address in the beginning of
872  // the for loops down there. Since it seems that there's not much to
873  // do in this case, we just return.
874  for (size_t d = 0; d < l.examples.size(); d++)
875  {
876  l.examples[d]->pred.scalars.clear();
877  l.examples[d]->pred.scalars.resize(l.topics);
878  memset(l.examples[d]->pred.scalars.begin(), 0, l.topics * sizeof(float));
879  l.examples[d]->pred.scalars.end() = l.examples[d]->pred.scalars.begin() + l.topics;
880 
881  l.examples[d]->pred.scalars.clear();
882  return_example(*l.all, *l.examples[d]);
883  }
884  l.examples.clear();
885  return;
886  }
887 
888  float eta = -1;
889  float minuseta = -1;
890 
891  if (l.total_lambda.empty())
892  {
893  for (size_t k = 0; k < l.all->lda; k++) l.total_lambda.push_back(0.f);
894  // This part does not work with sparse parameters
895  size_t stride = weights.stride();
896  for (size_t i = 0; i <= weights.mask(); i += stride)
897  {
898  weight *w = &(weights[i]);
899  for (size_t k = 0; k < l.all->lda; k++) l.total_lambda[k] += w[k];
900  }
901  }
902 
903  l.example_t++;
904  l.total_new.clear();
905  for (size_t k = 0; k < l.all->lda; k++) l.total_new.push_back(0.f);
906 
907  size_t batch_size = l.examples.size();
908 
909  sort(l.sorted_features.begin(), l.sorted_features.end());
910 
911  eta = l.all->eta * l.powf((float)l.example_t, -l.all->power_t);
912  minuseta = 1.0f - eta;
913  eta *= l.lda_D / batch_size;
914  l.decay_levels.push_back(l.decay_levels.last() + log(minuseta));
915 
916  l.digammas.clear();
917  float additional = (float)(l.all->length()) * l.lda_rho;
918  for (size_t i = 0; i < l.all->lda; i++) l.digammas.push_back(l.digamma(l.total_lambda[i] + additional));
919 
920  uint64_t last_weight_index = -1;
921  for (index_feature *s = &l.sorted_features[0]; s <= &l.sorted_features.back(); s++)
922  {
923  if (last_weight_index == s->f.weight_index)
924  continue;
925  last_weight_index = s->f.weight_index;
926  // float *weights_for_w = &(weights[s->f.weight_index]);
927  float *weights_for_w = &(weights[s->f.weight_index & weights.mask()]);
928  float decay_component =
929  l.decay_levels.end()[-2] - l.decay_levels.end()[(int)(-1 - l.example_t + *(weights_for_w + l.all->lda))];
930  float decay = fmin(1.0f, correctedExp(decay_component));
931  float *u_for_w = weights_for_w + l.all->lda + 1;
932 
933  *(weights_for_w + l.all->lda) = (float)l.example_t;
934  for (size_t k = 0; k < l.all->lda; k++)
935  {
936  weights_for_w[k] *= decay;
937  u_for_w[k] = weights_for_w[k] + l.lda_rho;
938  }
939 
940  l.expdigammify_2(*l.all, u_for_w, l.digammas.begin());
941  }
942 
943  for (size_t d = 0; d < batch_size; d++)
944  {
945  float score = lda_loop(l, l.Elogtheta, &(l.v[d * l.all->lda]), l.examples[d], l.all->power_t);
946  if (l.all->audit)
947  GD::print_audit_features(*l.all, *l.examples[d]);
948  // If the doc is empty, give it loss of 0.
949  if (l.doc_lengths[d] > 0)
950  {
951  l.all->sd->sum_loss -= score;
952  l.all->sd->sum_loss_since_last_dump -= score;
953  }
954  return_example(*l.all, *l.examples[d]);
955  }
956 
957  // -t there's no need to update weights (especially since it's a noop)
958  if (eta != 0)
959  {
960  for (index_feature *s = &l.sorted_features[0]; s <= &l.sorted_features.back();)
961  {
962  index_feature *next = s + 1;
963  while (next <= &l.sorted_features.back() && next->f.weight_index == s->f.weight_index) next++;
964 
965  float *word_weights = &(weights[s->f.weight_index]);
966  for (size_t k = 0; k < l.all->lda; k++, ++word_weights)
967  {
968  float new_value = minuseta * *word_weights;
969  *word_weights = new_value;
970  }
971 
972  for (; s != next; s++)
973  {
974  float *v_s = &(l.v[s->document * l.all->lda]);
975  float *u_for_w = &(weights[s->f.weight_index]) + l.all->lda + 1;
976  float c_w = eta * find_cw(l, u_for_w, v_s) * s->f.x;
977  word_weights = &(weights[s->f.weight_index]);
978  for (size_t k = 0; k < l.all->lda; k++, ++u_for_w, ++word_weights)
979  {
980  float new_value = *u_for_w * v_s[k] * c_w;
981  l.total_new[k] += new_value;
982  *word_weights += new_value;
983  }
984  }
985  }
986 
987  for (size_t k = 0; k < l.all->lda; k++)
988  {
989  l.total_lambda[k] *= minuseta;
990  l.total_lambda[k] += l.total_new[k];
991  }
992  }
993  l.sorted_features.resize(0);
994 
995  l.examples.clear();
996  l.doc_lengths.clear();
997 }
v_array< example * > examples
Definition: lda_core.cc:73
v_array< int > doc_lengths
Definition: lda_core.cc:75
double sum_loss
Definition: global_data.h:145
void resize(size_t length)
Definition: v_array.h:69
size_t length()
Definition: global_data.h:513
#define correctedExp
Definition: correctedMath.h:27
parameters weights
Definition: global_data.h:537
void print_audit_features(vw &all, example &ec)
Definition: gd.cc:331
v_array< float > total_lambda
Definition: lda_core.cc:74
v_array< float > decay_levels
Definition: lda_core.cc:71
float powf(float x, float p)
Definition: lda_core.cc:581
float lda_rho
Definition: lda_core.cc:64
float power_t
Definition: global_data.h:447
uint32_t stride()
vw * all
Definition: lda_core.cc:89
void expdigammify_2(vw &all, float *gamma, float *norm)
Definition: lda_core.cc:620
double example_t
Definition: lda_core.cc:88
T *& begin()
Definition: v_array.h:42
uint64_t weight_index
Definition: feature_group.h:28
size_t size() const
Definition: v_array.h:68
void return_example(vw &all, example &ec)
Definition: lda_core.cc:853
double sum_loss_since_last_dump
Definition: global_data.h:146
uint32_t lda
Definition: global_data.h:508
v_array< float > Elogtheta
Definition: lda_core.cc:70
void push_back(const T &new_ele)
Definition: v_array.h:107
shared_data * sd
Definition: global_data.h:375
float lda_loop(lda &l, v_array< float > &Elogtheta, float *v, example *ec, float)
Definition: lda_core.cc:696
float lda_D
Definition: lda_core.cc:65
void clear()
Definition: v_array.h:88
T *& end()
Definition: v_array.h:43
v_array< float > digammas
Definition: lda_core.cc:76
float eta
Definition: global_data.h:531
float weight
v_array< float > total_new
Definition: lda_core.cc:72
size_t topics
Definition: lda_core.cc:62
float digamma(float x)
Definition: lda_core.cc:540
bool empty() const
Definition: v_array.h:59
std::vector< index_feature > sorted_features
Definition: lda_core.cc:78
static float find_cw(lda &l, float *u_for_w, float *v)
Definition: lda_core.cc:679
bool audit
Definition: global_data.h:486
T last() const
Definition: v_array.h:57
feature f
Definition: lda_core.cc:56
uint64_t mask()
float f
Definition: cache.cc:40
v_array< float > v
Definition: lda_core.cc:77

◆ learn_with_metrics()

void learn_with_metrics ( lda & l,
LEARNER::single_learner & base,
example & ec 
)

Definition at line 1017 of file lda_core.cc.

References lda::all, f, lda::feature_counts, lda::feature_to_example_map, learn(), parameters::mask(), vw::passes_complete, stride_shift(), parameters::stride_shift(), and vw::weights.

Referenced by lda_setup(), and predict_with_metrics().

1018 {
1019  if (l.all->passes_complete == 0)
1020  {
1021  // build feature to example map
1022  uint64_t stride_shift = l.all->weights.stride_shift();
1023  uint64_t weight_mask = l.all->weights.mask();
1024 
1025  for (features &fs : ec)
1026  {
1027  for (features::iterator &f : fs)
1028  {
1029  uint64_t idx = (f.index() & weight_mask) >> stride_shift;
1030  l.feature_counts[idx] += (uint32_t)f.value();
1031  l.feature_to_example_map[idx].push_back(ec.example_counter);
1032  }
1033  }
1034  }
1035 
1036  learn(l, base, ec);
1037 }
parameters weights
Definition: global_data.h:537
uint64_t stride_shift(const stagewise_poly &poly, uint64_t idx)
vw * all
Definition: lda_core.cc:89
the core definition of a set of features.
std::vector< uint32_t > feature_counts
Definition: lda_core.cc:83
std::vector< std::vector< size_t > > feature_to_example_map
Definition: lda_core.cc:84
void learn(lda &l, LEARNER::single_learner &, example &ec)
Definition: lda_core.cc:999
iterator over values and indices
size_t passes_complete
Definition: global_data.h:452
uint32_t stride_shift()
uint64_t mask()
float f
Definition: cache.cc:40

◆ next_pow2()

size_t next_pow2 ( size_t  x)

Definition at line 751 of file lda_core.cc.

Referenced by lda_setup().

/**
 * Smallest power of two that is greater than or equal to x.
 *
 * next_pow2(0) and next_pow2(1) both yield 1; an exact power of two is
 * returned unchanged.
 *
 * @param x  Value to round up.
 * @return   The rounded-up power of two, or 0 when no power of two that fits
 *           in size_t is >= x. The earlier shift-count implementation shifted
 *           by the full width of size_t in that case, which is undefined
 *           behaviour.
 */
size_t next_pow2(size_t x)
{
  size_t power = 1;
  // Unsigned doubling is well defined: once the top bit is shifted out the
  // value becomes 0, which terminates the loop and signals overflow.
  while (power != 0 && power < x) power <<= 1;
  return power;
}

◆ operator>>()

std::istream& operator>> ( std::istream &  in,
lda_math_mode mmode 
)

Definition at line 1282 of file lda_core.cc.

References USE_FAST_APPROX, USE_PRECISE, and USE_SIMD.

1283 {
1284  using namespace boost::program_options;
1285 
1286  std::string token;
1287  in >> token;
1288  if (token == "simd")
1289  mmode = USE_SIMD;
1290  else if (token == "accuracy" || token == "precise")
1291  mmode = USE_PRECISE;
1292  else if (token == "fast-approx" || token == "approx")
1293  mmode = USE_FAST_APPROX;
1294  else
1295  throw boost::program_options::invalid_option_value(token);
1296  return in;
1297 }

◆ predict()

void predict ( lda l,
LEARNER::single_learner base,
example ec 
)

Definition at line 1040 of file lda_core.cc.

References learn().

Referenced by lda_setup().

1040 { learn(l, base, ec); }
void learn(lda &l, LEARNER::single_learner &, example &ec)
Definition: lda_core.cc:999

◆ predict_with_metrics()

void predict_with_metrics ( lda l,
LEARNER::single_learner base,
example ec 
)

Definition at line 1041 of file lda_core.cc.

References learn_with_metrics().

Referenced by lda_setup().

1041 { learn_with_metrics(l, base, ec); }
void learn_with_metrics(lda &l, LEARNER::single_learner &base, example &ec)
Definition: lda_core.cc:1017

◆ return_example()

void return_example ( vw & all,
example & ec 
)

Definition at line 853 of file lda_core.cc.

References vw::current_pass, shared_data::dump_interval, f, vw::final_prediction_sink, VW::finish_example(), vw::holdout_set_off, example::loss, example::num_features, example::pred, MWT::print_scalars(), shared_data::print_update(), vw::progress_add, vw::progress_arg, vw::quiet, polyprediction::scalars, vw::sd, example::tag, example::test_only, shared_data::update(), example::weight, and shared_data::weighted_examples().

Referenced by learn_batch().

854 {
855  all.sd->update(ec.test_only, true, ec.loss, ec.weight, ec.num_features);
856  for (int f : all.final_prediction_sink) MWT::print_scalars(f, ec.pred.scalars, ec.tag);
857 
858  if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet)
859  all.sd->print_update(
860  all.holdout_set_off, all.current_pass, "none", 0, ec.num_features, all.progress_add, all.progress_arg);
861  VW::finish_example(all, ec);
862 }
v_array< char > tag
Definition: example.h:63
void print_scalars(int f, v_array< float > &scalars, v_array< char > &tag)
Definition: mwt.cc:149
v_array< int > final_prediction_sink
Definition: global_data.h:518
bool quiet
Definition: global_data.h:487
bool holdout_set_off
Definition: global_data.h:499
bool progress_add
Definition: global_data.h:545
shared_data * sd
Definition: global_data.h:375
float progress_arg
Definition: global_data.h:546
void print_update(bool holdout_set_off, size_t current_pass, float label, float prediction, size_t num_features, bool progress_add, float progress_arg)
Definition: global_data.h:225
size_t num_features
Definition: example.h:67
uint64_t current_pass
Definition: global_data.h:396
void finish_example(vw &, example &)
Definition: parser.cc:881
void update(bool test_example, bool labeled_example, float loss, float weight, size_t num_features)
Definition: global_data.h:190
float loss
Definition: example.h:70
polyprediction pred
Definition: example.h:60
float weight
Definition: example.h:62
double weighted_examples()
Definition: global_data.h:188
float dump_interval
Definition: global_data.h:147
v_array< float > scalars
Definition: example.h:46
float f
Definition: cache.cc:40
bool test_only
Definition: example.h:76

◆ save_load()

void save_load ( lda l,
io_buf model_file,
bool  read,
bool  text 
)

Definition at line 793 of file lda_core.cc.

References lda::all, bin_text_read_write_fixed(), parameters::dense_weights, v_array< T >::empty(), io_buf::files, vw::initial_t, initialize_regressor(), vw::lda, lda::lda_D, lda::lda_rho, vw::length(), vw::model_file_ver, vw::num_bits, vw::random_weights, dense_parameters::set_default(), sparse_parameters::set_default(), parameters::sparse, parameters::sparse_weights, parameters::stride(), parameters::strided_index(), VERSION_FILE_WITH_HEADER_ID, and vw::weights.

Referenced by lda_setup().

794 {
795  vw &all = *(l.all);
796  uint64_t length = (uint64_t)1 << all.num_bits;
797  if (read)
798  {
800  initial_weights init(all.initial_t, (float)(l.lda_D / all.lda / all.length() * 200), all.random_weights, all.lda,
801  all.weights.stride());
802  if (all.weights.sparse)
804  else
806  }
807  if (!model_file.files.empty())
808  {
809  uint64_t i = 0;
810  std::stringstream msg;
811  size_t brw = 1;
812 
813  do
814  {
815  brw = 0;
816  size_t K = all.lda;
817  if (!read && text)
818  msg << i << " ";
819 
820  if (!read || all.model_file_ver >= VERSION_FILE_WITH_HEADER_ID)
821  brw += bin_text_read_write_fixed(model_file, (char *)&i, sizeof(i), "", read, msg, text);
822  else
823  {
824  // support 32bit build models
825  uint32_t j;
826  brw += bin_text_read_write_fixed(model_file, (char *)&j, sizeof(j), "", read, msg, text);
827  i = j;
828  }
829 
830  if (brw != 0)
831  {
832  weight *w = &(all.weights.strided_index(i));
833  for (uint64_t k = 0; k < K; k++)
834  {
835  weight *v = w + k;
836  if (!read && text)
837  msg << *v + l.lda_rho << " ";
838  brw += bin_text_read_write_fixed(model_file, (char *)v, sizeof(*v), "", read, msg, text);
839  }
840  }
841  if (text)
842  {
843  if (!read)
844  msg << "\n";
845  brw += bin_text_read_write_fixed(model_file, nullptr, 0, "", read, msg, text);
846  }
847  if (!read)
848  ++i;
849  } while ((!read && i < length) || (read && brw > 0));
850  }
851 }
size_t length()
Definition: global_data.h:513
#define VERSION_FILE_WITH_HEADER_ID
Definition: vw_versions.h:19
parameters weights
Definition: global_data.h:537
void initialize_regressor(vw &all, T &weights)
float initial_t
Definition: global_data.h:530
float lda_rho
Definition: lda_core.cc:64
uint32_t stride()
vw * all
Definition: lda_core.cc:89
void set_default(R &info)
uint32_t num_bits
Definition: global_data.h:398
uint32_t lda
Definition: global_data.h:508
VW::version_struct model_file_ver
Definition: global_data.h:419
float lda_D
Definition: lda_core.cc:65
v_array< int > files
Definition: io_buf.h:64
bool random_weights
Definition: global_data.h:492
dense_parameters dense_weights
weight & strided_index(size_t index)
float weight
sparse_parameters sparse_weights
bool empty() const
Definition: v_array.h:59
size_t bin_text_read_write_fixed(io_buf &io, char *data, size_t len, const char *read_message, bool read, std::stringstream &msg, bool text)
Definition: io_buf.h:326

◆ theta_kl()

float theta_kl ( lda l,
v_array< float > &  Elogtheta,
float *  gamma 
)

Definition at line 656 of file lda_core.cc.

References v_array< T >::clear(), lda::digamma(), lda::lda_alpha, lda::lgamma(), v_array< T >::push_back(), and lda::topics.

Referenced by lda_loop().

657 {
658  float gammasum = 0;
659  Elogtheta.clear();
660  for (size_t k = 0; k < l.topics; k++)
661  {
662  Elogtheta.push_back(l.digamma(gamma[k]));
663  gammasum += gamma[k];
664  }
665  float digammasum = l.digamma(gammasum);
666  gammasum = l.lgamma(gammasum);
667  float kl = -(l.topics * l.lgamma(l.lda_alpha));
668  kl += l.lgamma(l.lda_alpha * l.topics) - gammasum;
669  for (size_t k = 0; k < l.topics; k++)
670  {
671  Elogtheta[k] -= digammasum;
672  kl += (l.lda_alpha - gamma[k]) * Elogtheta[k];
673  kl += l.lgamma(gamma[k]);
674  }
675 
676  return kl;
677 }
float lgamma(float x)
Definition: lda_core.cc:561
float lda_alpha
Definition: lda_core.cc:63
void push_back(const T &new_ele)
Definition: v_array.h:107
void clear()
Definition: v_array.h:88
size_t topics
Definition: lda_core.cc:62
float digamma(float x)
Definition: lda_core.cc:540