#include <fstream>
#include <vector>
#include <queue>
#include <algorithm>
#include <numeric>
#include <cmath>
#include "correctedMath.h"
#include "vw_versions.h"
#include "vw.h"
#include "mwt.h"
#include <boost/math/special_functions/digamma.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <netdb.h>
#include <cstring>
#include <cstdio>
#include <cassert>
#include "no_label.h"
#include "gd.h"
#include "rand48.h"
#include "reductions.h"
#include "array_parameters.h"
#include <boost/version.hpp>

Classes
class	index_feature

struct	lda

struct	initial_weights

class	set_initial_lda_wrapper< T >

struct	word_doc_frequency

struct	feature_pair

Namespaces
	anonymous_namespace{lda_core.cc}

	ldamath

Enumerations
enum	lda_math_mode { USE_SIMD, USE_PRECISE, USE_FAST_APPROX }

Functions
bool	anonymous_namespace{lda_core.cc}::is_aligned16 (void *ptr)

float	ldamath::fastlog2 (float x)

float	ldamath::fastlog (float x)

float	ldamath::fastpow2 (float p)

float	ldamath::fastexp (float p)

float	ldamath::fastpow (float x, float p)

float	ldamath::fastlgamma (float x)

float	ldamath::fastdigamma (float x)

template<typename T , const lda_math_mode mtype>
T	ldamath::lgamma (T)

template<typename T , const lda_math_mode mtype>
T	ldamath::digamma (T)

template<typename T , lda_math_mode mtype>
T	ldamath::exponential (T)

template<typename T , lda_math_mode mtype>
T	ldamath::powf (T, T)

template<>
float	ldamath::lgamma< float, USE_PRECISE > (float x)

template<>
float	ldamath::digamma< float, USE_PRECISE > (float x)

template<>
float	ldamath::exponential< float, USE_PRECISE > (float x)

template<>
float	ldamath::powf< float, USE_PRECISE > (float x, float p)

template<>
float	ldamath::lgamma< float, USE_FAST_APPROX > (float x)

template<>
float	ldamath::digamma< float, USE_FAST_APPROX > (float x)

template<>
float	ldamath::exponential< float, USE_FAST_APPROX > (float x)

template<>
float	ldamath::powf< float, USE_FAST_APPROX > (float x, float p)

template<>
float	ldamath::lgamma< float, USE_SIMD > (float x)

template<>
float	ldamath::digamma< float, USE_SIMD > (float x)

template<>
float	ldamath::exponential< float, USE_SIMD > (float x)

template<>
float	ldamath::powf< float, USE_SIMD > (float x, float p)

template<typename T , const lda_math_mode mtype>
void	ldamath::expdigammify (vw &all, T *gamma, T threshold, T initial)

template<>
void	ldamath::expdigammify< float, USE_SIMD > (vw &all, float *gamma, float threshold, float)

template<typename T , const lda_math_mode mtype>
void	ldamath::expdigammify_2 (vw &all, float gamma, T norm, const T threshold)

template<>
void	ldamath::expdigammify_2< float, USE_SIMD > (vw &all, float gamma, float norm, const float threshold)

static float	average_diff (vw &all, float *oldgamma, float *newgamma)

float	theta_kl (lda &l, v_array< float > &Elogtheta, float *gamma)

static float	find_cw (lda &l, float *u_for_w, float *v)

float	lda_loop (lda &l, v_array< float > &Elogtheta, float *v, example *ec, float)

size_t	next_pow2 (size_t x)

void	save_load (lda &l, io_buf &model_file, bool read, bool text)

void	return_example (vw &all, example &ec)

void	learn_batch (lda &l)

void	learn (lda &l, LEARNER::single_learner &, example &ec)

void	learn_with_metrics (lda &l, LEARNER::single_learner &base, example &ec)

void	predict (lda &l, LEARNER::single_learner &base, example &ec)

void	predict_with_metrics (lda &l, LEARNER::single_learner &base, example &ec)

template<class T >
void	get_top_weights (vw *all, int top_words_count, int topic, std::vector< feature > &output, T &weights)

void	get_top_weights (vw *all, int top_words_count, int topic, std::vector< feature > &output)

template<class T >
void	compute_coherence_metrics (lda &l, T &weights)

void	compute_coherence_metrics (lda &l)

void	end_pass (lda &l)

template<class T >
void	end_examples (lda &l, T &weights)

void	end_examples (lda &l)

void	finish_example (vw &, lda &, example &)

std::istream &	operator>> (std::istream &in, lda_math_mode &mmode)

LEARNER::base_learner *	lda_setup (options_i &options, vw &all)

Variables
v_array< float >	anonymous_namespace{lda_core.cc}::new_gamma = v_init<float>()

v_array< float >	anonymous_namespace{lda_core.cc}::old_gamma = v_init<float>()

Enumeration Type Documentation

◆ lda_math_mode

enum lda_math_mode

Enumerator
USE_SIMD
USE_PRECISE
USE_FAST_APPROX

Definition at line 45 of file lda_core.cc.

 {
   // Selected by the "simd" token in operator>>; also the default passed to
   // the "math-mode" option in lda_setup.
   USE_SIMD,
   // Selected by the "accuracy" or "precise" token in operator>>.
   USE_PRECISE,
   // Selected by the "fast-approx" or "approx" token in operator>>.
   USE_FAST_APPROX
 };

Function Documentation

◆ average_diff()

static float average_diff	(	vw &	all,
		float *	oldgamma,
		float *	newgamma
	)

inline static

Definition at line 639 of file lda_core.cc.

References Search::absdiff(), accumulate(), and vw::lda.

Referenced by lda_loop().

 {
   float sum;
   float normalizer;
 
   // This warps the normal sense of "inner product", but it accomplishes the same
   // thing as the "plain old" for loop. clang does a good job of reducing the
   // common subexpressions.
   sum = std::inner_product(
       oldgamma, oldgamma + all.lda, newgamma, 0.0f, [](float accum, float absdiff) { return accum + absdiff; },
       [](float old_g, float new_g) { return std::abs(old_g - new_g); });
 
   normalizer = std::accumulate(newgamma, newgamma + all.lda, 0.0f);
   return sum / normalizer;
 }

◆ compute_coherence_metrics() [1/2]

template<class T >

void compute_coherence_metrics	(	lda &	l,
		T &	weights
	)

Definition at line 1103 of file lda_core.cc.

References lda::all, ACTION_SCORE::cmp(), f, lda::feature_counts, lda::feature_to_example_map, vw::num_bits, lda::topics, and feature::x.

Referenced by compute_coherence_metrics(), and end_pass().

 {
   uint64_t length = (uint64_t)1 << l.all->num_bits;
 
   std::vector<std::vector<feature_pair>> topics_word_pairs;
   topics_word_pairs.resize(l.topics);
 
   int top_words_count = 10;  // parameterize and check
 
   for (size_t topic = 0; topic < l.topics; topic++)
   {
     // get top features for this topic
     auto cmp = [](feature &left, feature &right) { return left.x > right.x; };
     std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
     typename T::iterator iter = weights.begin();
     for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
       top_features.push(feature((&(*iter))[topic], iter.index()));
 
     for (typename T::iterator v = weights.begin(); v != weights.end(); ++v)
       if ((&(*v))[topic] > top_features.top().x)
       {
         top_features.pop();
         top_features.push(feature((&(*v))[topic], v.index()));
       }
 
     // extract idx and sort descending
     std::vector<uint64_t> top_features_idx;
     top_features_idx.resize(top_features.size());
     for (int i = (int)top_features.size() - 1; i >= 0; i--)
     {
       top_features_idx[i] = top_features.top().weight_index;
       top_features.pop();
     }
 
     auto &word_pairs = topics_word_pairs[topic];
     for (size_t i = 0; i < top_features_idx.size(); i++)
       for (size_t j = i + 1; j < top_features_idx.size(); j++)
         word_pairs.emplace_back(top_features_idx[i], top_features_idx[j]);
   }
 
   // compress word pairs and create record for storing frequency
   std::map<uint64_t, std::vector<word_doc_frequency>> coWordsDFSet;
   for (auto &vec : topics_word_pairs)
   {
     for (auto &wp : vec)
     {
       auto f1 = wp.f1;
       auto f2 = wp.f2;
       auto wdf = coWordsDFSet.find(f1);
 
       if (wdf != coWordsDFSet.end())
       {
         // http://stackoverflow.com/questions/5377434/does-stdmapiterator-return-a-copy-of-value-or-a-value-itself
         // if (wdf->second.find(f2) == wdf->second.end())
 
         if (std::find_if(wdf->second.begin(), wdf->second.end(),
                 [&f2](const word_doc_frequency &v) { return v.idx == f2; }) != wdf->second.end())
         {
           wdf->second.push_back({f2, 0});
           // printf(" add %d %d\n", f1, f2);
         }
       }
       else
       {
         std::vector<word_doc_frequency> vec = {{f2, 0}};
         coWordsDFSet.insert(std::make_pair(f1, vec));
         // printf(" insert %d %d\n", f1, f2);
       }
     }
   }
 
   // this.GetWordPairsDocumentFrequency(coWordsDFSet);
   for (auto &pair : coWordsDFSet)
   {
     auto &examples_for_f1 = l.feature_to_example_map[pair.first];
     for (auto &wdf : pair.second)
     {
       auto &examples_for_f2 = l.feature_to_example_map[wdf.idx];
 
       // assumes examples_for_f1 and examples_for_f2 are orderd
       size_t i = 0;
       size_t j = 0;
       while (i < examples_for_f1.size() && j < examples_for_f2.size())
       {
         if (examples_for_f1[i] == examples_for_f2[j])
         {
           wdf.count++;
           i++;
           j++;
         }
         else if (examples_for_f2[j] < examples_for_f1[i])
           j++;
         else
           i++;
       }
     }
   }
 
   float epsilon = 1e-6f;  // TODO
   float avg_coherence = 0;
   for (size_t topic = 0; topic < l.topics; topic++)
   {
     float coherence = 0;
 
     for (auto &pairs : topics_word_pairs[topic])
     {
       auto f1 = pairs.f1;
       if (l.feature_counts[f1] == 0)
         continue;
 
       auto f2 = pairs.f2;
       auto &co_feature = coWordsDFSet[f1];
       auto co_feature_df = std::find_if(
           co_feature.begin(), co_feature.end(), [&f2](const word_doc_frequency &v) { return v.idx == f2; });
 
       if (co_feature_df != co_feature.end())
       {
         // printf("(%d:%d + eps)/(%d:%d)\n", f2, co_feature_df->count, f1, l.feature_counts[f1]);
         coherence += logf((co_feature_df->count + epsilon) / l.feature_counts[f1]);
       }
     }
 
     printf("Topic %3d coherence: %f\n", (int)topic, coherence);
 
     // TODO: expose per topic coherence
 
     // TODO: good vs. bad topics
     avg_coherence += coherence;
   }
 
   avg_coherence /= l.topics;
 
   printf("Avg topic coherence: %f\n", avg_coherence);
 }

◆ compute_coherence_metrics() [2/2]

void compute_coherence_metrics ( lda & l )

Definition at line 1238 of file lda_core.cc.

References lda::all, compute_coherence_metrics(), parameters::dense_weights, parameters::sparse, parameters::sparse_weights, and vw::weights.

 {
   if (l.all->weights.sparse)
     compute_coherence_metrics(l, l.all->weights.sparse_weights);
   else
     compute_coherence_metrics(l, l.all->weights.dense_weights);
 }

◆ end_examples() [1/2]

template<class T >

void end_examples	(	lda &	l,
		T &	weights
	)

Definition at line 1259 of file lda_core.cc.

References lda::all, correctedExp, lda::decay_levels, v_array< T >::end(), lda::example_t, f, v_array< T >::last(), and vw::lda.

Referenced by end_examples(), and lda_setup().

 {
   for (typename T::iterator iter = weights.begin(); iter != weights.end(); ++iter)
   {
     float decay_component =
         l.decay_levels.last() - l.decay_levels.end()[(int)(-1 - l.example_t + (&(*iter))[l.all->lda])];
     float decay = fmin(1.f, correctedExp(decay_component));
 
     weight *wp = &(*iter);
     for (size_t i = 0; i < l.all->lda; ++i) wp[i] *= decay;
   }
 }

◆ end_examples() [2/2]

void end_examples ( lda & l )

Definition at line 1272 of file lda_core.cc.

References lda::all, parameters::dense_weights, end_examples(), parameters::sparse, parameters::sparse_weights, and vw::weights.

 {
   if (l.all->weights.sparse)
     end_examples(l, l.all->weights.sparse_weights);
   else
     end_examples(l, l.all->weights.dense_weights);
 }

◆ end_pass()

void end_pass ( lda & l )

Definition at line 1246 of file lda_core.cc.

References lda::all, lda::compute_coherence_metrics, compute_coherence_metrics(), v_array< T >::empty(), lda::examples, learn_batch(), vw::numpasses, and vw::passes_complete.

Referenced by lda_setup().

 {
   if (!l.examples.empty())
     learn_batch(l);
 
   if (l.compute_coherence_metrics && l.all->passes_complete == l.all->numpasses)
   {
     compute_coherence_metrics(l);
     // FASTPASS return;
   }
 }

◆ find_cw()

static float find_cw	(	lda &	l,
		float *	u_for_w,
		float *	v
	)

inline static

Definition at line 679 of file lda_core.cc.

References lda::topics.

Referenced by lda_loop(), and learn_batch().

 {
   return 1.0f / std::inner_product(u_for_w, u_for_w + l.topics, v, 0.0f);
 }

◆ finish_example()

void finish_example	(	vw &	,
		lda &	,
		example &
	)

Definition at line 1280 of file lda_core.cc.

 {}

◆ get_top_weights() [1/2]

template<class T >

void get_top_weights	(	vw *	all,
		int	top_words_count,
		int	topic,
		std::vector< feature > &	output,
		T &	weights
	)

Definition at line 1063 of file lda_core.cc.

References ACTION_SCORE::cmp(), vw::num_bits, and feature::x.

Referenced by get_top_weights().

 {
   uint64_t length = (uint64_t)1 << all->num_bits;
 
   // get top features for this topic
   auto cmp = [](feature left, feature right) { return left.x > right.x; };
   std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
   typename T::iterator iter = weights.begin();
 
   for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
     top_features.push({(&(*iter))[topic], iter.index()});
 
   for (uint64_t i = top_words_count; i < length; i++, ++iter)
   {
     weight v = (&(*iter))[topic];
     if (v > top_features.top().x)
     {
       top_features.pop();
       top_features.push({v, i});
     }
   }
 
   // extract idx and sort descending
   output.resize(top_features.size());
   for (int i = (int)top_features.size() - 1; i >= 0; i--)
   {
     output[i] = top_features.top();
     top_features.pop();
   }
 }

◆ get_top_weights() [2/2]

void get_top_weights	(	vw *	all,
		int	top_words_count,
		int	topic,
		std::vector< feature > &	output
	)

Definition at line 1094 of file lda_core.cc.

References parameters::dense_weights, get_top_weights(), parameters::sparse, parameters::sparse_weights, and vw::weights.

 {
   if (all->weights.sparse)
     get_top_weights(all, top_words_count, topic, output, all->weights.sparse_weights);
   else
     get_top_weights(all, top_words_count, topic, output, all->weights.dense_weights);
 }

◆ lda_loop()

float lda_loop	(	lda &	l,
		v_array< float > &	Elogtheta,
		float *	v,
		example *	ec,
		float
	)

Definition at line 696 of file lda_core.cc.

References lda::all, average_diff(), v_array< T >::begin(), v_array< T >::clear(), lda::expdigammify(), f, find_cw(), lda::lda_alpha, lda::lda_epsilon, anonymous_namespace{lda_core.cc}::new_gamma, anonymous_namespace{lda_core.cc}::old_gamma, v_array< T >::push_back(), theta_kl(), lda::topics, and vw::weights.

Referenced by learn_batch().

 {
   parameters &weights = l.all->weights;
   new_gamma.clear();
   old_gamma.clear();
 
   for (size_t i = 0; i < l.topics; i++)
   {
     new_gamma.push_back(1.f);
     old_gamma.push_back(0.f);
   }
   size_t num_words = 0;
   for (features &fs : *ec) num_words += fs.size();
 
   float xc_w = 0;
   float score = 0;
   float doc_length = 0;
   do
   {
     memcpy(v, new_gamma.begin(), sizeof(float) * l.topics);
     l.expdigammify(*l.all, v);
 
     memcpy(old_gamma.begin(), new_gamma.begin(), sizeof(float) * l.topics);
     memset(new_gamma.begin(), 0, sizeof(float) * l.topics);
 
     score = 0;
     size_t word_count = 0;
     doc_length = 0;
     for (features &fs : *ec)
     {
       for (features::iterator &f : fs)
       {
         float *u_for_w = &(weights[f.index()]) + l.topics + 1;
         float c_w = find_cw(l, u_for_w, v);
         xc_w = c_w * f.value();
         score += -f.value() * log(c_w);
         size_t max_k = l.topics;
         for (size_t k = 0; k < max_k; k++, ++u_for_w) new_gamma[k] += xc_w * *u_for_w;
         word_count++;
         doc_length += f.value();
       }
     }
     for (size_t k = 0; k < l.topics; k++) new_gamma[k] = new_gamma[k] * v[k] + l.lda_alpha;
   } while (average_diff(*l.all, old_gamma.begin(), new_gamma.begin()) > l.lda_epsilon);
 
   ec->pred.scalars.clear();
   ec->pred.scalars.resize(l.topics);
   memcpy(ec->pred.scalars.begin(), new_gamma.begin(), l.topics * sizeof(float));
   ec->pred.scalars.end() = ec->pred.scalars.begin() + l.topics;
 
   score += theta_kl(l, Elogtheta, new_gamma.begin());
 
   return score / doc_length;
 }

◆ lda_setup()

LEARNER::base_learner* lda_setup	(	options_i &	options,
		vw &	all
	)

Definition at line 1299 of file lda_core.cc.

References add(), VW::config::options_i::add_and_parse(), vw::add_constant, vw::delete_prediction, delete_scalars(), end_examples(), end_pass(), vw::eta, f, VW::finish_example(), LEARNER::init_learner(), vw::initial_t, vw::lda, learn(), learn_with_metrics(), parser::lp, LEARNER::make_base(), VW::config::make_option(), next_pow2(), no_label::no_label_parser, vw::num_bits, vw::p, predict(), predict_with_metrics(), vw::random_weights, parser::ring_size, save_load(), prediction_type::scalars, LEARNER::learner< T, E >::set_end_examples(), LEARNER::learner< T, E >::set_end_pass(), LEARNER::learner< T, E >::set_finish_example(), LEARNER::learner< T, E >::set_save_load(), parser::strict_parse, parameters::stride_shift(), UINT64_ONE, USE_SIMD, VW::config::options_i::was_supplied(), and vw::weights.

Referenced by parse_reductions().

 {
   // Builds the LDA reduction: registers its command-line options, returns
   // nullptr when "lda" was not supplied, otherwise configures `all` and
   // creates the learner with its save/load, finish, end-of-examples and
   // end-of-pass callbacks.
   auto ld = scoped_calloc_or_throw<lda>();
   option_group_definition new_options("Latent Dirichlet Allocation");
   // NOTE(review): math-mode is registered as an int (default USE_SIMD) while
   // the help text lists names; confirm how the option framework parses it.
   int math_mode;
   new_options.add(make_option("lda", ld->topics).keep().help("Run lda with <int> topics"))
       .add(make_option("lda_alpha", ld->lda_alpha)
                .keep()
                .default_value(0.1f)
                .help("Prior on sparsity of per-document topic weights"))
       .add(make_option("lda_rho", ld->lda_rho)
                .keep()
                .default_value(0.1f)
                .help("Prior on sparsity of topic distributions"))
       .add(make_option("lda_D", ld->lda_D).default_value(10000.0f).help("Number of documents"))
       .add(make_option("lda_epsilon", ld->lda_epsilon).default_value(0.001f).help("Loop convergence threshold"))
       .add(make_option("minibatch", ld->minibatch).default_value(1).help("Minibatch size, for LDA"))
       .add(make_option("math-mode", math_mode).default_value(USE_SIMD).help("Math mode: simd, accuracy, fast-approx"))
       .add(make_option("metrics", ld->compute_coherence_metrics).help("Compute metrics"));
   options.add_and_parse(new_options);

   // Convert from int to corresponding enum value.
   // NOTE(review): no range check is performed on math_mode before the cast.
   ld->mmode = static_cast<lda_math_mode>(math_mode);

   // This reduction is only active when --lda was given.
   if (!options.was_supplied("lda"))
     return nullptr;

   all.lda = (uint32_t)ld->topics;
   all.delete_prediction = delete_scalars;
   ld->sorted_features = std::vector<index_feature>();
   ld->total_lambda_init = false;
   ld->all = &all;
   ld->example_t = all.initial_t;
   // The coherence metrics need one counter and one example list per slot of
   // the 2^num_bits feature table.
   if (ld->compute_coherence_metrics)
   {
     ld->feature_counts.resize((uint32_t)(UINT64_ONE << all.num_bits));
     ld->feature_to_example_map.resize((uint32_t)(UINT64_ONE << all.num_bits));
   }

   // Stride shift = ceil(log2(2 * lda + 1)), so every weight row has room for
   // at least 2 * lda + 1 floats (learn_batch uses slots [0, lda), [lda] and
   // [lda + 1, 2 * lda + 1)).
   float temp = ceilf(logf((float)(all.lda * 2 + 1)) / logf(2.f));

   all.weights.stride_shift((size_t)temp);
   all.random_weights = true;
   all.add_constant = false;

   // Clamp the learning rate to at most 1.
   if (all.eta > 1.)
   {
     std::cerr << "your learning rate is too high, setting it to 1" << std::endl;
     all.eta = std::min(all.eta, 1.f);
   }

   // Replace the parser when its ring is smaller than the minibatch size
   // rounded up to a power of two; the strict_parse setting is carried over.
   size_t minibatch2 = next_pow2(ld->minibatch);
   if (minibatch2 > all.p->ring_size)
   {
     bool previous_strict_parse = all.p->strict_parse;
     delete all.p;
     all.p = new parser{minibatch2, previous_strict_parse};
   }

   // One slice of all.lda floats per document in a minibatch (see the
   // l.v[d * lda] indexing in learn_batch).
   ld->v.resize(all.lda * ld->minibatch);

   // decay_levels starts with a single 0 entry; learn_batch appends one
   // entry per batch.
   ld->decay_levels.push_back(0.f);

   all.p->lp = no_label::no_label_parser;

   // Use the *_with_metrics entry points only when metrics were requested.
   LEARNER::learner<lda, example> &l = init_learner(ld, ld->compute_coherence_metrics ? learn_with_metrics : learn,
       ld->compute_coherence_metrics ? predict_with_metrics : predict, UINT64_ONE << all.weights.stride_shift(),
       prediction_type::scalars);

   l.set_save_load(save_load);
   l.set_finish_example(finish_example);
   l.set_end_examples(end_examples);
   l.set_end_pass(end_pass);

   return make_base(l);
 }

◆ learn()

void learn	(	lda &	l,
		LEARNER::single_learner &	,
		example &	ec
	)

Definition at line 999 of file lda_core.cc.

References lda::doc_lengths, lda::examples, f, learn_batch(), lda::minibatch, v_array< T >::push_back(), v_array< T >::size(), and lda::sorted_features.

Referenced by lda_setup(), learn_with_metrics(), and predict().

 {
   uint32_t num_ex = (uint32_t)l.examples.size();
   l.examples.push_back(&ec);
   l.doc_lengths.push_back(0);
   for (features &fs : ec)
   {
     for (features::iterator &f : fs)
     {
       index_feature temp = {num_ex, feature(f.value(), f.index())};
       l.sorted_features.push_back(temp);
       l.doc_lengths[num_ex] += (int)f.value();
     }
   }
   if (++num_ex == l.minibatch)
     learn_batch(l);
 }

◆ learn_batch()

void learn_batch ( lda & l )

Definition at line 864 of file lda_core.cc.

References lda::all, vw::audit, v_array< T >::begin(), v_array< T >::clear(), correctedExp, lda::decay_levels, lda::digamma(), lda::digammas, lda::doc_lengths, lda::Elogtheta, v_array< T >::empty(), v_array< T >::end(), vw::eta, lda::example_t, lda::examples, lda::expdigammify_2(), f, index_feature::f, find_cw(), v_array< T >::last(), vw::lda, lda::lda_D, lda_loop(), lda::lda_rho, vw::length(), parameters::mask(), vw::power_t, lda::powf(), GD::print_audit_features(), v_array< T >::push_back(), v_array< T >::resize(), return_example(), vw::sd, v_array< T >::size(), lda::sorted_features, parameters::stride(), shared_data::sum_loss, shared_data::sum_loss_since_last_dump, lda::topics, lda::total_lambda, lda::total_new, lda::v, feature::weight_index, and vw::weights.

Referenced by end_pass(), and learn().

 {
   parameters &weights = l.all->weights;
   if (l.sorted_features.empty())  // FAST-PASS for real "true"
   {
     // This can happen when the socket connection is dropped by the client.
     // If l.sorted_features is empty, then l.sorted_features[0] does not
     // exist, so we should not try to take its address in the beginning of
     // the for loops down there. Since it seems that there's not much to
     // do in this case, we just return.
     for (size_t d = 0; d < l.examples.size(); d++)
     {
       l.examples[d]->pred.scalars.clear();
       l.examples[d]->pred.scalars.resize(l.topics);
       memset(l.examples[d]->pred.scalars.begin(), 0, l.topics * sizeof(float));
       l.examples[d]->pred.scalars.end() = l.examples[d]->pred.scalars.begin() + l.topics;
 
       l.examples[d]->pred.scalars.clear();
       return_example(*l.all, *l.examples[d]);
     }
     l.examples.clear();
     return;
   }
 
   float eta = -1;
   float minuseta = -1;
 
   if (l.total_lambda.empty())
   {
     for (size_t k = 0; k < l.all->lda; k++) l.total_lambda.push_back(0.f);
     // This part does not work with sparse parameters
     size_t stride = weights.stride();
     for (size_t i = 0; i <= weights.mask(); i += stride)
     {
       weight *w = &(weights[i]);
       for (size_t k = 0; k < l.all->lda; k++) l.total_lambda[k] += w[k];
     }
   }
 
   l.example_t++;
   l.total_new.clear();
   for (size_t k = 0; k < l.all->lda; k++) l.total_new.push_back(0.f);
 
   size_t batch_size = l.examples.size();
 
   sort(l.sorted_features.begin(), l.sorted_features.end());
 
   eta = l.all->eta * l.powf((float)l.example_t, -l.all->power_t);
   minuseta = 1.0f - eta;
   eta *= l.lda_D / batch_size;
   l.decay_levels.push_back(l.decay_levels.last() + log(minuseta));
 
   l.digammas.clear();
   float additional = (float)(l.all->length()) * l.lda_rho;
   for (size_t i = 0; i < l.all->lda; i++) l.digammas.push_back(l.digamma(l.total_lambda[i] + additional));
 
   uint64_t last_weight_index = -1;
   for (index_feature *s = &l.sorted_features[0]; s <= &l.sorted_features.back(); s++)
   {
     if (last_weight_index == s->f.weight_index)
       continue;
     last_weight_index = s->f.weight_index;
     // float *weights_for_w = &(weights[s->f.weight_index]);
     float *weights_for_w = &(weights[s->f.weight_index & weights.mask()]);
     float decay_component =
         l.decay_levels.end()[-2] - l.decay_levels.end()[(int)(-1 - l.example_t + *(weights_for_w + l.all->lda))];
     float decay = fmin(1.0f, correctedExp(decay_component));
     float *u_for_w = weights_for_w + l.all->lda + 1;
 
     *(weights_for_w + l.all->lda) = (float)l.example_t;
     for (size_t k = 0; k < l.all->lda; k++)
     {
       weights_for_w[k] *= decay;
       u_for_w[k] = weights_for_w[k] + l.lda_rho;
     }
 
     l.expdigammify_2(*l.all, u_for_w, l.digammas.begin());
   }
 
   for (size_t d = 0; d < batch_size; d++)
   {
     float score = lda_loop(l, l.Elogtheta, &(l.v[d * l.all->lda]), l.examples[d], l.all->power_t);
     if (l.all->audit)
       GD::print_audit_features(*l.all, *l.examples[d]);
     // If the doc is empty, give it loss of 0.
     if (l.doc_lengths[d] > 0)
     {
       l.all->sd->sum_loss -= score;
       l.all->sd->sum_loss_since_last_dump -= score;
     }
     return_example(*l.all, *l.examples[d]);
   }
 
   // -t there's no need to update weights (especially since it's a noop)
   if (eta != 0)
   {
     for (index_feature *s = &l.sorted_features[0]; s <= &l.sorted_features.back();)
     {
       index_feature *next = s + 1;
       while (next <= &l.sorted_features.back() && next->f.weight_index == s->f.weight_index) next++;
 
       float *word_weights = &(weights[s->f.weight_index]);
       for (size_t k = 0; k < l.all->lda; k++, ++word_weights)
       {
         float new_value = minuseta * *word_weights;
         *word_weights = new_value;
       }
 
       for (; s != next; s++)
       {
         float *v_s = &(l.v[s->document * l.all->lda]);
         float *u_for_w = &(weights[s->f.weight_index]) + l.all->lda + 1;
         float c_w = eta * find_cw(l, u_for_w, v_s) * s->f.x;
         word_weights = &(weights[s->f.weight_index]);
         for (size_t k = 0; k < l.all->lda; k++, ++u_for_w, ++word_weights)
         {
           float new_value = *u_for_w * v_s[k] * c_w;
           l.total_new[k] += new_value;
           *word_weights += new_value;
         }
       }
     }
 
     for (size_t k = 0; k < l.all->lda; k++)
     {
       l.total_lambda[k] *= minuseta;
       l.total_lambda[k] += l.total_new[k];
     }
   }
   l.sorted_features.resize(0);
 
   l.examples.clear();
   l.doc_lengths.clear();
 }

◆ learn_with_metrics()

void learn_with_metrics	(	lda &	l,
		LEARNER::single_learner &	base,
		example &	ec
	)

Definition at line 1017 of file lda_core.cc.

References lda::all, f, lda::feature_counts, lda::feature_to_example_map, learn(), parameters::mask(), vw::passes_complete, stride_shift(), parameters::stride_shift(), and vw::weights.

Referenced by lda_setup(), and predict_with_metrics().

 {
   if (l.all->passes_complete == 0)
   {
     // build feature to example map
     uint64_t stride_shift = l.all->weights.stride_shift();
     uint64_t weight_mask = l.all->weights.mask();
 
     for (features &fs : ec)
     {
       for (features::iterator &f : fs)
       {
         uint64_t idx = (f.index() & weight_mask) >> stride_shift;
         l.feature_counts[idx] += (uint32_t)f.value();
         l.feature_to_example_map[idx].push_back(ec.example_counter);
       }
     }
   }
 
   learn(l, base, ec);
 }

◆ next_pow2()

size_t next_pow2 ( size_t x )

Definition at line 751 of file lda_core.cc.

Referenced by lda_setup().

 {
   int i = 0;
   x = x > 0 ? x - 1 : 0;
   while (x > 0)
   {
     x >>= 1;
     i++;
   }
   return ((size_t)1) << i;
 }

◆ operator>>()

std::istream& operator>>	(	std::istream &	in,
		lda_math_mode &	mmode
	)

Definition at line 1282 of file lda_core.cc.

References USE_FAST_APPROX, USE_PRECISE, and USE_SIMD.

 {
   using namespace boost::program_options;
 
   std::string token;
   in >> token;
   if (token == "simd")
     mmode = USE_SIMD;
   else if (token == "accuracy" || token == "precise")
     mmode = USE_PRECISE;
   else if (token == "fast-approx" || token == "approx")
     mmode = USE_FAST_APPROX;
   else
     throw boost::program_options::invalid_option_value(token);
   return in;
 }

◆ predict()

void predict	(	lda &	l,
		LEARNER::single_learner &	base,
		example &	ec
	)

Definition at line 1040 of file lda_core.cc.

References learn().

Referenced by lda_setup().

 { learn(l, base, ec); }

learn

void learn(lda &l, LEARNER::single_learner &, example &ec)

Definition: lda_core.cc:999

◆ predict_with_metrics()

void predict_with_metrics	(	lda &	l,
		LEARNER::single_learner &	base,
		example &	ec
	)

Definition at line 1041 of file lda_core.cc.

References learn_with_metrics().

Referenced by lda_setup().

 { learn_with_metrics(l, base, ec); }

learn_with_metrics

void learn_with_metrics(lda &l, LEARNER::single_learner &base, example &ec)

Definition: lda_core.cc:1017

◆ return_example()

void return_example	(	vw &	all,
		example &	ec
	)

Definition at line 853 of file lda_core.cc.

References vw::current_pass, shared_data::dump_interval, f, vw::final_prediction_sink, VW::finish_example(), vw::holdout_set_off, example::loss, example::num_features, example::pred, MWT::print_scalars(), shared_data::print_update(), vw::progress_add, vw::progress_arg, vw::quiet, polyprediction::scalars, vw::sd, example::tag, example::test_only, shared_data::update(), example::weight, and shared_data::weighted_examples().

Referenced by learn_batch().

 {
   all.sd->update(ec.test_only, true, ec.loss, ec.weight, ec.num_features);
   for (int f : all.final_prediction_sink) MWT::print_scalars(f, ec.pred.scalars, ec.tag);
 
   if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet)
     all.sd->print_update(
         all.holdout_set_off, all.current_pass, "none", 0, ec.num_features, all.progress_add, all.progress_arg);
   VW::finish_example(all, ec);
 }

◆ save_load()

void save_load	(	lda &	l,
		io_buf &	model_file,
		bool	read,
		bool	text
	)

Definition at line 793 of file lda_core.cc.

References lda::all, bin_text_read_write_fixed(), parameters::dense_weights, v_array< T >::empty(), io_buf::files, vw::initial_t, initialize_regressor(), vw::lda, lda::lda_D, lda::lda_rho, vw::length(), vw::model_file_ver, vw::num_bits, vw::random_weights, dense_parameters::set_default(), sparse_parameters::set_default(), parameters::sparse, parameters::sparse_weights, parameters::stride(), parameters::strided_index(), VERSION_FILE_WITH_HEADER_ID, and vw::weights.

Referenced by lda_setup().

 {
   // Reads or writes the LDA weights from/to model_file.
   //
   // l          - LDA state; supplies lda_D, lda_rho and the vw instance.
   // model_file - model buffer; nothing is transferred when it has no files.
   // read       - true to load, false to save.
   // text       - true for the textual representation.
   vw &all = *(l.all);
   uint64_t length = (uint64_t)1 << all.num_bits;
   if (read)
   {
     // Before loading, initialise the regressor and install the LDA default
     // initialiser on whichever weight container (sparse or dense) is in use.
     initialize_regressor(all);
     initial_weights init(all.initial_t, (float)(l.lda_D / all.lda / all.length() * 200), all.random_weights, all.lda,
         all.weights.stride());
     if (all.weights.sparse)
       all.weights.sparse_weights.set_default<initial_weights, set_initial_lda_wrapper<sparse_parameters>>(init);
     else
       all.weights.dense_weights.set_default<initial_weights, set_initial_lda_wrapper<dense_parameters>>(init);
   }
   if (!model_file.files.empty())
   {
     // Each record is: feature index i, followed by K = all.lda weights.
     uint64_t i = 0;
     std::stringstream msg;
     size_t brw = 1;

     do
     {
       // brw counts the bytes transferred for this record; 0 ends a read.
       brw = 0;
       size_t K = all.lda;
       if (!read && text)
         msg << i << " ";

       // Writing, and reading models at or above VERSION_FILE_WITH_HEADER_ID,
       // use a 64-bit index.
       if (!read || all.model_file_ver >= VERSION_FILE_WITH_HEADER_ID)
         brw += bin_text_read_write_fixed(model_file, (char *)&i, sizeof(i), "", read, msg, text);
       else
       {
         // support 32bit build models
         // NOTE(review): j is copied into i even when nothing was read.
         uint32_t j;
         brw += bin_text_read_write_fixed(model_file, (char *)&j, sizeof(j), "", read, msg, text);
         i = j;
       }

       if (brw != 0)
       {
         weight *w = &(all.weights.strided_index(i));
         for (uint64_t k = 0; k < K; k++)
         {
           weight *v = w + k;
           // The text output shows weight + lda_rho; the value passed to
           // bin_text_read_write_fixed is the raw weight.
           if (!read && text)
             msg << *v + l.lda_rho << " ";
           brw += bin_text_read_write_fixed(model_file, (char *)v, sizeof(*v), "", read, msg, text);
         }
       }
       if (text)
       {
         // Terminate the text record with a newline when writing.
         if (!read)
           msg << "\n";
         brw += bin_text_read_write_fixed(model_file, nullptr, 0, "", read, msg, text);
       }
       // When writing, walk through every index in [0, length).
       if (!read)
         ++i;
       // Writing stops after the last index; reading stops once a record
       // transfers no bytes.
     } while ((!read && i < length) || (read && brw > 0));
   }
 }

◆ theta_kl()

float theta_kl	(	lda &	l,
		v_array< float > &	Elogtheta,
		float *	gamma
	)

Definition at line 656 of file lda_core.cc.

References v_array< T >::clear(), lda::digamma(), lda::lda_alpha, lda::lgamma(), v_array< T >::push_back(), and lda::topics.

Referenced by lda_loop().

 {
   float gammasum = 0;
   Elogtheta.clear();
   for (size_t k = 0; k < l.topics; k++)
   {
     Elogtheta.push_back(l.digamma(gamma[k]));
     gammasum += gamma[k];
   }
   float digammasum = l.digamma(gammasum);
   gammasum = l.lgamma(gammasum);
   float kl = -(l.topics * l.lgamma(l.lda_alpha));
   kl += l.lgamma(l.lda_alpha * l.topics) - gammasum;
   for (size_t k = 0; k < l.topics; k++)
   {
     Elogtheta[k] -= digammasum;
     kl += (l.lda_alpha - gamma[k]) * Elogtheta[k];
     kl += l.lgamma(gamma[k]);
   }
 
   return kl;
 }

Classes

Namespaces

Enumerations

Functions

Variables

Enumeration Type Documentation

◆ lda_math_mode

Function Documentation

◆ average_diff()

◆ compute_coherence_metrics() [1/2]

◆ compute_coherence_metrics() [2/2]

◆ end_examples() [1/2]

◆ end_examples() [2/2]

◆ end_pass()

◆ find_cw()

◆ finish_example()

◆ get_top_weights() [1/2]

◆ get_top_weights() [2/2]

◆ lda_loop()

◆ lda_setup()

◆ learn()

◆ learn_batch()

◆ learn_with_metrics()

◆ next_pow2()

◆ operator>>()

◆ predict()

◆ predict_with_metrics()

◆ return_example()

◆ save_load()

◆ theta_kl()