#pragma warning(disable : 4996)  // generated by inner_product use

#include <boost/math/special_functions/digamma.hpp>
#include <boost/math/special_functions/gamma.hpp>
#include <boost/version.hpp>

#if BOOST_VERSION >= 105600
#include <boost/align/is_aligned.hpp>
#endif

static constexpr float underflow_threshold = 1.0e-10f;
inline float lgamma(float x);
inline float powf(float x, float p);
inline bool is_aligned16(void *ptr)
{
#if BOOST_VERSION >= 105600
  return boost::alignment::is_aligned(16, ptr);
#else
  return ((reinterpret_cast<uintptr_t>(ptr) & 0x0f) == 0);
#endif
}
static inline float fastlog2(float x)
{
  uint32_t mx;
  memcpy(&mx, &x, sizeof(uint32_t));
  mx = (mx & 0x007FFFFF) | (0x7e << 23);

  float mx_f;
  memcpy(&mx_f, &mx, sizeof(float));

  uint32_t vx;
  memcpy(&vx, &x, sizeof(uint32_t));

  float y = static_cast<float>(vx);
  y *= 1.0f / (float)(1 << 23);

  return y - 124.22544637f - 1.498030302f * mx_f - 1.72587999f / (0.3520887068f + mx_f);
}
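// How the bit trick above works: reinterpreting an IEEE-754 float's bits as an
// integer and scaling by 2^-23 yields (exponent + bias) plus mantissa/2^23, a
// crude piecewise-linear log2(x); the rational term in mx_f then corrects the
// mantissa's contribution. fastlog (used below) is natural log via the identity
// ln(x) = ln(2) * log2(x), exactly as vfastlog does for four lanes at once.
// An illustrative sanity check (hypothetical helper, not part of the original
// source; assumes <cassert> and <cmath> are available):
static inline void fastlog2_sanity_check()
{
  assert(std::fabs(fastlog2(8.0f) - 3.0f) < 1e-3f);         // exact power of two
  assert(std::fabs(fastlog2(10.0f) - 3.3219281f) < 1e-3f);  // log2(10)
}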
static inline float fastpow2(float p)
{
  float offset = (p < 0) * 1.0f;
  float clipp = (p < -126.0) ? -126.0f : p;
  int w = (int)clipp;
  float z = clipp - w + offset;
  uint32_t approx =
      (uint32_t)((1 << 23) * (clipp + 121.2740838f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z));

  float v;
  memcpy(&v, &approx, sizeof(uint32_t));
  return v;
}
static inline float fastlgamma(float x)
{
  float logterm = fastlog(x * (1.0f + x) * (2.0f + x));
  float xp3 = 3.0f + x;

  return -2.081061466f - x + 0.0833333f / xp3 - logterm + (2.5f + x) * fastlog(xp3);
}
static inline float fastdigamma(float x)
{
  float twopx = 2.0f + x;
  float logterm = fastlog(twopx);

  return -(1.0f + 2.0f * x) / (x * (1.0f + x)) - (13.0f + 6.0f * x) / (12.0f * twopx * twopx) + logterm;
}
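// The shift by 2 in fastdigamma exploits the recurrence
//   digamma(x) = digamma(x + 2) - 1/x - 1/(x + 1),
// which moves the argument away from digamma's poles at 0 and -1 before the
// rational/log approximation is applied, so accuracy holds even for small
// positive x -- the regime that matters for LDA's gamma parameters.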
#if !defined(VW_NO_INLINE_SIMD)

#if defined(__SSE2__) || defined(__SSE3__) || defined(__SSE4_1__)

#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__SSE3__)
#include <tmmintrin.h>
#endif
#if defined(__SSE4_1__)
#include <smmintrin.h>
#endif

#define HAVE_SIMD_MATHMODE

typedef __m128 v4sf;
typedef __m128i v4si;
inline v4sf v4si_to_v4sf(v4si x) { return _mm_cvtepi32_ps(x); }
inline v4si v4sf_to_v4si(v4sf x) { return _mm_cvttps_epi32(x); }
template <const int idx>
float v4sf_index(const v4sf x)
{
#if defined(__SSE4_1__)
  float ret;
  int val;

  val = _mm_extract_ps(x, idx);
  memcpy(&ret, &val, sizeof(uint32_t));

  return ret;
#else
  return _mm_cvtss_f32(_mm_shuffle_ps(x, x, _MM_SHUFFLE(idx, idx, idx, idx)));
#endif
}

template <>
float v4sf_index<0>(const v4sf x)
{
  return _mm_cvtss_f32(x);
}
inline v4sf v4sfl(const float x) { return _mm_set1_ps(x); }
inline v4si v4sil(const uint32_t x) { return _mm_set1_epi32(x); }
inline __m128 operator+(const __m128 a, const __m128 b) { return _mm_add_ps(a, b); }
inline __m128 operator-(const __m128 a, const __m128 b) { return _mm_sub_ps(a, b); }
inline __m128 operator*(const __m128 a, const __m128 b) { return _mm_mul_ps(a, b); }
inline __m128 operator/(const __m128 a, const __m128 b) { return _mm_div_ps(a, b); }
inline v4sf vfastpow2(const v4sf p)
{
  v4sf ltzero = _mm_cmplt_ps(p, v4sfl(0.0f));
  v4sf offset = _mm_and_ps(ltzero, v4sfl(1.0f));
  v4sf lt126 = _mm_cmplt_ps(p, v4sfl(-126.0f));
  v4sf clipp = _mm_andnot_ps(lt126, p) + _mm_and_ps(lt126, v4sfl(-126.0f));
  v4si w = v4sf_to_v4si(clipp);
  v4sf z = clipp - v4si_to_v4sf(w) + offset;

  const v4sf c_121_2740838 = v4sfl(121.2740838f);
  const v4sf c_27_7280233 = v4sfl(27.7280233f);
  const v4sf c_4_84252568 = v4sfl(4.84252568f);
  const v4sf c_1_49012907 = v4sfl(1.49012907f);

  v4sf v = v4sfl(1 << 23) * (clipp + c_121_2740838 + c_27_7280233 / (c_4_84252568 - z) - c_1_49012907 * z);

  return _mm_castsi128_ps(v4sf_to_v4si(v));
}
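// vfastpow2 is the scalar fastpow2 applied lane-wise, with branches turned
// into masks: _mm_cmplt_ps produces all-ones lanes where the predicate holds,
// so _mm_and_ps/_mm_andnot_ps select between the clipped constant and p
// without any per-lane branching. The clipp line, for example, computes per lane
//   clipp = (p < -126.0f) ? -126.0f : p;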
inline v4sf vfastexp(const v4sf p)
{
  const v4sf c_invlog_2 = v4sfl(1.442695040f);

  return vfastpow2(c_invlog_2 * p);
}
inline v4sf vfastlog2(v4sf x)
{
  v4si vx_i = _mm_castps_si128(x);
  v4sf mx_f = _mm_castsi128_ps(_mm_or_si128(_mm_and_si128(vx_i, v4sil(0x007FFFFF)), v4sil(0x3f000000)));
  v4sf y = v4si_to_v4sf(vx_i) * v4sfl(1.1920928955078125e-7f);

  const v4sf c_124_22551499 = v4sfl(124.22551499f);
  const v4sf c_1_498030302 = v4sfl(1.498030302f);
  const v4sf c_1_725877999 = v4sfl(1.72587999f);
  const v4sf c_0_3520087068 = v4sfl(0.3520887068f);

  return y - c_124_22551499 - c_1_498030302 * mx_f - c_1_725877999 / (c_0_3520087068 + mx_f);
}
inline v4sf vfastlog(v4sf x)
{
  const v4sf c_0_69314718 = v4sfl(0.69314718f);

  return c_0_69314718 * vfastlog2(x);
}
inline v4sf vfastdigamma(v4sf x)
{
  v4sf twopx = v4sfl(2.0f) + x;
  v4sf logterm = vfastlog(twopx);

  return (v4sfl(-48.0f) + x * (v4sfl(-157.0f) + x * (v4sfl(-127.0f) - v4sfl(30.0f) * x))) /
             (v4sfl(12.0f) * x * (v4sfl(1.0f) + x) * twopx * twopx) +
         logterm;
}
void vexpdigammify(vw &all, float *gamma, const float underflow_threshold)
{
  float extra_sum = 0.0f;
  v4sf sum = v4sfl(0.0f);
  float *fp;
  const float *fpend = gamma + all.lda;

  // First pass: accumulate the total and replace each gamma with its digamma.
  // Scalar prologue up to the first 16-byte boundary.
  for (fp = gamma; fp < fpend && !is_aligned16(fp); ++fp)
  {
    extra_sum += *fp;
    *fp = fastdigamma(*fp);
  }

  // Aligned SIMD body, four lanes at a time.
  for (; is_aligned16(fp) && fp + 4 < fpend; fp += 4)
  {
    v4sf arg = _mm_load_ps(fp);
    sum = sum + arg;
    arg = vfastdigamma(arg);
    _mm_store_ps(fp, arg);
  }

  // Scalar epilogue for the tail.
  for (; fp < fpend; ++fp)
  {
    extra_sum += *fp;
    *fp = fastdigamma(*fp);
  }

#if defined(__SSE3__) || defined(__SSE4_1__)
  // Fast horizontal sum across the vector accumulator.
  sum = _mm_hadd_ps(sum, sum);
  sum = _mm_hadd_ps(sum, sum);
  extra_sum += v4sf_index<0>(sum);
#else
  extra_sum += v4sf_index<0>(sum) + v4sf_index<1>(sum) + v4sf_index<2>(sum) + v4sf_index<3>(sum);
#endif

  extra_sum = fastdigamma(extra_sum);
  sum = v4sfl(extra_sum);

  // Second pass: exp(digamma(gamma_k) - digamma(sum)) with an underflow clamp.
  for (fp = gamma; fp < fpend && !is_aligned16(fp); ++fp)
    *fp = fmax(underflow_threshold, fastexp(*fp - extra_sum));

  for (; is_aligned16(fp) && fp + 4 < fpend; fp += 4)
  {
    v4sf arg = _mm_load_ps(fp);
    arg = arg - sum;
    arg = vfastexp(arg);
    arg = _mm_max_ps(v4sfl(underflow_threshold), arg);
    _mm_store_ps(fp, arg);
  }

  for (; fp < fpend; ++fp)
    *fp = fmax(underflow_threshold, fastexp(*fp - extra_sum));
}
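// vexpdigammify uses the standard aligned-SIMD traversal: a scalar prologue up
// to the first 16-byte boundary, a vector body over aligned quads, and a
// scalar epilogue for the tail. A minimal sketch of the pattern in isolation
// (hypothetical helper, not part of the original source):
static inline void apply_fastdigamma(float *p, const float *pend)
{
  for (; p < pend && !is_aligned16(p); ++p) *p = fastdigamma(*p);  // prologue
  for (; is_aligned16(p) && p + 4 < pend; p += 4)                  // aligned body
    _mm_store_ps(p, vfastdigamma(_mm_load_ps(p)));
  for (; p < pend; ++p) *p = fastdigamma(*p);                      // epilogue
}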
void vexpdigammify_2(vw &all, float *gamma, const float *norm, const float underflow_threshold)
{
  float *fp = gamma;
  const float *np;
  const float *fpend = gamma + all.lda;

  // Scalar prologue up to the first 16-byte boundary.
  for (np = norm; fp < fpend && !is_aligned16(fp); ++fp, ++np)
    *fp = fmax(underflow_threshold, fastexp(fastdigamma(*fp) - *np));

  // Aligned SIMD body; norm carries no alignment guarantee, hence the
  // unaligned load for vnorm.
  for (; is_aligned16(fp) && fp + 4 < fpend; fp += 4, np += 4)
  {
    v4sf arg = _mm_load_ps(fp);
    arg = vfastdigamma(arg);
    v4sf vnorm = _mm_loadu_ps(np);
    arg = arg - vnorm;
    arg = vfastexp(arg);
    arg = _mm_max_ps(v4sfl(underflow_threshold), arg);
    _mm_store_ps(fp, arg);
  }

  // Scalar epilogue.
  for (; fp < fpend; ++fp, ++np)
    *fp = fmax(underflow_threshold, fastexp(fastdigamma(*fp) - *np));
}
#endif  // __SSE2__ || __SSE3__ || __SSE4_1__
#endif  // !VW_NO_INLINE_SIMD

// ... (the following templates live in namespace ldamath) ...

// Unspecialized fallbacks: the type-dependent false condition makes the
// assertion fire only when an unsupported (type, math mode) pair is actually
// instantiated.
template <typename T, const lda_math_mode mtype>
inline T lgamma(T x)
{
  BOOST_STATIC_ASSERT_MSG(sizeof(T) == 0, "ldamath::lgamma is not defined for this type and math mode.");
}

template <typename T, const lda_math_mode mtype>
inline T digamma(T x)
{
  BOOST_STATIC_ASSERT_MSG(sizeof(T) == 0, "ldamath::digamma is not defined for this type and math mode.");
}

template <typename T, lda_math_mode mtype>
inline T exponential(T x)
{
  BOOST_STATIC_ASSERT_MSG(sizeof(T) == 0, "ldamath::exponential is not defined for this type and math mode.");
}

template <typename T, lda_math_mode mtype>
inline T powf(T x, T p)
{
  BOOST_STATIC_ASSERT_MSG(sizeof(T) == 0, "ldamath::powf is not defined for this type and math mode.");
}
// ...

template <>
inline float powf<float, USE_PRECISE>(float x, float p)
{
  return std::pow(x, p);
}

// ...
template <typename T, const lda_math_mode mtype>
inline void expdigammify(vw &all, T *gamma, T threshold, T initial)
{
  // Subtract digamma of the total from each digamma'd coordinate, exponentiate,
  // and clamp at the underflow threshold.
  T sum = digamma<T, mtype>(std::accumulate(gamma, gamma + all.lda, initial));
  std::transform(gamma, gamma + all.lda, gamma,
      [sum, threshold](T g) { return fmax(threshold, exponential<T, mtype>(digamma<T, mtype>(g) - sum)); });
}

template <>
inline void expdigammify<float, USE_SIMD>(vw &all, float *gamma, float threshold, float)
{
#if defined(HAVE_SIMD_MATHMODE)
  vexpdigammify(all, gamma, threshold);
#else
  // Fall back to the scalar fast approximation when SIMD is unavailable.
  expdigammify<float, USE_FAST_APPROX>(all, gamma, threshold, 0.0f);
#endif
}
template <typename T, const lda_math_mode mtype>
inline void expdigammify_2(vw &all, T *gamma, T *norm, const T threshold)
{
  std::transform(gamma, gamma + all.lda, norm, gamma,
      [threshold](float g, float n) { return fmax(threshold, exponential<T, mtype>(digamma<T, mtype>(g) - n)); });
}

template <>
inline void expdigammify_2<float, USE_SIMD>(vw &all, float *gamma, float *norm, const float threshold)
{
#if defined(HAVE_SIMD_MATHMODE)
  vexpdigammify_2(all, gamma, norm, threshold);
#else
  expdigammify_2<float, USE_FAST_APPROX>(all, gamma, norm, threshold);
#endif
}
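// The T/mtype template pairs above give compile-time dispatch: a call such as
//   ldamath::expdigammify<float, USE_PRECISE>(all, gamma, underflow_threshold, 0.0f);
// resolves to the boost-backed specialization, while <float, USE_SIMD> lands
// on the hand-vectorized routine (degrading to USE_FAST_APPROX when the build
// has no SSE support). An unsupported (type, mode) pair fails at compile time
// via the static asserts above rather than at runtime.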
// lda member dispatchers: route each operation to the implementation selected
// by the active math mode, aborting on a corrupted mode value.

float lda::digamma(float x)
{
  // ... dispatch on mmode ...
  std::cerr << "lda::digamma: Trampled or invalid math mode, aborting" << std::endl;
  abort();
}

float lda::lgamma(float x)
{
  // ... dispatch on mmode ...
  std::cerr << "lda::lgamma: Trampled or invalid math mode, aborting" << std::endl;
  abort();
}

float lda::powf(float x, float p)
{
  // ... dispatch on mmode ...
  std::cerr << "lda::powf: Trampled or invalid math mode, aborting" << std::endl;
  abort();
}

void lda::expdigammify(vw &all, float *gamma)
{
  switch (mmode)
  {
    case USE_FAST_APPROX:
      ldamath::expdigammify<float, USE_FAST_APPROX>(all, gamma, underflow_threshold, 0.0f);
      break;
    case USE_PRECISE:
      ldamath::expdigammify<float, USE_PRECISE>(all, gamma, underflow_threshold, 0.0f);
      break;
    // ... USE_SIMD case ...
    default:
      std::cerr << "lda::expdigammify: Trampled or invalid math mode, aborting" << std::endl;
      abort();
  }
}

void lda::expdigammify_2(vw &all, float *gamma, float *norm)
{
  switch (mmode)
  {
    case USE_FAST_APPROX:
      ldamath::expdigammify_2<float, USE_FAST_APPROX>(all, gamma, norm, underflow_threshold);
      break;
    case USE_PRECISE:
      ldamath::expdigammify_2<float, USE_PRECISE>(all, gamma, norm, underflow_threshold);
      break;
    // ... USE_SIMD case ...
    default:
      std::cerr << "lda::expdigammify_2: Trampled or invalid math mode, aborting" << std::endl;
      abort();
  }
}
static float average_diff(vw &all, float *oldgamma, float *newgamma)
{
  float sum;
  float normalizer;

  // Accumulate the L1 distance between the old and new gamma vectors.
  sum = std::inner_product(
      oldgamma, oldgamma + all.lda, newgamma, 0.0f,
      [](float accum, float absdiff) { return accum + absdiff; },
      [](float old_g, float new_g) { return std::abs(old_g - new_g); });

  normalizer = std::inner_product(oldgamma, oldgamma + all.lda, newgamma, 0.0f);
  return sum / normalizer;
}
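// average_diff is the convergence test for the per-document loop. The
// five-argument inner_product overload with custom "plus" and "times" lambdas
// is equivalent to the plain loop (sketch):
//   float sum = 0.f;
//   for (size_t k = 0; k < all.lda; ++k) sum += std::abs(oldgamma[k] - newgamma[k]);
// The second lambda produces each per-element |old - new|, and the first
// simply accumulates them.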
// KL divergence between the variational posterior q(theta | gamma) and the
// Dirichlet prior.
static float theta_kl(lda &l, v_array<float> &Elogtheta, float *gamma)
{
  float gammasum = 0;
  // ...
  for (size_t k = 0; k < l.topics; k++)
  {
    // E[log theta_k] = psi(gamma_k) - psi(sum_j gamma_j); the first term goes
    // into Elogtheta here, and the digammasum subtraction below completes it.
    // ...
    gammasum += gamma[k];
  }
  float digammasum = l.digamma(gammasum);
  gammasum = l.lgamma(gammasum);
  // ...
  for (size_t k = 0; k < l.topics; k++)
  {
    Elogtheta[k] -= digammasum;
    kl += (l.lda_alpha - gamma[k]) * Elogtheta[k];
    // ...
  }
  // ...
}
static inline float find_cw(lda &l, float *u_for_w, float *v)
{
  return 1.0f / std::inner_product(u_for_w, u_for_w + l.topics, v, 0.0f);
}
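// find_cw returns the per-word normalizer of the variational update:
//   c_w = 1 / (sum_k u_{w,k} * v_k),
// where u_for_w holds the word's (rho-smoothed) topic weights and v the
// document's current topic vector. Multiplying by c_w makes each word's topic
// responsibilities sum to one.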
// One document's variational inner loop: iterate the gamma updates until the
// change falls below the convergence threshold, returning the per-word score.
float lda_loop(lda &l, v_array<float> &Elogtheta, float *v, example *ec, float)
{
  // ...
  for (size_t i = 0; i < l.topics; i++)
    // ... initialize gamma and v ...

  size_t num_words = 0;
  for (features &fs : *ec) num_words += fs.size();

  float doc_length = 0;
  do
  {
    // ...
    size_t word_count = 0;
    // ... for each feature f of the example ...
    {
      float *u_for_w = &(weights[f.index()]) + l.topics + 1;
      float c_w = find_cw(l, u_for_w, v);
      xc_w = c_w * f.value();
      score += -f.value() * log(c_w);
      // ...
      for (size_t k = 0; k < max_k; k++, ++u_for_w) new_gamma[k] += xc_w * *u_for_w;
      // ...
      doc_length += f.value();
    }
    // ...
  } while (/* ... gamma not yet converged ... */);

  ec->pred.scalars.clear();
  ec->pred.scalars.resize(l.topics);
  // ... copy the converged gamma into the prediction ...
  ec->pred.scalars.end() = ec->pred.scalars.begin() + l.topics;

  return score / doc_length;
}
size_t next_pow2(size_t x)
{
  int i = 0;
  x = x > 0 ? x - 1 : 0;
  while (x > 0)
  {
    x >>= 1;
    i++;
  }
  return ((size_t)1) << i;
}
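// next_pow2 rounds up to the nearest power of two (and maps 0 to 1), e.g.
//   next_pow2(1) == 1, next_pow2(5) == 8, next_pow2(8) == 8.
// It is used in lda_setup below to round the minibatch size up to a power of two.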
initial_weights(weight initial, weight initial_random, bool random, uint32_t lda, uint32_t stride)
    : _initial(initial), _initial_random(initial_random), _random(random), _lda(lda), _stride(stride)
{
}

// ...
// Seed each topic weight with -log(uniform) noise so that initial topics differ:
for (size_t i = 0; i != lda; ++i, ++index) pw[i] = (float)(-log(merand48(index) + 1e-6) + 1.0f) * initial_random;
// ...
void save_load(lda &l, io_buf &model_file, bool read, bool text)
{
  // ...
  uint64_t length = (uint64_t)1 << all.num_bits;
  // ...
  std::stringstream msg;
  // ...
    for (uint64_t k = 0; k < K; k++)
    // ... read/write topic k of word i ...
  // ...
  } while ((!read && i < length) || (read && brw > 0));
size_t stride = weights.stride();
for (size_t i = 0; i <= weights.mask(); i += stride)
{
  weight *w = &(weights[i]);
  // ...
}
// learn_batch: one online variational-Bayes step over the buffered minibatch.
// ...
minuseta = 1.0f - eta;
eta *= l.lda_D / batch_size;
// ...
uint64_t last_weight_index = -1;
// ... for each index_feature s in l.sorted_features ...
{
  if (last_weight_index == s->f.weight_index)
    continue;  // skip repeated occurrences of the same word
  last_weight_index = s->f.weight_index;
  // ...
  float *weights_for_w = &(weights[s->f.weight_index & weights.mask()]);
  float decay_component =
      /* ... decay-level bookkeeping ... */;
  // ...
  float *u_for_w = weights_for_w + l.all->lda + 1;
  // Decay this word's topic weights, then refresh u = lambda + rho.
  for (size_t k = 0; k < l.all->lda; k++)
  {
    weights_for_w[k] *= decay;
    u_for_w[k] = weights_for_w[k] + l.lda_rho;
  }
}
// Run the per-document inner loops for the whole batch.
for (size_t d = 0; d < batch_size; d++)
{
  // ...
}
// ...
// Scale each updated word's weights by (1 - eta) ...
float *word_weights = &(weights[s->f.weight_index]);
for (size_t k = 0; k < l.all->lda; k++, ++word_weights)
{
  float new_value = minuseta * *word_weights;
  *word_weights = new_value;
}
// ... then add the eta-weighted sufficient statistics from each document.
for (; s != next; s++)
{
  float *v_s = &(l.v[s->document * l.all->lda]);
  float *u_for_w = &(weights[s->f.weight_index]) + l.all->lda + 1;
  float c_w = eta * find_cw(l, u_for_w, v_s) * s->f.x;
  word_weights = &(weights[s->f.weight_index]);
  for (size_t k = 0; k < l.all->lda; k++, ++u_for_w, ++word_weights)
  {
    float new_value = *u_for_w * v_s[k] * c_w;
    // ...
    *word_weights += new_value;
  }
}
// ...
for (size_t k = 0; k < l.all->lda; k++)
// ...
// ...
uint64_t idx = (f.index() & weight_mask) >> stride_shift;
// ...
template <class T>
void get_top_weights(vw *all, int top_words_count, int topic, std::vector<feature> &output, T &weights)
{
  uint64_t length = (uint64_t)1 << all->num_bits;

  // Min-heap ordered by weight: the smallest of the current top-k stays on
  // top, so each better candidate replaces it in O(log k).
  auto cmp = [](feature left, feature right) { return left.x > right.x; };
  std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
  typename T::iterator iter = weights.begin();

  for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
    top_features.push({(&(*iter))[topic], iter.index()});

  for (uint64_t i = top_words_count; i < length; i++, ++iter)
  {
    weight v = (&(*iter))[topic];
    if (v > top_features.top().x)
    {
      top_features.pop();
      top_features.push({v, i});
    }
  }

  // Drain the heap backwards so output ends up sorted best-first.
  output.resize(top_features.size());
  for (int i = (int)top_features.size() - 1; i >= 0; i--)
  {
    output[i] = top_features.top();
    top_features.pop();
  }
}
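// Usage sketch (hypothetical call site): fetch the ten strongest words of
// topic 0.
//   std::vector<feature> top;
//   get_top_weights(&all, 10, 0, top, weights);
// Each returned feature carries the weight in .x and the hash-table index in
// .weight_index; mapping indices back to token strings requires the inverted
// hash and is not done here.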
template <class T>
void compute_coherence_metrics(lda &l, T &weights)
{
  // ...
  std::vector<std::vector<feature_pair>> topics_word_pairs;
  topics_word_pairs.resize(l.topics);

  int top_words_count = 10;

  for (size_t topic = 0; topic < l.topics; topic++)
  {
    // Collect this topic's top words with the same min-heap trick as
    // get_top_weights.
    auto cmp = [](feature left, feature right) { return left.x > right.x; };
    std::priority_queue<feature, std::vector<feature>, decltype(cmp)> top_features(cmp);
    typename T::iterator iter = weights.begin();
    for (uint64_t i = 0; i < std::min(static_cast<uint64_t>(top_words_count), length); i++, ++iter)
      top_features.push(feature((&(*iter))[topic], iter.index()));

    for (typename T::iterator v = weights.begin(); v != weights.end(); ++v)
      if ((&(*v))[topic] > top_features.top().x)
      {
        top_features.pop();
        top_features.push(feature((&(*v))[topic], v.index()));
      }

    std::vector<uint64_t> top_features_idx;
    top_features_idx.resize(top_features.size());
    for (int i = (int)top_features.size() - 1; i >= 0; i--)
    {
      top_features_idx[i] = top_features.top().weight_index;
      top_features.pop();
    }

    // Record every unordered pair of this topic's top words.
    auto &word_pairs = topics_word_pairs[topic];
    for (size_t i = 0; i < top_features_idx.size(); i++)
      for (size_t j = i + 1; j < top_features_idx.size(); j++)
        word_pairs.emplace_back(top_features_idx[i], top_features_idx[j]);
  }

  // Map each first word to its co-occurring second words; the document
  // co-occurrence counts are filled in below.
  std::map<uint64_t, std::vector<word_doc_frequency>> coWordsDFSet;
  for (auto &vec : topics_word_pairs)
  {
    for (auto &wp : vec)
    {
      // ...
      auto wdf = coWordsDFSet.find(f1);

      if (wdf != coWordsDFSet.end())
      {
        // ...
        if (std::find_if(wdf->second.begin(), wdf->second.end(),
                [&f2](const word_doc_frequency &v) { return v.idx == f2; }) == wdf->second.end())
          wdf->second.push_back({f2, 0});
      }
      else
      {
        std::vector<word_doc_frequency> vec = {{f2, 0}};
        coWordsDFSet.insert(std::make_pair(f1, vec));
      }
    }
  }

  // Count documents containing both words by intersecting the two sorted
  // per-word example lists.
  for (auto &pair : coWordsDFSet)
  {
    // ...
    for (auto &wdf : pair.second)
    {
      // ...
      while (i < examples_for_f1.size() && j < examples_for_f2.size())
      {
        if (examples_for_f1[i] == examples_for_f2[j])
        {
          wdf.count++;
          i++;
          j++;
        }
        else if (examples_for_f2[j] < examples_for_f1[i])
          j++;
        else
          i++;
      }
    }
  }

  float epsilon = 1e-6f;
  float avg_coherence = 0;
  for (size_t topic = 0; topic < l.topics; topic++)
  {
    float coherence = 0;

    for (auto &pairs : topics_word_pairs[topic])
    {
      // ...
      auto &co_feature = coWordsDFSet[f1];
      auto co_feature_df = std::find_if(
          co_feature.begin(), co_feature.end(), [&f2](const word_doc_frequency &v) { return v.idx == f2; });

      if (co_feature_df != co_feature.end())
      {
        // ...
        coherence += logf((co_feature_df->count + epsilon) / l.feature_counts[f1]);
      }
    }

    printf("Topic %3d coherence: %f\n", (int)topic, coherence);
    // ...
    avg_coherence += coherence;
  }
  avg_coherence /= l.topics;

  printf("Avg topic coherence: %f\n", avg_coherence);
}
template <class T>
void end_examples(lda &l, T &weights)
{
  for (typename T::iterator iter = weights.begin(); iter != weights.end(); ++iter)
  {
    float decay_component =
        /* ... decay-level bookkeeping ... */;
    // ...
    for (size_t i = 0; i < l.all->lda; ++i) wp[i] *= decay;
  }
}
std::istream &operator>>(std::istream &in, lda_math_mode &mmode)
{
  using namespace boost::program_options;

  std::string token;
  in >> token;
  if (token == "simd")
    mmode = USE_SIMD;
  else if (token == "accuracy" || token == "precise")
    mmode = USE_PRECISE;
  else if (token == "fast-approx" || token == "approx")
    mmode = USE_FAST_APPROX;
  else
    throw boost::program_options::invalid_option_value(token);
  return in;
}
LEARNER::base_learner *lda_setup(options_i &options, vw &all)
{
  auto ld = scoped_calloc_or_throw<lda>();
  lda_math_mode math_mode = USE_SIMD;
  option_group_definition new_options("Latent Dirichlet Allocation");
  new_options.add(make_option("lda", ld->topics).keep().help("Run lda with <int> topics"))
      .add(make_option("lda_alpha", ld->lda_alpha)
               .default_value(0.1f)
               .help("Prior on sparsity of per-document topic weights"))
      .add(make_option("lda_rho", ld->lda_rho)
               .default_value(0.1f)
               .help("Prior on sparsity of topic distributions"))
      .add(make_option("lda_D", ld->lda_D).default_value(10000.0f).help("Number of documents"))
      .add(make_option("lda_epsilon", ld->lda_epsilon).default_value(0.001f).help("Loop convergence threshold"))
      .add(make_option("minibatch", ld->minibatch).default_value(1).help("Minibatch size, for LDA"))
      .add(make_option("math-mode", math_mode).default_value(USE_SIMD).help("Math mode: simd, accuracy, fast-approx"))
      .add(make_option("metrics", ld->compute_coherence_metrics).help("Compute metrics"));
  options.add_and_parse(new_options);
  // ...
  all.lda = (uint32_t)ld->topics;
  // ...
  ld->sorted_features = std::vector<index_feature>();
  ld->total_lambda_init = false;
  // ...
  if (ld->compute_coherence_metrics)
  {
    // ... size feature_counts and feature_to_example_map ...
  }
  // ...
  // Stride: room for lambda and gamma plus one slot, rounded up to a power of two.
  float temp = ceilf(logf((float)(all.lda * 2 + 1)) / logf(2.f));
  // ...
  if (all.eta > 1.)
  {
    std::cerr << "your learning rate is too high, setting it to 1" << std::endl;
    all.eta = std::min(all.eta, 1.f);
  }
  // ...
  size_t minibatch2 = next_pow2(ld->minibatch);
  // ...
  all.p = new parser{minibatch2, previous_strict_parse};
  // ...
  ld->v.resize(all.lda * ld->minibatch);

  ld->decay_levels.push_back(0.f);
  // ...